#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2016-10-06 11:26:38 +0300 (Thu, 06 Oct 2016) $ 
#$Rev: 4730 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.1/data/AtomProperties/bin/atom_properties_merge $
#------------------------------------------------------------------------------
#*
#  Merges several yaml format files containing atom information into a
#  single structure. Different output formats are available.
#**

use strict;
use warnings;

use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage );

use XML::Simple;
use YAML qw( Dump Bless Load LoadFile );

my @default_property_order = qw( atomic_number name family period group block
                                 atomic_weight atomic_mass covalent_radius
                                 vdw_radius charge common_charge valence );

my $verbose = 0;
my $fill_missing = 1;
my $override_values = 0;
my $output_format = 'yaml';
my $namespace = 'COD::AtomProperties';

#* USAGE:
#*     $0 file1.yaml [file2.yaml] [file3.yaml]
#* OPTIONS:
#*
#* -f, --fill-missing
#*                     Add empty values for missing atom properties. A list 
#*                     of properties is constructed by taking distinct 
#*                     properties from all of the atom entries (default).
#* --no-fill-missing
#*                     Do no add empty values for missing atom properties.
#*
#* -o, --override-values
#*                     Override property values with the newest ones.
#* --no-overide-values, --keep-original-values
#*                     Keep the oldest property values (default).
#*
#* -v, --verbose
#*                     Output warning messages.
#* -s, --silent
#*                     Do not output warning messages (default).
#*
#* -y, --yaml
#*                     Output merged files in yaml format (default).
#* -p, --perl-hash
#*                     Output merged files as a perl module containing
#*                     a perl hash.
#*
#* -n, --namespace
#*                     Namespace to be used when outputing a perl module
#*                     (default 'COD::AtomProperties').
#*
#* --help, --usage
#*                     Print a short usage message (this message) and exit.
#**
@ARGV = getOptions(
    '-f,--fill-missing'      => sub { $fill_missing = 1 },
    '--no-fill-missing'      => sub { $fill_missing = 0 },

    '-o,--override-values'   => sub { $override_values = 1 },
    '--no-overide-values'    => sub { $override_values = 0 },
    '--keep-original-values' => sub { $override_values = 0 },

    '-v,--verbose'           => sub { $verbose = 1 },
    '-s,--silent'            => sub { $verbose = 0 },

    '-y,--yaml'              => sub { $output_format = 'yaml' },
    '-p,--perl-hash'         => sub { $output_format = 'perl' },

    '-n,--namespace'         => sub { $namespace =  get_value() },

    '--help,--usage'   => sub { usage; exit }
);

@ARGV = ( '-' ) unless @ARGV;

my %atoms_merged;

foreach ( @ARGV ) {

    my $atoms_new;

    if ( $_ eq '-' ) {
        my $text = do { local $/; <STDIN> };
        $atoms_new = Load($text);
    } else {
        $atoms_new = LoadFile($_)
    }

    if ( !keys %atoms_merged ) {
        %atoms_merged = %{$atoms_new};
        next;
    }

    if ( $verbose && keys %atoms_merged != keys %{$atoms_new} ) {
        print {*STDERR} "Warning: Hashes are of different sizes. \n";
    }

    foreach my $type ( keys %{$atoms_new} ) {
        if ( !exists $atoms_merged{$type} ) {
            $atoms_merged{$type} = $atoms_new->{$type};
            next;
        }

        my $atom1 = $atoms_new->{$type};
        my $atom2 = $atoms_merged{$type};
        foreach( keys %{$atom1} ) {
            if ( !exists $atom2->{$_} ) {
                $atoms_merged{$type}{$_} = $atom1->{$_}
            } elsif ( $override_values ) {

                if ( $verbose && !identical($atom1->{$_}, $atom2->{$_}) ) {
                    print {*STDERR} "types are not identical\n";
                }

                $atoms_merged{$type}{$_} = $atom1->{$_}
           }
        }
    }
}

# Getting all distinct atom properties and their reference types
my %all_fields;
foreach my $type ( values %atoms_merged ) {
    foreach ( keys %{$type} ) {
        $all_fields{$_} = ref( $type->{$_} );
    }
}

# Filling hash with empty values of required type
if ( $fill_missing ) {
    foreach my $element ( keys %atoms_merged ) {
        foreach ( keys %all_fields ) {
            if ( ! exists $atoms_merged{$element} -> {$_} ) {
                if ( $all_fields{$_} eq 'ARRAY' ) {
                    $atoms_merged{$element} -> {$_} = [];
                } elsif ( $all_fields{$_} eq 'HASH' ) {
                    $atoms_merged{$element} -> {$_} = {};
                } else {
                    $atoms_merged{$element} -> {$_} = '';
                }
            }
        }
    }
}

# Filtering out options that do not appear in default properties array
my @unrecognized_properties;
my %hash_old = map { $_ => 1 } @default_property_order;
foreach ( keys %all_fields ) {
    if ( ! exists $hash_old{$_} ) {
        push @unrecognized_properties, $_;
    }
}

push @default_property_order, sort @unrecognized_properties;

if ( $output_format eq 'perl' ) {
    print_perl_hash( \%atoms_merged, \@default_property_order, $namespace )
} elsif ( $output_format eq 'yaml' ) {
    my @print_order = sort { $atoms_merged{$a} -> {'atomic_number'} <=>
                             $atoms_merged{$b} -> {'atomic_number'} ||
                             $a cmp $b # this line is mainly to order D and H
                           } keys %atoms_merged;
    Bless(\%atoms_merged)->keys(\@print_order);
    print Dump \%atoms_merged;
};

sub print_perl_hash
{
    my ( $atoms_merged, $property_order, $namespace ) = @_;

    print "package $namespace;\n" .
          "\n" .
          "use strict;\n" .
          "\n" .
          "require Exporter;\n" .
          '@' . $namespace . '::ISA = qw(Exporter);' . "\n" .
          '@' . $namespace . '::EXPORT = qw( ok );' . "\n" .
          "\n" .
          '%' . $namespace . '::atoms = (' . "\n";

    my @print_order = sort { $atoms_merged->{$a}->{'atomic_number'} <=>
                             $atoms_merged->{$b}->{'atomic_number'} ||
                             $a cmp $b # this line is mainly to order D and H
                      } keys %{$atoms_merged};

    foreach my $atom ( @print_order ) {

        print ' 'x5 . "\"$atom\" => {\n";

        foreach ( @{$property_order} ) {
            if ( exists $atoms_merged -> {$atom} -> {$_} ) {
                my $entry = $atoms_merged -> {$atom} -> {$_};
                if ( ref($entry) eq 'ARRAY' ) {
                    $entry = '[' . join(', ', map { qq('$_') } @{$entry}) . ']';
                 } elsif( ref($entry) eq 'HASH' ) {

                    my @key_order = keys %{$entry};
                    if ( $_ eq 'covalent_radii' ) {
                        @key_order = qw( single double triple );
                    } elsif ( $_ eq 'spin_state_radii' ) {
                        @key_order = ( 'l.s.', 'h.s.' )
                    };

                    my @keys;
                    foreach my $key ( @key_order ) {
                        if ( exists $entry->{$key} ) {
                            push @keys, $key;
                        }
                    };

                    if ( keys %{$entry} > 0 ) {
                        $entry = "{\n" .
                            join(",\n",
                                map { ' 'x17 ."\"$_\" => \"" .
                                      $entry -> {$_} . '"' } @keys ) .
                            "\n" . ' 'x13 . '}';
                    } else {
                        $entry = '{}'
                    }
                 } else {
                    $entry = "\"$entry\"";
                 }

                 print ' 'x13 . "\"$_\" => " . $entry  . ",\n";
            }
        }
        print ' 'x11 . "},\n";
    }

    print ");\n\n1;\n";

    return;
}

sub same_array_elements
{
    my ($array1, $array2) = @_;
    my %counts = ();
    $counts{$_} += 1 foreach (@{$array1});
    $counts{$_} -= 1 foreach (@{$array2});
    return !(grep { $_ != 0 } values %counts);
}

sub identical
{
    my ($value1, $value2) = @_;

    my $identical;

    if ( ref $value1 ne ref $value2 ) {
        $identical = 0;
    } elsif ( ref $value1 eq 'ARRAY' ) {
        $identical = same_array_elements($value1, $value2);
    } else {
        $identical = ( $value1 eq $value2 );
    }

    return $identical;
}
