#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: andrius $
#$Date: 2017-06-28 17:20:57 +0300 (Wed, 28 Jun 2017) $ 
#$Revision: 5452 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.1/scripts/cif_merge $
#------------------------------------------------------------------------------
#*
#* Merge data values in identical data blocks of multiple CIFs. Unique
#* data items are combined into a single data block of the output CIF;
#* data items with equivalent values are silently merged into one. When
#* a value differs for the same data item and the same data block in two
#* files, a warning (error) is issued.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Manage qw( set_loop_tag set_tag );
use COD::CIF::Tags::Print qw( print_cif );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::SUsage qw(usage options);
use COD::ErrorHandler qw( process_warnings
                          process_errors
                          process_parser_messages
                          report_message );
use COD::ToolsVersion;

# Which CIF parser to use; switched by --use-perl-parser / --use-c-parser.
my $use_parser = 'c';

# Raw comma-separated tag lists from --override-tags and --merge-tags;
# both stay undefined unless the corresponding option is given.
my ( $override_tags, $merge_tags );

# Set by --override-all, cleared by --dont-override-all.
my $override_all = 0;

# Per-level flags handed to the COD::ErrorHandler routines; judging by the
# name, a true value makes messages of that level fatal.
my $die_on_error_level = { 'ERROR' => 1, 'WARNING' => 0, 'NOTE' => 0 };

#* OPTIONS:
#*   --override-all
#*                     Override all data items in the recipient CIF.
#*   --dont-override-all
#*                     Do not override all data items in the recipient CIF
#*                     (default).
#*
#*   --override-tags  _tag1,_tag2
#*                     Replace the values of the provided data items
#*                     (_tag1 and _tag2 in this example) with the new ones
#*                     instead of keeping the old values.
#*
#*   --merge-tags  _tag1,_tag2
#*                     Merge the values of the provided data items
#*                     (_tag1 and _tag2 in this example).
#*
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Process the command line options. Whatever getOptions() returns replaces
# @ARGV (presumably the remaining non-option arguments, i.e. the input
# file names).
@ARGV = getOptions(
    '--merge-tags'        => \$merge_tags,
    '--override-tags'     => \$override_tags,
    '--override-all'      => sub { $override_all = 1 },
    '--dont-override-all' => sub { $override_all = 0 },
    '--use-perl-parser'   => sub { $use_parser = 'perl' },
    '--use-c-parser'      => sub { $use_parser = 'c' },
    '--options'           => sub { options(); exit },
    '--help,--usage'      => sub { usage(); exit },
    '--version'           => sub {
        print 'cod-tools version ', $COD::ToolsVersion::Version, "\n";
        exit
    },
);

# Lookup tables built from the comma-separated option values: the key is
# the lowercased tag name, the value is the tag exactly as it was given.
my ( %override_tags, %merge_tags );

for my $spec ( [ $override_tags, \%override_tags ],
               [ $merge_tags,    \%merge_tags    ] ) {
    my ( $tag_list, $lookup ) = @{$spec};

    # An option that was not given (or has an empty/false value) leaves
    # its table empty.
    next if !$tag_list;

    %{$lookup} = map { ( lc($_), $_ ) } split m/,/, $tag_list;
}

# With no file arguments, fall back to the single pseudo-filename '-'
# (presumably treated by the parser as standard input -- TODO confirm).
if( !@ARGV ) {
    @ARGV = ( '-' );
}

# Both output streams are written as UTF-8.
for my $stream ( \*STDOUT, \*STDERR ) {
    binmode $stream, ':encoding(UTF-8)';
}

# Data blocks merged so far, keyed by block name (with the 'data_' prefix),
# and the block names in the order in which they were first encountered.
my %merged_datablocks;
my @merged_datablock_names;

# Parse every input file and fold each of its data blocks into the block
# of the same name that was collected from the previously processed files.
for my $filename (@ARGV) {
    my $options = { parser => $use_parser, no_print => 1 };
    # The second return value (the parser's error count) is not needed here.
    my ( $data, undef, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    canonicalize_all_names( $data );

    # Only the first data block is inspected to decide whether the file
    # contains anything usable.
    if( !@$data || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
           'program'   => $0,
           'filename'  => $filename,
           'err_level' => 'WARNING',
           'message'   => 'file seems to be empty'
        }, $die_on_error_level->{'WARNING'} );
        next;
    }

    for my $datablock (@$data) {
        # Declaration and conditional assignment are kept separate on
        # purpose: 'my $x = ... if COND' has undefined behaviour in Perl
        # and may leak the value from the previous loop iteration.
        my $dataname;
        if( defined $datablock->{name} ) {
            $dataname = 'data_' . $datablock->{name};
        }
        # Explicit hash key for unnamed blocks, so that neither the lookup
        # nor the list of block names ever contains an undefined value.
        my $block_key = defined $dataname ? $dataname : '';

        # Route warnings raised while merging this block through the COD
        # error handler so that they carry the file and data block names.
        # The handler receives the complete warning text as its first
        # argument.
        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'  => $_[0],
                'program'  => $0,
                'add_pos'  => $dataname,
                'filename' => $filename
            }, $die_on_error_level )
        };

        eval {
            if( !exists $merged_datablocks{$block_key} ) {
                # First occurrence of this block: take it as it is.
                push( @merged_datablock_names, $block_key );
                $merged_datablocks{$block_key} = $datablock;
            } else {
                my $new_datablock = $datablock;
                my $old_datablock = $merged_datablocks{$block_key};

                for my $tag (@{$new_datablock->{tags}}) {
                    if( !exists $old_datablock->{values}{$tag} ) {
                        # The data item is new to the merged block.
                        if( exists $new_datablock->{inloop}{$tag} ) {
                            my $new_loop_id = $new_datablock->{inloop}{$tag};
                            my $old_loop_id;

                            # Find out whether any other data item of the
                            # same (new) loop is already looped in the
                            # merged block; if so, the new item joins
                            # that loop.
                            for my $loop_tag (
                                @{$new_datablock->{loops}[$new_loop_id]} ) {
                                if( exists $old_datablock->{values}{$loop_tag} &&
                                    exists $old_datablock->{inloop}{$loop_tag} ) {
                                    $old_loop_id =
                                        $old_datablock->{inloop}{$loop_tag};
                                    die unless defined $old_loop_id; # assert...
                                    # Items of one loop must have the same
                                    # number of values.
                                    if( scalar( @{$old_datablock->
                                                    {values}{$loop_tag}} ) !=
                                        scalar( @{$new_datablock->
                                                    {values}{$tag}} ) ) {
                                        die "ERROR, looped data item '$loop_tag' has "
                                          . "a different number of values than the "
                                          . "'$tag' data item in file '${filename}' "
                                          . "even though the data items must end up "
                                          . "in the same loop\n";
                                    }
                                    last;
                                }
                            }

                            if( defined $old_loop_id ) {
                                push( @{$old_datablock->{loops}[$old_loop_id]},
                                      $tag );
                                $old_datablock->{inloop}{$tag} = $old_loop_id;
                            } else {
                                # No related loop yet: start a new one that
                                # holds just this data item.
                                push( @{$old_datablock->{loops}}, [ $tag ] );
                                $old_datablock->{inloop}{$tag} =
                                    $#{$old_datablock->{loops}};
                            }
                        }

                        push( @{$old_datablock->{tags}}, $tag );

                        $old_datablock->{values}{$tag} =
                            $new_datablock->{values}{$tag};

                        $old_datablock->{precisions}{$tag} =
                            $new_datablock->{precisions}{$tag};

                        $old_datablock->{types}{$tag} =
                            $new_datablock->{types}{$tag};
                    } elsif( $merge_tags{lc($tag)} ) {
                        # --merge-tags: append the new value to the old one
                        # on a separate line, position by position.
                        for( my $i = 0;
                             $i <= $#{$old_datablock->{values}{$tag}};
                             $i ++ ) {
                            $old_datablock->{values}{$tag}[$i] .= "\n" .
                                $new_datablock->{values}{$tag}[$i];
                        }
                    } elsif( defined $override_tags{lc($tag)} ||
                             $override_all ) {
                        # --override-tags / --override-all: the new value
                        # replaces the old one.
                        if( defined $old_datablock->{inloop}{$tag} ) {
                            set_loop_tag( $old_datablock, $tag, undef,
                                          $new_datablock->{values}{$tag} );
                        } else {
                            set_tag( $old_datablock, $tag,
                                     $new_datablock->{values}{$tag}[0] );
                        }
                    } elsif( !values_are_equal( $old_datablock->{values}{$tag},
                                                $new_datablock->{values}{$tag} )) {
                        # Conflicting values: the old value is kept and the
                        # difference is reported.
                        if( scalar @{$new_datablock->{values}{$tag}} == 1 ) {
                            warn "WARNING, data item '$tag' value " .
                                 "'$new_datablock->{values}{$tag}[0]' differs " .
                                 "from the value '$old_datablock->{values}{$tag}[0]' " .
                                 "encountered in previously processed files\n";
                        } else {
                            warn 'WARNING, values of the looped data item ' .
                                 "'$tag' differ from the values encountered " .
                                 "in previously processed files\n";
                        }
                    }
                }
            }
        };
        if( $@ ) {
            process_errors( {
                'message'  => $@,
                'program'  => $0,
                'filename' => $filename,
                'add_pos'  => $dataname
            }, $die_on_error_level->{'ERROR'} );
        }
    }
}

## use COD::Serialise qw( serialiseRef );
## serialiseRef( \%merged_datablocks );

# Print the merged data blocks in the order in which their names were
# first recorded.
foreach my $block_name (@merged_datablock_names) {
    my $print_options = { keep_tag_order => 1, preserve_loop_order => 1 };
    print_cif( $merged_datablocks{$block_name}, $print_options );
}

# Compares two lists of data item values element by element.
#
# Parameters:
#   $old_values - reference to the array of previously seen values;
#   $new_values - reference to the array of newly read values.
# Returns:
#   1 if both arrays have the same length and all elements at the same
#   positions are equal as strings ('ne' comparison), 0 otherwise.
sub values_are_equal
{
    my ($old_values, $new_values) = @_;

    # Lists of different length can never be equal.
    return 0 if scalar( @{$old_values} ) != scalar( @{$new_values} );

    for my $index ( 0 .. $#{$new_values} ) {
        return 0 if $new_values->[$index] ne $old_values->[$index];
    }

    return 1;
}
