#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: andrius $
#$Date: 2017-05-30 14:51:03 +0300 (Tue, 30 May 2017) $
#$Revision: 5376 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.1/scripts/cif_fix_values $
#------------------------------------------------------------------------------
#*
#* Corrects various CIF file values and reports the changes made to the
#* standard I/O streams. The script is capable of making these corrections:
#*   - Converting temperatures from degrees Celsius to kelvins,
#*     removing temperature units of measurement, processing other
#*     undefined temperature values;
#*   - Fixing misspelled values by using the provided replacement value file;
#*   - Fixing enumeration values in CIF file against CIF dictionaries;
#*   - Correcting values of '_exptl_crystal_density_meas' data item.
#*
#* All described corrections are enabled by default, but can be disabled by 
#* prefixing the corresponding options by '--do-not', '--dont' or '--no'
#* (for example, '--no-fix-temperature').
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::DictTags;
use COD::CIF::Tags::COD;
use COD::CIF::Tags::TCOD;
use COD::CIF::Tags::DFT;
use COD::CIF::Tags::Print qw( print_cif pack_precision fold );
use COD::CIF::Tags::Manage qw( set_tag rename_tag );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw(process_warnings process_errors
                         process_parser_messages report_message);
use COD::ToolsVersion;

# Version control identification string of this script; it is appended
# (without the surrounding '$' signs) to the depositor comments by
# insert_report_to_comments().
my $Id = '$Id: cif_fix_values 5376 2017-05-30 11:51:03Z andrius $';
my $keep_tag_order = 0;

# Switches of the individual corrections. All of them are enabled by
# default; the command line options and dont_fix_any() turn them off.
my (
    $fix_misspelled_values,
    $fix_temperature,
    $fix_enums,
    $fix_value_of_exptl_crystal_density_meas,
    $fix_value_of_refine_ls_weighting_scheme,
    $fix_value_of_atom_sites_solution,
    $treat_as_set,
) = ( 1 ) x 7;

# Name of the replacement value list file (set by '--fix-misspelled').
my $replacement_file;

# Which message levels terminate the script.
my ( $die_on_errors, $die_on_warnings, $die_on_notes ) = ( 1, 0, 0 );

# CIF parser selected by '--use-c-parser' / '--use-perl-parser'.
my $use_parser = 'c';

# Dictionary related state: the list of dictionary files given on the
# command line and the tables built from them (tag => enumeration values,
# tag => description of the defining dictionary).
my $dictParsed = [];
my @dictionaries;
my ( %dictTags, %tagDicts );

# Switches off every correction at once. Used by the '--fix-only-*'
# options, which afterwards re-enable the single requested correction.
sub dont_fix_any
{
    # The loop variable aliases each listed switch, so the assignment
    # changes the switches themselves.
    for my $switch ( $fix_misspelled_values,
                     $fix_temperature,
                     $fix_enums,
                     $treat_as_set,
                     $fix_value_of_exptl_crystal_density_meas,
                     $fix_value_of_refine_ls_weighting_scheme,
                     $fix_value_of_atom_sites_solution ) {
        $switch = 0;
    }

    return;
}

# Built-in enumeration table: maps a CIF data name to the list of values
# that are accepted for it. The table is copied into %dictTags when no
# dictionary is given on the command line; in that case the values are
# reported as coming from 'cif_core.dic' version 2.4.1 (see the dictionary
# loading code further down in this file).
my %default_enums = (
    '_atom_site_adp_type'                    => [ 'Uani', 'Uiso', 'Uovl',
                                                  'Umpe', 'Bani', 'Biso', 'Bovl' ],
    '_atom_site_calc_flag'                   => [ 'd', 'calc', 'c', 'dum' ],
    '_atom_site_refinement_flags_adp'        => [ '.', 'T', 'U', 'TU' ],
    '_atom_site_refinement_flags_occupancy'  => [ '.', 'P' ],
    '_atom_site_refinement_flags_posn'       => [ '.', 'D', 'G', 'R', 'S',
                                                  'DG', 'DR', 'DS', 'GR',
                                                  'GS', 'RS', 'DGR', 'DGS',
                                                  'DRS', 'GRS', 'DGRS' ],
    '_atom_site_refinement_flags'            => [ '.', 'S', 'G', 'R',
                                                  'D', 'T', 'U', 'P' ],
    '_atom_sites_solution_hydrogens'         => [ 'difmap', 'vecmap', 'heavy',
                                                  'direct', 'geom', 'disper',
                                                  'isomor', 'notdet', 'dual', 'other' ],
    '_atom_sites_solution_primary'           => [ 'difmap', 'vecmap', 'heavy',
                                                  'direct', 'geom', 'disper',
                                                  'isomor', 'notdet', 'dual', 'other' ],
    '_atom_sites_solution_secondary'         => [ 'difmap', 'vecmap', 'heavy',
                                                  'direct', 'geom', 'disper',
                                                  'isomor', 'notdet', 'dual', 'other' ],
    '_atom_site_thermal_displace_type'       => [ 'Uani', 'Uiso', 'Uovl', 'Umpe',
                                                  'Bani', 'Biso', 'Bovl' ],
    '_chemical_absolute_configuration'       => [ 'rm', 'ad', 'rmad',
                                                  'syn', 'unk', '.' ],
    '_chemical_conn_bond_type'               => [ 'sing', 'doub', 'trip', 'quad',
                                                  'arom', 'poly', 'delo', 'pi' ],
    '_chemical_enantioexcess_bulk_technique' => [ 'OA', 'CD', 'EC', 'other' ],
    '_chemical_enantioexcess_crystal_technique' => [ 'CD', 'EC', 'other' ],
    '_citation_coordinate_linkage'           => [ 'no', 'n', 'yes', 'y' ],
    '_diffrn_radiation_probe'                => [ 'x-ray', 'neutron',
                                                  'electron', 'gamma' ],
    '_diffrn_radiation_wavelength_determination' => [ 'fundamental',
                                                      'estimated', 'refined' ],
    '_diffrn_radiation_xray_symbol'          => [ 'K-L~3~', 'K-L~2~',
                                                  'K-M~3~', 'K-L~2,3~' ],
    '_diffrn_refln_scan_mode_backgd'         => [ 'st', 'mo' ],
    '_diffrn_refln_scan_mode'                => [ 'om', 'ot', 'q' ],
    # Chemical element symbols from H to Lr.
    '_diffrn_source_target'                  => [ 'H', 'He', 'Li', 'Be', 'B',
                                                  'C', 'N', 'O', 'F', 'Ne',
                                                  'Na', 'Mg', 'Al', 'Si', 'P',
                                                  'S', 'Cl', 'Ar', 'K', 'Ca',
                                                  'Sc', 'Ti', 'V', 'Cr', 'Mn',
                                                  'Fe', 'Co', 'Ni', 'Cu', 'Zn',
                                                  'Ga', 'Ge', 'As', 'Se', 'Br',
                                                  'Kr', 'Rb', 'Sr', 'Y', 'Zr',
                                                  'Nb', 'Mo', 'Tc', 'Ru', 'Rh',
                                                  'Pd', 'Ag', 'Cd', 'In', 'Sn',
                                                  'Sb', 'Te', 'I', 'Xe', 'Cs',
                                                  'Ba', 'La', 'Ce', 'Pr', 'Nd',
                                                  'Pm', 'Sm', 'Eu', 'Gd', 'Tb',
                                                  'Dy', 'Ho', 'Er', 'Tm', 'Yb',
                                                  'Lu', 'Hf', 'Ta', 'W', 'Re',
                                                  'Os', 'Ir', 'Pt', 'Au', 'Hg',
                                                  'Tl', 'Pb', 'Bi', 'Po', 'At',
                                                  'Rn', 'Fr', 'Ra', 'Ac', 'Th',
                                                  'Pa', 'U', 'Np', 'Pu', 'Am',
                                                  'Cm', 'Bk', 'Cf', 'Es', 'Fm',
                                                  'Md', 'No', 'Lr' ],
    '_exptl_absorpt_correction_type'         => [ 'analytical', 'cylinder',
                                                  'empirical', 'gaussian',
                                                  'integration', 'multi-scan',
                                                  'none', 'numerical',
                                                  'psi-scan', 'refdelf', 'sphere' ],
    '_exptl_crystal_colour_lustre'           => [ 'metallic', 'dull', 'clear' ],
    '_exptl_crystal_colour_modifier'         => [ 'light', 'dark', 'whitish',
                                                  'blackish', 'grayish', 'brownish',
                                                  'reddish', 'pinkish', 'orangish',
                                                  'yellowish', 'greenish', 'bluish' ],
    '_exptl_crystal_colour_primary'          => [ 'colourless', 'white', 'black',
                                                  'gray', 'brown', 'red', 'pink',
                                                  'orange', 'yellow', 'green',
                                                  'blue', 'violet' ],
    '_geom_angle_publ_flag'                  => [ 'no', 'n', 'yes', 'y' ],
    '_geom_bond_publ_flag'                   => [ 'no', 'n', 'yes', 'y' ],
    '_geom_contact_publ_flag'                => [ 'no', 'n', 'yes', 'y' ],
    '_geom_hbond_publ_flag'                  => [ 'no', 'n', 'yes', 'y' ],
    '_geom_torsion_publ_flag'                => [ 'no', 'n', 'yes', 'y' ],
    '_publ_body_element'                     => [ 'section', 'subsection',
                                                  'subsubsection', 'appendix',
                                                  'footnote' ],
    '_publ_body_format'                      => [ 'ascii', 'cif', 'latex', 'rtf',
                                                  'sgml', 'tex', 'troff' ],
    '_publ_manuscript_incl_extra_defn'       => [ 'no', 'n', 'yes', 'y' ],
    '_publ_requested_category'               => [ 'FA', 'FI', 'FO', 'FM', 'CI',
                                                  'CO', 'CM', 'EI', 'EO', 'EM',
                                                  'QI', 'QO', 'QM', 'AD', 'SC' ],
    '_refine_ls_hydrogen_treatment'          => [ 'refall', 'refxyz', 'refU', 'noref',
                                                  'constr', 'mixed', 'undef' ],
    '_refine_ls_matrix_type'                 => [ 'full', 'fullcycle', 'atomblock',
                                                  'userblock', 'diagonal', 'sparse' ],
    '_refine_ls_structure_factor_coef'       => [ 'F', 'Fsqd', 'Inet' ],
    '_refine_ls_weighting_scheme'            => [ 'sigma', 'unit', 'calc' ],
    '_refln_include_status'                  => [ 'o', '<', '-', 'x', 'h', 'l' ],
    '_refln_observed_status'                 => [ 'o', '<', '-', 'x', 'h', 'l' ],
    '_refln_refinement_status'               => [ 'incl', 'excl', 'extn' ],
    '_space_group_crystal_system'            => [ 'triclinic', 'monoclinic',
                                                  'orthorhombic', 'tetragonal',
                                                  'trigonal', 'hexagonal', 'cubic' ],
    '_symmetry_cell_setting'                 => [ 'triclinic', 'monoclinic',
                                                  'orthorhombic', 'tetragonal',
                                                  'rhombohedral', 'trigonal',
                                                  'hexagonal', 'cubic' ],
    );
# Reference to the table above (the same hash, not a copy).
my $default_enums = \%default_enums;

# Data names from the built-in table that are on the default "set" list.
# NOTE(review): presumably consulted by the '--treat-as-set' logic, which
# lies outside this part of the file -- confirm.
my @default_set_tags = ( '_atom_site_refinement_flags' );

#* OPTIONS:
#*   --fix-temperature
#*                     Corrects temperature values which have units specified
#*                     or converts between Celsius degrees and Kelvins.
#*                     Changes 'room/ambiante temperature' to the appropriate
#*                     numeric value. Changes other undefined values
#*                     ('none', 'not given') to '?' symbol.
#*
#*   --fix-misspelled replacement_value.lst
#*                     Corrects misspelled values in the CIF file. Provide a file
#*                     for the check.
#*
#*   --fix-enums
#*                     Corrects enumeration values in the CIF against CIF
#*                     dictionaries. Provide dictionaries using option
#*                     --dictionary (or -d),  otherwise script uses the
#*                     built-in table from CIF Core dictionary.
#*
#*   --treat-as-set
#*                     Treats certain enumeration values as set data type in
#*                     the CIF against CIF dictionaries (default). Provide
#*                     dictionaries using --dictionary (or -d) option, otherwise
#*                     script uses the built-in table from CIF Core dictionary.
#*
#*   --fix-density-meas
#*                     Corrects value of data item '_exptl_crystal_density_meas'.
#*                     Values which are perceived as 'not measured' are changed
#*                     to '?'. The correct units are deleted. A new data item
#*                     is created if there is information about temperature.
#*
#*   --fix-weighting-scheme
#*                     Changes the value of the data item '_refine_ls_weighting_scheme'
#*                     to the value 'calc' if it contains details which seem to
#*                     include a formula, and creates a new data item
#*                     '_refine_ls_weighting_details' to store that information.
#*                     If value is equal to number one, then it is changed to
#*                     the enumeration value 'unit'.
#*
#*   --fix-atom-sites-solution
#*                     Corrects the most frequent mistakes in the values of
#*                     data items '_atom_sites_solution_primary',
#*                     '_atom_sites_solution_secondary' and
#*                     '_atom_sites_solution_hydrogens'.
#*
#*   -d, --dictionary cif_core.dic
#*                     Any valid CIF file, with CIF dictionary (according to DDL2).
#*                     You can define any number of CIF dictionaries, in a way like
#*                     this:
#*                     $0 --dictionary cif_core.dic --dictionary cod_core.dic
#*
#*   -D, --add-dictionaries cif_core.dic
#*                     Adds a CIF dictionary to the list of dictionaries
#*                     already specified, for example:
#*                     $0 --add-dictionaries cif_core.dic
#*
#*   --clear-dictionaries
#*                     Clears the list of dictionaries used for checking
#*                     the CIF file, for example:
#*                     $0 --clear-dictionaries
#*
#*   --use-perl-parser
#*   --use-c-parser
#*                     Specify parser to parse CIF files. C parser is default.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Process the command line. getOptions() receives pairs of an option
# specification (comma-separated aliases) and the handler to run for it;
# whatever it returns replaces @ARGV.
# NOTE(review): the returned list is presumably the remaining non-option
# arguments (the input CIF files) -- confirm against COD::SOptions.
#
# Each correction has three forms:
#   --fix-X                        enable the correction;
#   --fix-only-X                   disable all corrections, then enable X;
#   --do-not-/--no-/--dont-fix-X   disable the correction.
@ARGV = getOptions(
    # Misspelled value replacement; the enabling forms take the name of
    # the replacement list file as the option value.
    '--fix-misspelled'
        => sub{ $fix_misspelled_values = 1;
                $replacement_file = get_value(); },
    '--fix-only-misspelled'
        => sub{ dont_fix_any(); $fix_misspelled_values = 1;
                $replacement_file = get_value(); },
    '--do-not-fix-misspelled,--no-fix-misspelled,' .
    '--dont-fix-misspelled'
        => sub{ $fix_misspelled_values = 0;
                undef $replacement_file; },

    # Temperature value correction.
    '--fix-temperature' => sub { $fix_temperature = 1; },
    '--fix-only-temperature'
        => sub{ dont_fix_any(); $fix_temperature = 1; },
    '--do-not-fix-temperature,--no-fix-temperature,' .
    '--dont-fix-temperature'
        => sub{ $fix_temperature = 0; },

    # Enumeration value correction.
    '--fix-enums' => sub{ $fix_enums = 1; },
    '--fix-only-enums'
        => sub{ dont_fix_any(); $fix_enums = 1; },
    '--do-not-fix-enums,--dont-fix-enums,--no-fix-enums'
        => sub{ $fix_enums = 0; },

    # Treatment of certain enumeration values as sets (there is no
    # '--fix-only' form for this switch).
    '--treat-as-set'    => sub{ $treat_as_set = 1; },
    '--do-not-treat-as-set,--dont-treat-as-set,' .
    '--no-treat-as-set' => sub{ $treat_as_set = 0; },

    # Correction of '_exptl_crystal_density_meas' values.
    '--fix-density-meas'
        => sub{ $fix_value_of_exptl_crystal_density_meas = 1; },
    '--fix-only-density-meas'
        => sub{ dont_fix_any();
            $fix_value_of_exptl_crystal_density_meas = 1; },
    '--do-not-fix-density-meas,' .
    '--no-fix-density-meas,' .
    '--dont-fix-density-meas'
        => sub{ $fix_value_of_exptl_crystal_density_meas = 0; },

    # Correction of '_refine_ls_weighting_scheme' values.
    '--fix-weighting-scheme'
        => sub{ $fix_value_of_refine_ls_weighting_scheme = 1;},
    '--fix-only-weighting-scheme'
        => sub{ dont_fix_any();
            $fix_value_of_refine_ls_weighting_scheme = 1; },
    '--do-not-fix-weighting-scheme,' .
    '--no-fix-weighting-scheme,' .
    '--dont-fix-weighting-scheme'
        => sub{ $fix_value_of_refine_ls_weighting_scheme = 0 },

    # Correction of '_atom_sites_solution_*' values.
    '--fix-atom-sites-solution'
        => sub{ $fix_value_of_atom_sites_solution = 1; },
    '--fix-only-atom-sites-solution'
        => sub{ dont_fix_any(); $fix_value_of_atom_sites_solution = 1; },
    '--do-not-fix-atom-sites-solution,' .
    '--no-fix-atom-sites-solution,' .
    '--dont-fix-atom-sites-solution'
        => sub{ $fix_value_of_atom_sites_solution = 0; },

    # Dictionary list handling: '-d' replaces the whole list with the
    # given value, '-D' appends to it, '--clear-dictionaries' empties it.
    '-d,--dictionary' => sub{ @dictionaries = get_value },
    '-D,--add-dictionaries' => sub{ push @dictionaries, get_value },
    '--clear-dictionaries' => sub{ @dictionaries = (); },

    # Parser selection and informational options; the latter exit
    # immediately after printing.
    '--use-perl-parser'       => sub { $use_parser = 'perl' },
    '--use-c-parser'          => sub { $use_parser = 'c' },
    '--options'               => sub { options; exit },
    '--help,--usage'          => sub { usage; exit },
    '--version'       => sub { print 'cod-tools version ',
                               $COD::ToolsVersion::Version, "\n";
                               exit }
);

# Per-level "die on this kind of message" flags, in the form that is handed
# to the COD::ErrorHandler routines called in this script.
my $die_on_error_level = {
    'ERROR'   => $die_on_errors,
    'WARNING' => $die_on_warnings,
    'NOTE'    => $die_on_notes,
};

# Both the regular output and the diagnostics are written as UTF-8.
for my $stream ( \*STDOUT, \*STDERR ) {
    binmode $stream, ':encoding(UTF-8)';
}

#
# Subroutines:
#

# Records the performed conversions in the '_cod_depositor_comments' data
# item of a data block.
#
# Parameters:
#   $dataset        - data block structure; only $dataset->{values} is used;
#   $insert_reports - reference to an array of report messages.
#
# When the report list is empty nothing is changed. Otherwise the folded
# report text is appended to the first value of the comments data item
# (the value is created if the data item is absent), followed by a
# signature line built from the script's $Id string.
sub insert_report_to_comments
{
    my( $dataset, $insert_reports ) = @_;

    return if !( @{$insert_reports} > 0 );

    my $comments_tag = '_cod_depositor_comments';
    my $values = $dataset->{values};

    # Reports are separated by an empty line; every line is folded
    # separately.
    my @report_lines = split m/\n/, join( "\n\n", @{$insert_reports} );
    my @folded_lines = map { fold( 70, ' +', ' ', $_ ) } @report_lines;
    my $title =
        "The following automatic conversions were performed:\n\n" .
        join( "\n", @folded_lines );

    # Existing comments are separated from the new text by an empty line.
    my $separator = exists $values->{$comments_tag} ? "\n\n" : "\n";
    $values->{$comments_tag}[0] .= $separator . $title;

    # Strip the '$' signs that delimit the version control keyword.
    ( my $signature = $Id ) =~ s/^\$|\$$//g;
    $values->{$comments_tag}[0] .=
        "\n\n" . 'Automatic conversion script' .
        "\n" . $signature;

    return;
}

# Computes the sigma (standard uncertainty) value for the pack_precision()
# subroutine.
#
# Parameters:
#   $value - the number as it was written in the CIF, e.g. '12.34';
#   $sig   - the uncertainty expressed in units of the last written digit,
#            e.g. 5 for '12.34(5)'.
# Returns:
#   $sig scaled by 10**(-N), where N is the number of digits written after
#   the decimal point of $value; $sig itself when there are no such digits.

sub get_sigma($$)
{
    my( $value , $sig ) = @_;
    # Every part of the pattern is optional, so the match always succeeds
    # and the capture is defined (possibly empty).
    my( $fraction ) = $value =~ m/[^.]*\.?(\d*)/;
    # The length is tested instead of the truth value: a fractional part
    # consisting of the single digit '0' (as in '12.0') is a false string
    # in Perl, but still counts as one decimal place.
    if( length $fraction ) {
        return $sig * 10 ** ( -length( $fraction ) );
    } else {
        return $sig;
    }
}

# Looks for the enumeration values that a given CIF value could be
# replaced with.
#
# Parameters:
#   $cif_value       - the value found in the CIF;
#   $dict_value_list - reference to an array of allowed enumeration values.
# Returns:
#   - an empty list if $cif_value is equal to one of the allowed values;
#   - a single allowed value if it is the only one that matches $cif_value
#     when letter case, hyphens, underscores and whitespace are ignored;
#   - the whole list of allowed values otherwise.
sub replacement_candidates($$)
{
    my( $cif_value, $dict_value_list ) = @_;

    # Removes hyphens, underscores and whitespace, then lowercases.
    my $simplify = sub {
        my( $text ) = @_;
        $text =~ s/[-_\s]//g;
        return lc $text;
    };

    my @matching_values;
    for my $allowed_value ( @{$dict_value_list} ) {
        # An exact match means there is nothing to replace.
        return () if $cif_value eq $allowed_value;

        push @matching_values, $allowed_value
            if $simplify->( $cif_value ) eq $simplify->( $allowed_value )
            || ( lc $cif_value ) eq ( lc $allowed_value );
    }

    return @matching_values == 1 ? @matching_values : @{$dict_value_list};
}

# Turns counted messages into report lines.
#
# Parameters:
#   $_[0] - reference to a hash: message => number of occurrences;
#   $_[1] - reference to an array of messages in the order in which they
#           should be reported.
# Returns:
#   a list of report lines: 'message.' for a message that occurred once
#   and 'message (N times).' otherwise.
# Dies:
#   if a listed message has no entry in the hash of counts.
sub make_count($$)
{
    my( $notes_warnings, $messages ) = @_;
    my @reports;
    foreach my $message( @{$messages} ) {
        if(! exists $notes_warnings->{$message} ) {
            die "ERROR, error while counting '$message' audit message\n";
        }
        my $count = $notes_warnings->{$message};
        # The multiplicity is printed only for counts other than one, so
        # the English plural form is always the right one here (the
        # previous code produced '21 time', '31 time', ...).
        push @reports,
             $count == 1 ? "$message." : "$message ($count times).";
    }
    return @reports;
}

# Replacement table read from the replacement value list file:
#   data name => [ [ regular expression, replacement ], ... ]
# Every non-empty, non-comment line of the file must consist of exactly
# three whitespace-separated fields: data name, regular expression and
# replacement value. Other lines are reported with a warning.
my %value_spelling = ();
eval {
    # The handler receives the warning text as its only argument.
    local $SIG{__WARN__} = sub { process_warnings( {
                                   'message'       => $_[0],
                                   'program'       => $0,
                                   'filename'      => $replacement_file,
                                 }, $die_on_error_level ) };

    if( defined $replacement_file ) {
        open my $list, '<', $replacement_file or die 'ERROR, '
          . 'could not open replacement list file for input -- '
          . lcfirst($!) . "\n";

        # Read line by line instead of loading the whole file into a list.
        while( my $line = <$list> ) {
            chomp $line;
            if( $line =~ /^#/ or $line =~ /^\s*$/ ) {
                next;
            }
            if( $line =~ /^(\S+)\s+(\S+)\s+(\S+)$/ ) {
                push @{ $value_spelling{$1} }, [ $2, $3 ];
            } else {
                warn "WARNING, unrecognized string '$line' in "
                   . "replacement value file\n";
            }
        }

        close $list or die 'ERROR, '
           . 'error while closing replacement value file after reading -- '
           . lcfirst($!) . "\n";
    }
};
# Errors raised while reading the list (including warnings that were
# turned into errors by the handler above) are reported here.
if ($@) {
    process_errors( {
      'message'       => $@,
      'program'       => $0,
      'filename'      => $replacement_file
    }, $die_on_errors );
};

# Fill the enumeration tables:
#   %dictTags: data name => reference to the list of enumeration values;
#   %tagDicts: data name => [ dictionary file, dictionary name,
#                             dictionary version, dictionary update date ].
# The tables are built from the dictionaries given on the command line or,
# when none were given, from the built-in %default_enums table.
if( @dictionaries ) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    for my $dict ( @dictionaries ) {
        my ( $data, $err_count, $messages ) = parse_cif( $dict, $options );
        process_parser_messages( $messages, $die_on_error_level );

        canonicalize_all_names( $data );

        # The name, version and update date of the dictionary are taken
        # from the first 'on_this_dictionary' data block.
        my( $dict_name , $dict_version , $dict_update );
        for my $dataset( @{$data} ) {
            next unless $dataset->{name} eq 'on_this_dictionary' &&
                exists $dataset->{values};
            my $values = $dataset->{values};
            my $dataname = 'data_' . $dataset->{name};

            if ( ! defined $values ) {
                report_message( {
                   'program'   => $0,
                   'filename'  => $dict,
                   'add_pos'   => $dataname,
                   'err_level' => 'ERROR',
                   'message'   => 'dictionary does not contain data' },
                   $die_on_errors );
            };

            $dict_name = $values->{_dictionary_name};
            $dict_version = $values->{_dictionary_version};
            $dict_update = $values->{_dictionary_update};
            last;
        }

        # Every data block that has both '_name' and '_enumeration' values
        # defines the enumeration for all data names listed in '_name'.
        for my $dataset( @{$data} ) {
            my $values = $dataset->{values};
            my $values_name = $values->{_name};
            my $values_enum = $values->{_enumeration};
            next if !defined $values_name || !defined $values_enum;

            # A single copy of the value list is shared by all data names
            # of the block.
            my @enums = @{$values_enum};
            foreach my $tag ( @{$values_name} ) {
                if( defined $tagDicts{$tag} ) {
                    # The first definition wins. The warning names the
                    # dictionary that supplied the retained definition,
                    # not only the one that is being read now.
                    my $defining_dict = $tagDicts{$tag}[0];
                    report_message( {
                       'program'   => $0,
                       'err_level' => 'WARNING',
                       'message'   => "data item '$tag' is already defined by "
                                    . "the dictionary '$defining_dict' -- "
                                    . 'the definition from the dictionary '
                                    . "'$dict' will be ignored" },
                       $die_on_warnings );
                    next;
                }
                $dictTags{$tag} = \@enums;
                $tagDicts{$tag} = [
                    $dict,
                    defined $dict_name ?
                    join( ', ', @{$dict_name} ) : undef,
                    defined $dict_version ?
                    join( ', ', @{$dict_version} ) : undef,
                    defined $dict_update ?
                    join( ', ', @{$dict_update} ) : undef
                    ];
            }
        }
    }
} else {
    # No dictionaries were specified: use the built-in table.
    my $dict = 'the built-in table from CIF Core';
    my $dict_name = 'cif_core.dic';
    my $dict_version = '2.4.1';
    my $dict_update = '2010-06-29';
    foreach my $tag( keys %default_enums ) {
        my $enums = $default_enums{$tag};
        $dictTags{$tag} = $enums;
        $tagDicts{$tag} = [ $dict, $dict_name,
                            $dict_version, $dict_update ];
    }
}

# to print out all tags and theirs enum values from the given dictionary
#foreach( keys %dictTags ) {
#    print "'$_' => [ ";
#    foreach( @{ $dictTags{$_} } ){
#        print '"' . $_ . '"' .', ';
#    }
#    print "\n";
#}
#exit 0;

# Replaces misspelled data values as prescribed by the replacement table.
#
# Parameters:
#   $dataset        - data block structure; the data names are taken from
#                     $dataset->{tags} and the values in $dataset->{values}
#                     are modified in place;
#   $value_spelling - reference to the replacement table:
#                     data name => [ [ regular expression, replacement ], ... ].
#                     The file-level %value_spelling table is used if the
#                     argument is not a hash reference.
# Returns:
#   a list of messages describing the performed replacements; the same
#   messages are also issued as 'NOTE' warnings.
#
# The values '.' and '?' are never changed. Each regular expression must
# match the whole value (case-insensitively). A warning is issued instead
# of a note when more than one substitution was applied to a value.
sub fix_misspelled_values($$) {
    my( $dataset, $value_spelling ) = @_;

    # Previously the argument was accepted but silently ignored in favour
    # of the file-level table; the fallback keeps callers that relied on
    # that working.
    if( ref $value_spelling ne 'HASH' ) {
        $value_spelling = \%value_spelling;
    }

    my %reports = ();
    my @insert_reports = ();
    my @uniq_messages;
    my $tags = $dataset->{tags};
    my $values = $dataset->{values};
    foreach my $tag( @{$tags} ) {
        if(! exists $value_spelling->{$tag} ) {
            next;
        }
        # $tag_value aliases the stored value, so the substitution below
        # changes the data block itself.
        foreach my $tag_value( @{ $values->{$tag} } ) {
            if( $tag_value =~ /^[.?]$/ ){
                next;
            }

            my $count = 0;
            my $old_value = $tag_value;
            foreach my $pair( @{ $value_spelling->{$tag} } ){
                my( $regex, $replacement ) = @{$pair};

                if ( $tag_value =~ s/^ $regex $/$replacement/xi ){
                    $count++;
                }
            }
            if( $count > 1 ) {
                 warn 'WARNING, more than 1 different substitution '
                    . "was applied on value '$old_value'\n";
            } elsif( $count == 1 ) {
                 my $message_key =
                     "NOTE, '$tag' value '$old_value' " .
                     "was replaced with the value '$tag_value' " .
                     'as specified in the replacement file ' .
                     "'$replacement_file'";
                 if(! exists $reports{$message_key} ) {
                     $reports{$message_key} = 0;
                     push @uniq_messages, $message_key;
                 }
                 $reports{$message_key} ++;
            }
        }
    }

    my @report_messages = make_count( \%reports, \@uniq_messages );

    foreach my $report( @report_messages ) {
        # The /s modifier lets the message span several lines: without it
        # a replacement of a multi-line value would go unreported.
        if( $report =~ /^NOTE,\s+(.+)$/s ) {
            my $comment_message = $1;
            warn "NOTE, $comment_message\n";
            push @insert_reports, $comment_message;
        }
    }

    return @insert_reports;
}

# Regular expression building blocks used by the value fixing subroutines.
# The fragments stored as plain strings are meant to be interpolated into
# larger patterns; the ones created with qr// are already compiled.

# A non-negative number: optional '+' sign, digits with an optional
# fractional part (or a fractional part alone) and an optional exponent.
my $number_pos =
    '(?:\+?' .
    '(?:\d+(?:\.\d*)?|\.\d+)' .
    '(?:[eE][-+]?\d+)?)';
# The same number format with a mandatory '-' sign.
my $number_neg =
    '(?:\-' .
    '(?:\d+(?:\.\d*)?|\.\d+)' .
    '(?:[eE][-+]?\d+)?)';
# Kelvin unit designation: 'K', 'Kelvi' or 'Kelvin' in any letter case.
my $temp_K  =
    '(?:(?i:K(?i:elvin?)?)|(?i:K))';
# Various spellings of the Celsius unit: 'C', 'Celsius'/'Celcius' with an
# optional 'deg'/'degrees' prefix, 'deg' alone, 'o', '\o', '/o' or '%'
# characters optionally followed by 'C', '0C' and 'centigrade(s)'.
my $temp_C  =
    '(?:(?i:deg\.?(?:rees?)?)?\s*(?i:C(?i:el[sc]ius)?)|' .
    '(?i:Deg\.?(?:rees?)?\s*[Cc]?)|' .
    '(?:(?:(?i:[\\\/]+o)|(?i:O)|(?:[\\\/]*\%))' .
    '(?:[-_\s]*)(?i:C\.?)?)|' .
    '(?:[ ]*0(?i:C\.?))|' .
    '(?i:(?i:degrees?)?(?:[-_\s]*)centigrades?))';
# Textual designations of room/ambient temperature, including the French
# word order and the abbreviation 'rt'.
my $temp_RT =
    '(?:(?:(?i:temp\\\\\'erature)\s*ambi[ae]nte?)|' .
    '(?:(?:(?i:room)|(?i:amb(?i:i[ae]nte?)))' .
    '\s*(?i:tem[pt](?:\.|erature)?)?)|(?i:rt))';
# Values that carry no temperature information: 'yes', 'no', 'none',
# 'unknown', 'not measured/important/determined/available/relevant/
# recorded' (with common misspellings), 'N/A', 'N/D', whitespace only
# (including the empty string) and a single '-'.
my $temp_undef =
    '(?:(?i:ye?s?)|(?i:no?(?i:ne)?)|(?i:unknown)|' .
    '(?i:not?\s*(?:(?i:meas*ure?d?)|(?i:important)|' .
    '(?i:determine?d?)|(?i:avai?lable?)|(?i:relevant)|' .
    '(?i:recorde?d?)))|(?i:N\/?(?i:[DA]))|\s*|[-])';
# An unsigned decimal number without an exponent; fix_temperature() matches
# it between optional parentheses right after the temperature number.
my $sigma = '(?:\d+\.\d+|\d+\.|\.\d+|\d+)';
# Decomposition marker: 'd', 'dec', 'decomp', 'decomposed', 'decomposes',
# 'decomposition' (with optional dots), optionally followed by 'at'.
# The space after '(?i:' is ignored in the /x patterns of fix_temperature()
# that interpolate this fragment.
my $temp_dec =
    '(?i: d\.?(?i:ec\.?)?' .
    '(?i:omp\.?)?(?i:os(?i:e[ds]?|ition))?\s*(?i:at)?)';
# 'at' or 'measured at' (case-insensitive).
my $measured_at = qr/
    (?: (?:measured)? \s* at )
    /ix;
# Values that mean 'not measured' (case-insensitive): explicit statements,
# their frequent misspellings, equivalents in other languages and assorted
# placeholder characters.
my $value_not_measured = qr/
    (?:
        not[ _]measured|none|na|n\.a\.|n\/[ad]|
        -+|no|nm|n|no[ntr]|
            (?:[mn]oi?[ntr]|nit|npt|no|pas|nicht)
            [- _']+
            (?:[mn]ea?s?a?urt?e*[ds]|meas|measurement|
            mes(?:ua|ou|asu)red|meas(?:e|y|ua|ou|us|ru|hu)red|
            meausred|measused|measure|measrued|measued|mesur\\'ee|
            performed|determined|applied|available|done|gemessen)
        |
        notmeasured|not[ ]being[ ]measured|
        unknown|\?none|mone|nnoe|nonne|noref|nonr|
        not[ ]measured'\?|\?|\?"|0\?|\?0|\?\/|'[ ]'|\/
    )
    /ix;
# Spellings of the density units Mg/m^3 and g/cm^3 (case-sensitive).
# The leading '(?!)' never matches; it only allows every real alternative
# to be written on its own line starting with '|'.
my $units_equiv_Mg_per_m3 = qr/
    (?: (?!)
    |   Mg [ .] m  \^? -3 \^?
    |   Mg \/   m  \^?  3 \^?
    |   g  [ .] cm \^? -3 \^?
    |   g  \/   cm \^?  3 \^?
    )
    /x;
# 'unit weight(s)', optionally followed by 'applied' (case-insensitive).
my $unit_weights = '(?i:unit\s+weights?\s*(?:applied)?)';

sub fix_temperature($) {
    my( $dataset ) = @_;
    my @insert_reports = ();
    my $values = $dataset->{values};
    my @temp_tags = qw ( _cell_measurement_temperature
                         _chemical_temperature_decomposition
                         _chemical_temperature_sublimation
                         _diffrn_ambient_temperature
                         _exptl_crystal_density_meas_temp
                         _chemical_melting_point );
    for my $tag( @temp_tags ) {
        next if !exists $values->{$tag};
        for my $i( 0..$#{$values->{$tag}} ) {
            my $temperature = $values->{$tag}[$i];
            my $temperature_modif = $temperature;
            $temperature_modif =~ s/^\s+|^\n+|\n+$|\s+$//g;

            if( $temperature_modif =~ /^[.?]$/ ) {
                next;
            }
            if( $temperature_modif =~
                /^($number_pos|$number_pos\(\d+\))$/ ) {
                next;
            }
            if( $temperature_modif =~
                /^ \(?($temp_dec)?\)?(?:[-_,\s]*)
                        ($number_pos)\(?($sigma)?\)?
                        (?:[-_\s]*)$temp_K?(?:[-_,\s]*)
                        \(?($temp_dec)?\)?$
                        /x )  {
                if( $1 || $4 ) {
                    my $old_tag = $tag;
                    my $new_tag =
                        '_chemical_temperature_decomposition';
                    $values->{$tag}[$i] =
                        pack_precision( $2 , $3 );
                    my $new_val = $values->{$tag}[$i];
                    rename_tag
                        ( $dataset, $old_tag, $new_tag );
                    my $report_msg =
                        "'$old_tag' tag was changed to '$new_tag' " .
                        "since the value had been '$temperature'. " .
                        "The value '$temperature' was changed to " .
                        "'$new_val'.";
                    push @insert_reports, $report_msg;
                    warn "NOTE, $report_msg\n";
                    next;
                }
            }
            if( $temperature_modif =~
                /^
                    ([><])[_\s]*($number_pos|$number_neg)
                    \(?($sigma)?\)?
                    (?:[-_\'\s]*)(?:$temp_C)(?:[-_,\s]*)
                    \(?($temp_dec)?\)?$
                    /x ) {
                my $sign = $1;
                my $number = $2;
                my $sig  = $3;
                my $old_tag = $tag;
                if( $4 ) {
                    $old_tag = '_chemical_temperature_decomposition';
                }
                if( $old_tag !~
                    /_cell_measurement_temperature/ ) {
                    if( $sig ) {
                        $sig = get_sigma( $number , $sig );
                    }
                    if( $sign =~ /\>/ ) {
                        $values->{$tag}[$i] =
                            pack_precision
                            ( $number + 273.15 , $sig );
                        my $new_val = $values->{$tag}[$i];

                        my $new_tag = $old_tag . '_gt';
                        rename_tag
                            ( $dataset, $tag, $new_tag );
                        my $report_msg =
                            "'$tag' tag was changed to " .
                            "'$new_tag' since the value was " .
                            'specified as \'more than\' (\'>\') a ' .
                            'certain temperature. The value ' .
                            "'$temperature' was changed to " .
                            "'$new_val' - it was converted from " .
                            'degrees Celsius(C) to Kelvins(K).';
                        push @insert_reports, $report_msg;
                        warn "NOTE, $report_msg\n";
                        next;
                    }
                    if( $sign =~ /\</ ) {
                        $values->{$tag}[$i] =
                            pack_precision
                            ( $number + 273.15 , $sig );
                        my $new_val = $values->{$tag}[$i];
                        my $new_tag = $old_tag . '_lt';
                        rename_tag
                            ( $dataset, $tag, $new_tag );
                        my $report_msg =
                            "'$tag' tag was changed to " .
                            "'$new_tag' since the value was " .
                            'specified as \'less than\' (\'<\') a ' .
                            'certain temperature. The value ' .
                            "'$temperature' was changed to " .
                            "'$new_val' - it was converted from " .
                            'degrees Celsius(C) to Kelvins(K).';
                        push @insert_reports, $report_msg;
                        warn "NOTE, $report_msg\n";
                        next;
                    }
                }
                next;
            }
            if( $temperature_modif =~
                /^
                        ([><])(?:[-_\s]*)($number_pos)
                        \(?($sigma)?\)?(?:[-_\s]*)
                        (?:$temp_K)?(?:[-_,\s]*)
                        \(?($temp_dec)?\)?$
                        /x ) {
                my $sign = $1;
                my $number = $2;
                my $sig  = $3;
                my $old_tag = $tag;
                if( $4 ) {
                    $old_tag = '_chemical_temperature_decomposition';
                }
                if( $old_tag !~
                    /_cell_measurement_temperature/ ) {
                    if( $sign =~ /\>/ ) {
                        $values->{$tag}[$i] =
                            pack_precision
                            ( $number , $sig );
                        my $new_val = $values->{$tag}[$i];
                        my $new_tag = $old_tag . '_gt';
                        rename_tag
                            ( $dataset, $tag, $new_tag );
                        my $report_msg =
                            "'$tag' tag was changed to " .
                            "'$new_tag' since the value was " .
                            'specified as \'more than\' (\'>\') a ' .
                            'certain temperature. The value ' .
                            "'$temperature' was changed to " .
                            "'$new_val' - it should be numeric " .
                            'and without a unit designator.';
                        push @insert_reports, $report_msg;
                        warn "NOTE, $report_msg\n";
                        next;
                    }
                    if( $sign =~ /\</ ) {
                        $values->{$tag}[$i] =
                            pack_precision
                            ( $number , $sig );
                        my $new_val = $values->{$tag}[$i];
                        my $new_tag = $old_tag . '_lt';
                        rename_tag
                            ( $dataset, $tag, $new_tag );
                        my $report_msg =
                            "'$tag' tag was changed to " .
                            "'$new_tag' since the value was " .
                            'specified as \'less than\' (\'<\') a ' .
                            'certain temperature. The value ' .
                            "'$temperature' was changed to " .
                            "'$new_val' - it should be numeric " .
                            'and without a unit designator.';
                        push @insert_reports, $report_msg;
                        warn "NOTE, $report_msg\n";
                        next;
                    }
                }
                next;
            }
            if( $temperature_modif =~
                /^
                    ($number_pos)\s*(?:\()?
                    [\s]*(?:\+|\+\/?\-)?
                    [\s]*($sigma)(?:\))?$
                    /x ) {
                my $check_value = $temperature_modif;
                $values->{$tag}[$i] =
                    pack_precision( $1, $2 );
                my $new_val = $values->{$tag}[$i];
                $check_value =~ s/\s+//g;
                if( $check_value eq  $new_val ) {
                    my $report_msg =
                        "'$tag' value '$temperature' was changed to " .
                        "'$new_val' - the value was reformatted.";
                    push @insert_reports, $report_msg;
                    warn "NOTE, $report_msg\n";
                    next;
                } else {
                    my $report_msg =
                        "'$tag' value '$temperature' was changed to " .
                        "'$new_val' - precision was estimated.";
                    push @insert_reports, $report_msg;
                    warn "NOTE, $report_msg\n";
                    next;
                }
            }
            if( $temperature_modif =~
                /^
                    ($number_pos)(?:\()?($sigma)?
                    (?:\))?(?:[-_\s]*)\(?$temp_K\)?$
                    /x ) {
                $values->{$tag}[$i] =
                    pack_precision( $1, $2 );
                my $new_val = $values->{$tag}[$i];
                my $report_msg =
                    "'$tag' value '$temperature' was changed to " .
                    "'$new_val' - the value should be numeric " .
                    'and without a unit designator.';
                push @insert_reports, $report_msg;
                warn "NOTE, $report_msg\n";
                next;
            }
            if( $temperature_modif =~
                /^
                        ($number_neg)(?:\()?($sigma)?
                        (?:\))?(?:[-_\s]*)$temp_C?$
                        /x ) {
                my $number = $1;
                my $sig = $2;
                if( $sig ) {
                    $sig = get_sigma( $number , $sig );
                }
                $values->{$tag}[$i] =
                    pack_precision( $1 + 273.15, $sig );
                my $new_val = $values->{$tag}[$i];
                my $report_msg =
                    "'$tag' value '$temperature' was changed to " .
                    "'$new_val' - it was converted from degrees " .
                    'Celsius(C) to Kelvins(K).';
                push @insert_reports, $report_msg;
                warn "NOTE, $report_msg\n";
                next;
            }
            if( $temperature_modif =~
                /^
                    ($temp_RT)(?:[-_\s]*)$
                    /x ) {
                $values->{$tag}[$i] = '295(2)';
                my $report_msg =
                    "'$tag' value '$temperature' was changed to " .
                    '\'295(2)\' - the room/ambient temperature ' .
                    'average [293;298] in Kelvins(K) was taken.';
                push @insert_reports, $report_msg;
                warn "NOTE, $report_msg\n";
                next;
            }
            if( $temperature_modif =~
                /^
                    ($temp_undef)(?:[-_\s]*)$
                    /x ) {
                $values->{$tag}[$i] = '?';
                my $report_msg =
                    "'$tag' value '$temperature' ".
                    'was changed to \'?\' - the ' .
                    'value is undefined or not given.';
                push @insert_reports, $report_msg;
                warn "NOTE, $report_msg\n";
                next;
            }

            # A temperature range is provided
            if( $temperature_modif =~
                /^
                    ($number_pos)\s*[\-\/\:]+\s*($number_pos)
                    (?:[-_\s]*)\(?((?:$temp_C)|(?:$temp_K))?\)?
                    (?:[-_,\s]*)\(?($temp_dec)?\)?$
                    /x )   {
                my $temp_gt = $1;
                my $temp_lt = $2;
                my $temp_u  = $3;
                my $temp_d  = $4;

                if( $temp_lt > $temp_gt ) {
                    my $is_in_Celsius = (defined $temp_u && $temp_u =~ $temp_C);
                    my $number = ($temp_gt + $temp_lt)/2;
                    # Converting from degrees Celsius to degrees Kelvin
                    if ($is_in_Celsius) {
                    $number += 273.15;
                    }
                    my $sig = ($temp_lt - $temp_gt)/2;

                    my $new_val = pack_precision( $number, $sig );
                    $values->{$tag}[$i] = $new_val;
                    my $report_msg;

                    if( $temp_d ) {
                        my $new_tag = '_chemical_temperature_decomposition';
                        rename_tag( $dataset, $tag, $new_tag );
                        $report_msg =
                            "'$tag' tag was changed to '$new_tag' "
                          . 'since the value had been given as '
                          . "'$temperature'. The value ";
                    } else {
                        $report_msg = "'$tag' value '$temperature' ";
                    }

                    $report_msg .= "was changed to '$new_val' - ";
                    if ( $is_in_Celsius ) {
                        $report_msg .= 'it was converted from degrees '
                                     . 'Celsius(C) to Kelvins(K), '
                    }
                    $report_msg .= 'the average value was taken and '
                                 . 'precision was estimated.';

                    push @insert_reports, $report_msg;
                    warn "NOTE, $report_msg\n";
                    next;
                }
            }

            if( $temperature_modif =~
                /^ ($temp_dec)?(?:[-_,\s]*)
                        \(?($number_pos)\(?($sigma)?\)?
                        (?:[-_\s]*)(?:$temp_C|(?:\+\s*273(?:[\.\,]\d+)?\)?))
                        (?:[-_,\s]*)\(?($temp_dec)?\)?$
                        /x )  {
                my $number = $2;
                my $sig = $3;
                if( $sig ) {
                    $sig = get_sigma( $number , $sig );
                }
                if( defined $1 || defined $4 ) {
                    my $old_tag = $tag;
                    my $new_tag =
                        '_chemical_temperature_decomposition';
                    $values->{$tag}[$i] =
                        pack_precision
                        ( $number + 273.15, $sig );
                    my $new_val = $values->{$tag}[$i];
                    rename_tag
                        ( $dataset, $old_tag, $new_tag );
                    my $report_msg =
                        "'$old_tag' tag was changed to '$new_tag' " .
                        'since the value had been given as ' .
                        "'$temperature'. " .
                        "The value '$temperature' was changed to " .
                        "'$new_val' - " .
                        'it was converted from degrees Celsius(C) ' .
                        'to Kelvins(K).';
                    push @insert_reports, $report_msg;
                    warn "NOTE, $report_msg\n";
                    next;
                } else {
                    $values->{$tag}[$i] =
                        pack_precision
                        ( $number + 273.15, $sig );
                    my $new_val = $values->{$tag}[$i];
                    my $report_msg =
                        "'$tag' value '$temperature' was changed to " .
                        "'$new_val' - it was converted from degrees " .
                        'Celsius(C) to Kelvins(K).';
                    push @insert_reports, $report_msg;
                    warn "NOTE, $report_msg\n";
                    next;
                }
            }
            if( $temperature_modif !~
                /^$number_pos(?:[(]$sigma[)])?$/x ) {
                if( length($temperature) > 40 ) {
                        $temperature = substr $temperature, 0, 40;
                        $temperature .= '...';
                    }
                warn "WARNING, '$tag' value is '$temperature', but it "
                   . 'should be numeric, i.e. \'FLOAT\' or \'INT\', '
                   . 'permitted range is [0.0;+inf], the value should be '
                   . 'in Kelvins(K) without a unit designator' . "\n";
                next;
            }
        }
    }
    return @insert_reports
}

# Cleans up the values of the '_exptl_crystal_density_meas' data item.
#
# Values that are '.', '?' or a plain positive number (optionally followed
# by digits in parentheses) are left untouched. Values matching
# $value_not_measured are replaced by '?'. Values that consist of a number
# accompanied by a unit matching $units_equiv_Mg_per_m3 and/or by a
# measurement temperature are reduced to the number itself; a recognised
# temperature is passed to set_tag() under the data name
# '_exptl_crystal_density_meas_temp'.
#
# @param $dataset
#       Reference to a data block structure; only its 'values' hash is
#       accessed directly, the structure as a whole is handed to set_tag().
# @return
#       List of messages describing the changes made. Every message is
#       also emitted via warn() with the 'NOTE, ' prefix.
sub fix_value_of_exptl_crystal_density_meas($) {
    my( $dataset ) = @_;

    my $values = $dataset->{values};
    my @reports;

    for my $tag ( '_exptl_crystal_density_meas' ) {
        next if !exists $values->{$tag};

        for my $idx ( 0..$#{$values->{$tag}} ) {
            my $value = $values->{$tag}[$idx];
            ( my $trimmed = $value ) =~ s/^\s+|\s+$//g;

            # Special CIF values and well-formed numbers need no fixing.
            next if $trimmed =~ /^[.?]$/;
            next if $trimmed =~ /^($number_pos|$number_pos\(\d+\))$/;

            # Free-text ways of saying "not measured" become '?'.
            if( $trimmed =~
                /^
                    (?: ' \s*)?
                    $value_not_measured
                    (?: \s* ')?
                $/x ) {
                $values->{$tag}[$idx] = '?';
                my $note = "'$tag' value '$value' "
                         . 'was changed to \'?\' - the '
                         . 'value is perceived as not measured.';
                push @reports, $note;
                warn "NOTE, $note\n";
                next;
            }

            # Number, optional s.u., optional units and optional temperature
            # (either a room temperature phrase or a number with its own
            # optional s.u. and unit).
            my @parts = $trimmed =~
                /^
                    (?: ' \s*)?
                    (?> ($number_pos) )
                    # atomic group is used for not to match 150K
                    # otherwise it backtracks and matches: [15,0,K]
                    \s* (?: \( \s* ($sigma) \s* \) )?
                    \s* \(? \s* ($units_equiv_Mg_per_m3)? \s* \)?
                    \s* \(? \s* $measured_at ? \s*
                        (?: ($temp_RT) |
                        ($number_pos | $number_neg)
                        \s* (?: \( \s* ($sigma) \s* \) )?
                        \s* (?: ($temp_C) | ($temp_K) )?
                        )?
                    \s* \)?
                    (?: \s* ')?
                $/x;
            next if !@parts;

            # The eighth capture (Kelvin unit designator) is not needed.
            my( $meas_number, $meas_sigma, $units, $room_temp,
                $temp_number, $temp_sigma, $temp_Celsius ) = @parts;

            # A negative temperature is accepted only when explicitly
            # given in degrees Celsius.
            next if defined $temp_number && $temp_number < 0
                 && !defined $temp_Celsius;

            # A bare number with neither units nor temperature: nothing
            # to strip.
            next if !( defined $units || defined $room_temp ||
                       defined $temp_number );

            $meas_sigma = get_sigma( $meas_number, $meas_sigma )
                if defined $meas_sigma;

            my $new_meas_value = pack_precision( $meas_number, $meas_sigma );
            $values->{$tag}[$idx] = $new_meas_value;

            my $note = "'$tag' value '$value' was changed "
                     . "to '$new_meas_value'.";

            $note .= " Units '$units' were correct but "
                   . 'unnecessary, so they were removed.'
                if defined $units;

            if( defined $room_temp || defined $temp_number ) {
                my $temp_tag = $tag . '_temp';
                my $temp_value = defined $room_temp ? '295(2)' : undef;

                if( defined $temp_number ) {
                    $temp_sigma = get_sigma( $temp_number, $temp_sigma )
                        if defined $temp_sigma;
                    my $offset = defined $temp_Celsius ? 273.15 : 0;
                    $temp_value =
                        pack_precision( $temp_number + $offset, $temp_sigma );
                }

                set_tag( $dataset, $temp_tag, $temp_value );

                my $conversion_note = defined $temp_Celsius
                    ? ' - it was converted '
                      . 'from degrees Celsius(C) to Kelvins(K)'
                    : '';

                $note .= " A new data item '$temp_tag' was created with the "
                       . "value set to '$temp_value'"
                       . $conversion_note
                       . '.';
            }

            push @reports, $note;
            warn "NOTE, $note\n";
        }
    }

    return @reports;
}

# Normalises the values of the '_refine_ls_weighting_scheme' data item.
#
# For every value, in this order:
#   - '.', '?' and values equal to one of $default_enums{$tag} are stored
#     back with the surrounding whitespace silently removed;
#   - values ending in '?' are skipped;
#   - values matching $value_not_measured are replaced by '?';
#   - '1', '1.0...' and values matching $unit_weights become 'unit';
#   - values that look like a weighting formula become 'calc' and the
#     formula is passed to set_tag() as '_refine_ls_weighting_details';
#   - any other single-line value triggers a WARNING.
#
# @param $dataset
#       Reference to a data block structure; only its 'values' hash is
#       accessed directly, the structure as a whole is handed to set_tag().
# @return
#       List of messages describing the changes made. Every message is
#       also emitted via warn() with the 'NOTE, ' prefix.
sub fix_value_of_refine_ls_weighting_scheme($) {
    my( $dataset ) = @_;

    my $tag     = '_refine_ls_weighting_scheme';
    my $values  = $dataset->{values};
    my $enums   = join '|', @{ $default_enums{$tag} };
    my @reports;

    return () unless exists $values->{$tag};

    for my $idx ( 0..$#{$values->{$tag}} ) {
        my $value = $values->{$tag}[$idx];
        ( my $trimmed = $value ) =~ s/^\s+|\s+$//g;

        # Silently removes spaces
        if( $trimmed =~ /^([.?]|$enums)$/ ) {
            $values->{$tag}[$idx] = $1;
            next;
        }

        next if $trimmed =~ /\?$/;

        if( $trimmed =~ /^$value_not_measured$/ ) {
            $values->{$tag}[$idx] = '?';

            my $note = "'$tag' value '$value' "
                     . 'was changed to \'?\' - the '
                     . 'value is perceived as not measured.';
            push @reports, $note;
            warn "NOTE, $note\n";
            next;
        }

        if( $trimmed =~ /^\s*
            (?: 1 (?:\.0*)? |
                $unit_weights 
            ) \s*
            $/xi ) {
            my $unit_value = 'unit';
            $values->{$tag}[$idx] = $unit_value;

            my $note = "'$tag' value '$value' "
                     . "was changed to '$unit_value'.";
            push @reports, $note;
            warn "NOTE, $note\n";
            next;
        }

        # Split the value into an optional 'calc' keyword, an optional
        # 'w =' prefix and the remainder. Values spanning several lines
        # do not match ('.' stops at a newline) and are left as they are.
        my( undef, $w, $weighting_value ) = $trimmed =~
            /^
                (?: (calc\b)? \s*
                    (w \s* (?:\^-1\^)? \s* ={1,2} \s*)?
                    (.*)
                )
            $/xi
            or next;

        my $is_formula = defined $w || $weighting_value =~ m/^ \s*
                    (?:  $number_pos \s* \/
                    |    4\(?F 
                    |    [\[({](?!w) 
                    )
                /xi;

        if( $is_formula ) {
            $values->{$tag}[$idx] = 'calc';

            my $details_tag = '_refine_ls_weighting_details';
            my $details = ( defined $w ? $w : 'w = ' ) . $weighting_value;

            set_tag( $dataset, $details_tag, $details );

            my $note = "'$tag' value '$value' was changed to 'calc'."
                     . " A new data item '$details_tag' was created with "
                     . 'the value set to ' . "'$details'.";

            push @reports, $note;
            warn "NOTE, $note\n";
            next;
        }

        # Unrecognised value: report it, abbreviated if it is long.
        if( length($trimmed) > 40 ) {
            $trimmed = substr( $trimmed, 0, 40 ) . '...';
        }
        warn "WARNING, '$tag' value is '$trimmed', but it "
           . 'must be one of the enumeration values '
           . '[' . join( ', ', @{$default_enums{$tag}} ) . "]\n";
    }

    return @reports;
}

# Replaces common free-text spellings found in the
# '_atom_sites_solution_primary', '_atom_sites_solution_secondary' and
# '_atom_sites_solution_hydrogens' data items with the short codes
# 'direct', 'heavy', 'geom', 'difmap' and 'mixed'.
#
# @param $dataset
#       Reference to a data block structure; only its 'values' hash is
#       accessed. Matching values are modified in place.
# @return
#       List of messages describing the changes made. Every message is
#       also emitted via warn() with the 'NOTE, ' prefix.
sub fix_value_of_atom_sites_solution($) {
    my( $dataset ) = @_;

    # Rewrite rules as [ pattern, replacement ] pairs. They are applied
    # to each value one after another in exactly this order.
    my @rewrite_rules = (
        [ qr/^\s*direct methods?\s*$/i,                                'direct' ],
        [ qr/^\s*heavy[ \-]atom([ \-]method)?\s*$/i,                   'heavy'  ],
        [ qr/^\s*geometric( positions|ally( placed)?)?\s*$/i,          'geom'   ],
        [ qr/^\s*placed geometrically\s*$/i,                           'geom'   ],
        [ qr/^diffmap$/i,                                              'difmap' ],
        [ qr/^\s*diff?(erence)?([ \-]fourier)? maps?\s*$/i,            'difmap' ],
        [ qr/^\s*diff?(map)?(\s+and\s+|\s*[&\/,+]\s*)geom\s*$/i,       'mixed'  ],
        [ qr/^\s*geom(\s+and\s+|\s*[&\/,+]\s*)diff?(map)?\s*$/i,       'mixed'  ],
    );

    my @solution_tags = qw( _atom_sites_solution_primary
                            _atom_sites_solution_secondary
                            _atom_sites_solution_hydrogens );

    my $values = $dataset->{values};
    my @reports;

    for my $tag ( @solution_tags ) {
        next if !exists $values->{$tag};

        # $entry aliases the array element, so assigning to it updates
        # the data block in place.
        for my $entry ( @{ $values->{$tag} } ) {
            my $fixed = $entry;
            for my $rule ( @rewrite_rules ) {
                my( $pattern, $replacement ) = @{$rule};
                $fixed =~ s/$pattern/$replacement/;
            }

            next if $entry eq $fixed;

            my $previous = $entry;
            $entry = $fixed;

            my $note = "'$tag' value '$previous' " .
                       "was changed to '$fixed'.";
            push @reports, $note;
            warn "NOTE, $note\n";
        }
    }

    return @reports
}

# Checks the values of all data items of a data block against the
# enumeration values known for them (%dictTags) and corrects those values
# for which replacement_candidates() returns exactly one candidate.
# When several candidates are returned the value is left as it is and a
# WARNING listing the candidates is issued instead.
#
# Data names without an entry in %dictTags are skipped, and so are the
# data names listed in @default_set_tags when $treat_as_set is enabled.
# Values equal to '.' or '?' are never touched.
#
# Identical messages are collapsed by make_count() before being emitted.
#
# @param $dataset
#       Reference to a data block structure; its 'tags' list and 'values'
#       hash are accessed. Corrected values are modified in place.
# @return
#       List of the 'NOTE' messages (without the 'NOTE, ' prefix)
#       describing the changes made. WARNING messages are only emitted
#       via warn() and are not returned.
sub fix_enums($) {
    my( $dataset ) = @_;
    my %reports;
    my @uniq_messages;
    my @insert_reports = ();
    my $values = $dataset->{values};
    my $tags = $dataset->{tags};

    foreach my $tag( @{$tags} ) {
        # These checks depend on the data name only, so they are done
        # once per data item rather than once per value.
        next if !exists $values->{$tag};
        next if !exists $dictTags{$tag};
        # The data name is quoted with \Q...\E: names may contain regular
        # expression metacharacters (e.g. '.', '[', '(') that must be
        # compared literally and must not break the pattern compilation.
        next if $treat_as_set &&
                grep { /^\Q$tag\E$/ } @default_set_tags;

        # $tag_value aliases the array element, so assigning to it
        # updates the data block in place.
        foreach my $tag_value( @{$values->{$tag}} ) {
            next if $tag_value =~ /^[.?]$/;

            my @replacement_list =
                replacement_candidates( $tag_value,
                                        $dictTags{$tag} );

            next if !@replacement_list;

            # Description of the dictionary the enumeration comes from;
            # the name, version and source are optional.
            my $dictionary =
                "according to $tagDicts{$tag}[0] dictionary" .
                ( defined $tagDicts{$tag}[1] ?
                  " named '$tagDicts{$tag}[1]'" : '' ) .
                ( defined $tagDicts{$tag}[2] ?
                  " version $tagDicts{$tag}[2]" : '' ) .
                ( defined $tagDicts{$tag}[3] ?
                  " from $tagDicts{$tag}[3]" : '' );

            my $message_key;
            if( scalar( @replacement_list ) == 1 ) {
                # Unambiguous replacement: apply it
                my $new_value = shift @replacement_list;
                my $old_value = $tag_value;
                $tag_value = $new_value;
                $message_key =
                    "NOTE, '$tag' value '$old_value' " .
                    "changed to '$new_value' " .
                    $dictionary;
            } else {
                # Ambiguous replacement: only report the candidates
                my $dict_values = join ', ', @replacement_list;
                my $val = $tag_value;
                $val =~ s/^\n|\n$//g;
                if( length($val) > 30 ) {
                    $val = substr $val, 0, 30;
                    $val .= '...';
                }
                $message_key =
                    "WARNING, '$tag' value '$val' must be one of "
                  . 'the enumeration values [' . $dict_values . '] '
                  . $dictionary;
            }

            if( !exists $reports{$message_key} ) {
                $reports{$message_key} = 0;
                push @uniq_messages, $message_key;
            }
            $reports{$message_key} ++;
        }
    }

    my @report_messages = make_count( \%reports, \@uniq_messages );
    foreach my $report( @report_messages ) {
        if ( $report =~ /^(WARNING,\s+)(.+)$/ ) {
            warn "WARNING, $2\n";
        } elsif( $report =~ /^(NOTE,\s+)(.+)$/ ) {
            warn "NOTE, $2\n";
            push @insert_reports, $2;
        }
    }

    return @insert_reports;
}

# Validates and corrects the values of the data items listed in
# @default_set_tags. The value of such a data item is treated as a set of
# single-character flags taken from $default_enums{$tag} (the '.' entry
# is excluded from the set).
#
# For every value other than '.' and '?':
#   - a WARNING is issued if the value contains a character that is not
#     one of the flags (compared case-insensitively);
#   - otherwise each character is passed to replacement_candidates() and
#     is replaced when exactly one candidate is returned;
#   - a WARNING is issued if the resulting value contains the same flag
#     more than once, in which case the value is left unchanged;
#   - otherwise the value is updated in place and a NOTE is issued if it
#     differs from the original one.
#
# Identical messages are collapsed by make_count() before being emitted.
#
# @param $dataset
#       Reference to a data block structure; its 'tags' list and 'values'
#       hash are accessed.
# @return
#       List of the 'NOTE' messages (without the 'NOTE, ' prefix)
#       describing the changes made. WARNING messages are only emitted
#       via warn() and are not returned.
sub treat_as_set($) {
    my( $dataset ) = @_;
    my %reports;
    my @uniq_messages;
    my @insert_reports = ();
    my $values = $dataset->{values};
    my $tags = $dataset->{tags};
    foreach my $tag( @{$tags} ) {
        # The data name is quoted with \Q...\E: names may contain regular
        # expression metacharacters (e.g. '.', '[', '(') that must be
        # compared literally and must not break the pattern compilation.
        next if !grep { /^\Q$tag\E$/ } @default_set_tags;
        next if !exists $values->{$tag};
        my $set_values = join '', @{$default_enums{$tag}};
        # '.' specifies that none of the flags were set so it should 
        # not be in the concatenated string
        $set_values =~ s/\.//;
        my $dict_info = (defined $tagDicts{$tag}[1] ?
                        " named '$tagDicts{$tag}[1]'" : '') .
                        (defined $tagDicts{$tag}[2] ?
                        " version $tagDicts{$tag}[2]" : '') .
                        (defined $tagDicts{$tag}[3] ?
                        " from $tagDicts{$tag}[3]" : '');

        # $tag_value aliases the array element, so assigning to it
        # updates the data block in place.
        foreach my $tag_value( @{$values->{$tag}} ) {
            next if( $tag_value =~ /^[.?]$/ );

            my $message_key = undef;
            # The flags are quoted so that characters such as '-', '^'
            # or ']' cannot change the meaning of the character class.
            if ( $tag_value =~ /[^\Q$set_values\E]/i ) {
                my $dict_values = join ', ', @{$default_enums{$tag}};
                my $val = $tag_value;
                $val =~ s/^\n|\n$//g;
                if( length($val) > 30 ) {
                    $val = substr $val, 0, 30;
                    $val .= '...';
                }
                $message_key = "WARNING, '$tag' value '$val' should "
                             . 'only contain a combination of the '
                             . "enumeration values [$dict_values] "
                             . "according to $tagDicts{$tag}[0] "
                             . 'dictionary' . $dict_info;
            } else {
                my $new_value = '';
                foreach my $flag ( split m//, $tag_value ) {
                    my @replacement_list =
                        replacement_candidates( $flag, $dictTags{$tag} );

                    if( scalar( @replacement_list ) == 1 ) {
                        $new_value .= shift @replacement_list;
                    } else  {
                        $new_value .= $flag;
                    }
                }

                my $old_value = $tag_value;

                # Detect a flag that occurs more than once. The character
                # class must be built from the permitted flags
                # ($set_values); interpolating the $values hash reference
                # here would produce a class made of the characters of
                # 'HASH(0x...)'. A back-reference cannot be used inside a
                # bracketed class, hence '.*' with the /s modifier.
                if ( $new_value =~ /([\Q$set_values\E]).*\1/s ) {
                    $message_key =
                        "WARNING, '$tag' value '$new_value' " .
                        "('$old_value' before processing) " .
                        'should not contain duplicate values ' .
                        "according to $tagDicts{$tag}[0] " .
                        'dictionary' . $dict_info;
                } elsif ($tag_value ne $new_value) {
                    $tag_value = $new_value;
                    $message_key =
                        "NOTE, '$tag' value '$old_value' " .
                        "changed to '$new_value' " .
                        "according to $tagDicts{$tag}[0] " .
                        'dictionary' . $dict_info;
                }
            }

            if ( defined $message_key ) {
                if(! exists $reports{$message_key} ) {
                    $reports{$message_key} = 0;
                    push @uniq_messages, $message_key;
                }
                $reports{$message_key} ++;
            }
        }
    }

    my @report_messages = make_count( \%reports, \@uniq_messages );
    foreach my $report( @report_messages ) {
        if ( $report =~ /^(WARNING,\s+)(.+)$/ ) {
            warn "WARNING, $2\n";
        } elsif( $report =~ /^(NOTE,\s+)(.+)$/ ) {
            warn "NOTE, $2\n";
            push @insert_reports, $2;
        }
    }

    return @insert_reports;
}

# Data names collected from the dictionary tag modules; they are handed
# to print_cif() both as an ordered list and as a lookup hash.
my @dictionary_tags = ( @COD::CIF::Tags::DictTags::tag_list,
                        @COD::CIF::Tags::COD::tag_list,
                        @COD::CIF::Tags::TCOD::tag_list,
                        @COD::CIF::Tags::DFT::tag_list );
my %dictionary_tags = map { $_ => $_ } @dictionary_tags;

# With no file names on the command line a single '-' is passed to
# parse_cif() (presumably denoting the standard input -- confirm against
# the parser documentation).
@ARGV = ('-') unless @ARGV;

for my $filename (@ARGV) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    canonicalize_all_names( $data );

    for my $dataset( @{$data} ) {
        my @insert_reports = ();

        # The declaration must not carry a statement modifier:
        # 'my $x = ... if COND' has undefined behaviour (see perlsyn).
        my $dataname = defined $dataset->{name}
                     ? 'data_' . $dataset->{name}
                     : undef;

        # Route everything reported through warn() while this data block
        # is processed to process_warnings(), together with the file and
        # data block names. The handler receives the already assembled
        # message as its first argument; a scalar is used so that the
        # hash constructor always gets exactly one value for 'message'.
        local $SIG{__WARN__} = sub { process_warnings( {
                                       'message'       => $_[0],
                                       'program'       => $0,
                                       'filename'      => $filename,
                                       'add_pos'       => $dataname
                                     }, $die_on_error_level ) };

        eval {
            # Apply the enabled fixes in a fixed order, collecting the
            # messages that describe the changes made.
            if( $fix_temperature ) {
                push @insert_reports, fix_temperature( $dataset );
            }
            if( $fix_misspelled_values ) {
                push @insert_reports,
                     fix_misspelled_values( $dataset, \%value_spelling );
            }
            if( $fix_enums ) {
                push @insert_reports, fix_enums( $dataset );
            }
            if( $fix_value_of_exptl_crystal_density_meas ) {
                push @insert_reports,
                     fix_value_of_exptl_crystal_density_meas( $dataset );
            }
            if( $fix_value_of_refine_ls_weighting_scheme ) {
                push @insert_reports,
                     fix_value_of_refine_ls_weighting_scheme( $dataset );
            }
            if( $fix_value_of_atom_sites_solution ) {
                push @insert_reports,
                     fix_value_of_atom_sites_solution( $dataset );
            }
            if( $treat_as_set ) {
                push @insert_reports, treat_as_set( $dataset );
            }

            insert_report_to_comments( $dataset , \@insert_reports );

            print_cif( $dataset, {
                exclude_misspelled_tags => 0,
                preserve_loop_order => 1,
                fold_long_fields => 0,
                dictionary_tags => \%dictionary_tags,
                dictionary_tag_list => \@dictionary_tags,
                keep_tag_order => $keep_tag_order,
            });
        };
        if ($@) {
            # Anything that died inside the eval block is reported here
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname },
               $die_on_errors );
        };
    }
}
