#!/usr/bin/python3

# Copyright © 2012 Jakub Wilk <jwilk@jwilk.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import collections
import datetime
import os
import sys
import xml.etree.cElementTree as etree

# Directory holding the iso-codes XML files read by the generators below.
iso_codes_dir = '/usr/share/xml/iso-codes/'

class Panic(ValueError):
    """Raised when the input data violates an assumption of this script."""

def main():
    """Regenerate ``data/iso-codes`` next to this script's parent directory.

    The output is first written to ``data/iso-codes.tmp`` and renamed over
    the final path only after both generators have finished, so a failure
    part-way through never leaves a truncated ``data/iso-codes`` behind.
    """
    basedir = os.path.join(
        os.path.dirname(__file__),
        os.pardir,
    )
    path = os.path.join(basedir, 'data', 'iso-codes')
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wt', encoding='UTF-8') as file:
        # The generators emit their output with print(), so stdout is pointed
        # at the temporary file while they run.  It is restored afterwards
        # (even on error) so that later prints do not hit a closed file.
        original_stdout = sys.stdout
        sys.stdout = file
        try:
            print('''\
# This file has been generated automatically by private/update-iso-codes.
# Do not edit.
# Last update: {today}
'''.format(today=datetime.date.today()))
            generate_iso_639()
            generate_iso_3166()
        finally:
            sys.stdout = original_stdout
    os.rename(tmp_path, path)

def generate_iso_639():
    """Print the ``[language-codes]`` section built from ISO 639 data.

    Reads ``iso_639.xml`` and ``iso_639_3.xml`` from ``iso_codes_dir``.
    Every reference name gets a record ``[primary, *aliases]``; the output
    contains one ``alias = primary`` line per alias and one ``primary =``
    line for each primary code that has no aliases, sorted by the left-hand
    side.  The section is followed by an empty line.

    Raises:
        Panic: if the data violates an assumption made here (unexpected code
            length, ``part2_code`` differing from ``id``, unknown scope or
            status, or a reference name receiving a primary code twice).
    """
    # =======================
    # ISO 639: language codes
    # =======================

    # Pass 1 (iso_639.xml): remember the entries whose 639-2/B and 639-2/T
    # codes differ.
    l2b_to_2t = {}
    iso_639_2_path = os.path.join(iso_codes_dir, 'iso_639.xml')
    for _event, element in etree.iterparse(iso_639_2_path):
        if element.tag != 'iso_639_entry':
            continue
        l2b = element.get('iso_639_2B_code')
        l2t = element.get('iso_639_2T_code')
        if l2b == l2t == 'qaa-qtz':
            # A code range rather than a single code; it would fail the
            # length check below.
            continue
        for lang_code in l2b, l2t:
            if len(lang_code) != 3:
                raise Panic('len({!r}) != 3'.format(lang_code))
        if l2b != l2t:
            l2b_to_2t[l2b] = l2t

    # Pass 2 (iso_639_3.xml): build, per reference name, a list whose first
    # item is the primary code (None until known) followed by its aliases.
    iso_639 = collections.defaultdict(lambda: [None])
    iso_639_3_path = os.path.join(iso_codes_dir, 'iso_639_3.xml')
    for _event, element in etree.iterparse(iso_639_3_path):
        if element.tag != 'iso_639_3_entry':
            continue
        code = element.get('id')
        if len(code) != 3:
            raise Panic('len({!r}) != 3'.format(code))
        code1 = element.get('part1_code')
        code2 = element.get('part2_code')
        if code2 is None:
            # We're not interested in languages that are not in 639-2 (yet?).
            continue
        if code2 != code:
            raise Panic('{!r} != {!r}'.format(code, code2))
        scope = element.get('scope')
        if scope in {'S', 'L'}:
            # Not a real language, ignore.
            continue
        elif scope == 'C':
            # We're not interested in collective languages (yet?), except
            # for those that have a 639-1 code.
            if code1 is None:
                continue
        elif scope in {'M', 'I'}:
            pass
        else:
            raise Panic('unknown scope: {!r}'.format(scope))
        status = element.get('status')
        if status == 'Retired':
            continue
        if status != 'Active':
            raise Panic('unknown status: {!r}'.format(status))
        reference_name = element.get('reference_name')
        datum = iso_639[reference_name]
        # NOTE(review): l2b_to_2t is keyed by 639-2/B codes, so this branch
        # only fires when the 639-3 id is itself a bibliographic code --
        # confirm that this is the intended lookup direction.
        if code in l2b_to_2t:
            datum.append(code)
            continue
        if datum[0] is not None:
            raise Panic(
                'primary code for {!r} already set to {!r} (now: {!r})'
                .format(reference_name, datum[0], code)
            )
        if code1 is not None and code1.endswith('(deprecated)'):
            # Deprecated 639-1 codes are treated as if they were absent.
            code1 = None
        if code1 is None:
            datum[0] = code
        elif len(code1) == 2:
            # The two-letter code is primary; the three-letter one an alias.
            datum[0] = code1
            datum.append(code)
        else:
            raise Panic('len({!r}) != 2'.format(code1))

    print('[language-codes]')
    # Invert the records: alias -> primary code, and primary -> '' for
    # primaries that have no aliases at all.
    iso_639_rev = {}
    for code, *aliases in iso_639.values():
        for alias in aliases:
            iso_639_rev[alias] = code
        if not aliases:
            iso_639_rev[code] = ''
    for alias, code in sorted(iso_639_rev.items()):
        # rstrip() drops the trailing space of "code = " lines.
        print('{} = {}'.format(alias, code).rstrip())
    print()

def generate_iso_3166():
    """Print the ``[territory-codes]`` section built from ISO 3166 data.

    Reads ``iso_3166.xml`` from ``iso_codes_dir`` and prints one ``CC =``
    line per distinct ``alpha_2_code``, sorted, followed by an empty line
    and the trailing editor modeline of the generated file.

    Raises:
        Panic: if an ``iso_3166_entry`` element has no ``alpha_2_code``
            attribute.  Panic is a ValueError subclass, so callers catching
            ValueError are unaffected.
    """
    # =========================
    # ISO 3166: territory codes
    # =========================
    iso_3166 = set()
    iso_3166_path = os.path.join(iso_codes_dir, 'iso_3166.xml')
    for _event, element in etree.iterparse(iso_3166_path):
        if element.tag != 'iso_3166_entry':
            continue
        cc = element.get('alpha_2_code')
        if cc is None:
            raise Panic(
                'iso_3166_entry without alpha_2_code: {!r}'
                .format(element.attrib)
            )
        iso_3166.add(cc)
    print('[territory-codes]')
    for cc in sorted(iso_3166):
        print('{} ='.format(cc))
    print()
    # The literal is split in two, presumably so that editors do not take
    # this source line for a modeline of the script itself.
    print('# vi''m:ft=dosini')

# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()

# vim:ts=4 sw=4 et
