#!/usr/bin/perl

#  Yudit Unicode Editor Source File
#
#  Copyright (C) 2000  Gaspar Sinai <gsinai@yudit.org>  
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# 
#  This script makes a compressed bidirectional class map (bidiclass.my).
#  Compressed means: for a range only the first one will be 
#  encoded.
#  In order to run the script you need to get
#   ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
#  as an input.
#  encode: Unicode code point -> bidirectional class code
#  (the generated map has an encode section only)

# Numeric code written to the map for each bidirectional class
# abbreviation read from UnicodeData.txt.  The same codes are listed in
# the legend of the generated file header, so keep the two in sync.
%BiDiCategory = (
  # strong
  'XX'  => 0x00,  # Unknown
  'L'   => 0x01,  # Left-to-Right
  'LRE' => 0x02,  # Left-to-Right Embedding
  'LRO' => 0x03,  # Left-to-Right Override
  'R'   => 0x04,  # Right-to-Left
  'AL'  => 0x05,  # Right-to-Left Arabic
  'RLE' => 0x06,  # Right-to-Left Embedding
  'RLO' => 0x07,  # Right-to-Left Override

  # weak
  'PDF' => 0x08,  # Pop Directional Format
  'EN'  => 0x09,  # European Number
  'ES'  => 0x0A,  # European Number Separator
  'ET'  => 0x0B,  # European Number Terminator
  'AN'  => 0x0C,  # Arabic Number
  'CS'  => 0x0D,  # Common Number Separator
  'NSM' => 0x0E,  # Non-Spacing Mark
  'BN'  => 0x0F,  # Boundary Neutral

  # neutral
  'B'   => 0x10,  # Paragraph Separator
  'S'   => 0x11,  # Segment Separator
  'WS'  => 0x12,  # Whitespace
  'ON'  => 0x13,  # Other Neutrals
);

# Generation date in local time, formatted YYYY-MM-DD.  It is
# interpolated into the "generated by" line of the map header below.
# (The unused @encodes, @decodes and $name="precompose.my" leftovers of
# the precompose generator this script was derived from are gone.)
use POSIX qw(strftime);
my $date = strftime ("%Y-%m-%d", localtime);

# Print the fixed header of the generated map to standard output.
# The here-document is interpolating: $0 (this script's name) and $date
# are expanded in the "generated by" line; everything else is emitted
# verbatim, including the '#' lines, which are part of the output and
# not comments of this script.  The class legend repeats the codes held
# in %BiDiCategory and has to be kept in step with that table by hand.
print <<EOD;
NAME=bidiclass.my
COMM=
COMM= generated by $0 $date
COMM= 
COMM= from ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt 
COMM=
#  /* strong */
#  XX     # 00 Unknown
#  L      # 01 Left-to-Right
#  LRE    # 02 Left-to-Right Embedding
#  LRO    # 03 Left-to-Right Override
#  R      # 04 Right-to-Left
#  AL     # 05 Right-to-Left Arabic
#  RLE    # 06 Right-to-Left Embedding
#  RLO    # 07 Right-to-Left Override
#
#  /* weak */
#  PDF    # 08 Pop Directional Format
#  EN     # 09 European Number
#  ES     # 0A European Number Separator
#  ET     # 0B European Number Terminator
#  AN     # 0C Arabic Number
#  CS     # 0D Common Number Separator
#  NSM    # 0E Non-Spacing Mark
#  BN     # 0F Boundary Neutral
#
#  /* neutral */
#  B      # 10 Paragraph Separator
#  S      # 11 Segment Separator
#  WS     # 12 Whitespace
#  ON     # 13 Other Neutrals
#
# This ia a compressed map: that means all characters in the range are
# represented by the first element only. Holes are represented with 0.
#
COMM=Informative Categories
COMM=Compressed Range Format
TYPE=0
SECTION=encode
ENCODE=1
#
# key 2 for 32 bit (32-bitunicode)
# value 0 for 8 bit (category) values 
#
KEY_WIDTH=2
VALUE_WIDTH=0
KEY_LENGTH=0
VALUE_LENGTH=0
EOD


# ----------------------------------------------------------------------
# Map body: read UnicodeData.txt records from the files named on the
# command line (or from standard input) and print one
# "CODEPOINT -> CLASS" line for the start of every run of consecutive
# code points that share a bidirectional class.  A run is closed either
# by the next printed entry or by an explicit "-> 00" hole entry.
#
# Strictures are switched on from here to the end of the file only; the
# class table above is a package global, hence the 'our' declaration.
# ----------------------------------------------------------------------
use strict;
use warnings;

our %BiDiCategory;   # class abbreviation -> numeric code

# Split one UnicodeData.txt record into the pieces used below.
# Returns (code point as a number, character name, bidi class
# abbreviation), or an empty list when the line has fewer than six
# ';'-separated fields (blank lines, comments, truncated records).
sub parse_unicode_data_line
{
  my ($line) = @_;

  # Split into a named array.  The previous code relied on a
  # scalar-context split filling @_, which Perl 5.12 removed; on current
  # perls every field then read as undef and every record collapsed to
  # code point 0 with class 0.
  my @fields = split /;/, $line;
  return if @fields < 6;

  return (hex $fields[0], $fields[1], $fields[4]);
}

my $last       = -1;   # last code point covered by the current run
my $last_value = -2;   # class code of that run; -2 matches no real code

while (my $line = <>)
{
  my ($code, $char_name, $bidi_class) = parse_unicode_data_line($line);
  next unless defined $code;

  # Abbreviations missing from the table are written as 00 (unknown).
  my $value = defined $BiDiCategory{$bidi_class}
            ? $BiDiCategory{$bidi_class}
            : 0;

  # Directly follows the current run and has the same class: extend the
  # run without printing anything.
  if ($last + 1 == $code && $last_value == $value)
  {
    $last++;
    next;
  }

  # Unassigned gap before this record: close the previous run with a
  # hole entry.  A gap in front of a "<..., Last>" record is not a hole;
  # the code points between the First and Last records belong to the
  # range opened by the First record.
  if ($last + 1 < $code && $char_name !~ /Last>/)
  {
    printf "%08X -> %02X\n", $last + 1, 0;
  }

  printf "%08X -> %02X\n", $code, $value;
  $last       = $code;
  $last_value = $value;
}

# Close the final run: everything after the last record is a hole.
printf "%08X -> %02X\n", $last + 1, 0;

# Line ends are written as "\n" instead of interpolating $/ (the input
# record separator), which holds a newline only by default.
print "\n#END\n";
