#!/usr/bin/python2

# convert bedic-like dictionary into .tei format
# http://kbedic.sourceforge.net/
# entries are separated by a NUL ('\0') character
# the first line of an entry is the WORD, the remaining lines are its definition

def tran(input, output, s):
    """Translate the characters of s position-wise from input to output.

    Every character of s that occurs in input is replaced by the character
    found at the same index in output; when a character occurs several
    times in input, its first occurrence decides.  Characters that do not
    occur in input are copied unchanged.

    Returns the translated string.

    Raises IndexError when a matched index has no counterpart in output,
    i.e. when output is shorter than the matched position in input.
    """
    translated = []
    for c in s:
        # str.find() returns -1 when c is absent, so only real matches
        # are looked up in the output alphabet.
        i = input.find(c)
        if i >= 0:
            c = output[i]
        translated.append(c)
    # Join once at the end instead of growing a string piece by piece.
    return ''.join(translated)


def low(s):
    """Return s passed through tran() with the alphabets defined below.

    The source alphabet is empty, so tran() finds no character to replace
    and s comes back unchanged.
    NOTE(review): an empty source alphabet paired with a one-character
    target looks like characters were lost (possibly an encoding problem);
    confirm the intended mapping.
    """
    source_chars = ''
    target_chars = 'ţ'
    return tran(source_chars, target_chars, s)


import sys, string


# The dictionary to convert is named by the first command-line argument.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit("usage: %s DICTIONARY-FILE > OUTPUT.tei" % sys.argv[0])
f = open(sys.argv[1], "r")

# NOTE(review): pos, lastlinepos and inword are not referenced anywhere else
# in the visible script; they are kept in case other code relies on them.
pos = 0
lastlinepos = 0

inword = 0
# Rebound to the headword of each entry by the conversion loop below.
word = ""


def teiheader():
    """Print the start of the TEI document to standard output.

    The text consists of the DOCTYPE declaration, the opening <tei.2> tag
    and a <teiHeader> element whose title and authority are left blank.
    The literal ends with a newline and print adds one more, so the header
    is followed by an empty line.
    """
    header = """<!DOCTYPE TEI.2       PUBLIC "-//TEI P3//DTD Main Document Type//EN"  [
   <!ENTITY % TEI.dictionaries "INCLUDE" >
]>

<tei.2>
  <teiHeader>
    <filedesc>
      <titlestmt>
        <title>           </title>
      </titlestmt>
      <publicationstmt>
        <authority></authority>
      </publicationstmt>
      <sourcedesc>
        <p>http://kbedic.sourceforge.net/</p>
      </sourcedesc>
    </filedesc>
  </teiHeader>
"""
    print(header)

# Maps each headword to the list of its definition lines; filled by the
# conversion loop further down.
words = {}

# NOTE(review): block is not referenced anywhere else in the visible script.
block = ""

# Start the document: TEI header first, then the opening tags of the text
# body (one print, two output lines).
teiheader()
print("<text>\n<body>")

def readentry(f):
    """Read the next NUL-terminated entry from the open file f.

    Characters are read one at a time up to the next NUL ('\\0') character.

    Returns the entry text without its terminating NUL; this is '' when
    two NULs follow each other directly.  Returns None when the end of the
    file is reached and no characters are pending.  A final entry that is
    not NUL-terminated is returned as it stands; the following call then
    returns None.
    """
    chars = []
    while True:
        c = f.read(1)
        at_eof = (c == '')  # read() yields an empty string at end of file
        if at_eof and not chars:
            return None
        # End of file also terminates a pending entry; without this check
        # the loop would keep appending '' forever on an unterminated
        # final entry.
        if at_eof or c == '\0':
            return ''.join(chars)
        chars.append(c)

# Imported here, next to its only use in this part of the script.
from xml.sax.saxutils import escape

# Pass 1: read every entry and collect the definition lines of each
# headword.  A headword that occurs in several entries accumulates the
# lines of all of them, in file order.
while True:
    entry = readentry(f)
    if entry is None:
        # readentry() reports the end of the input with None.
        break
    if entry == '':
        # Empty entry (two NULs in a row): nothing to record.
        continue
    # partition() always yields three parts, so an entry that consists of
    # a headword only gets an empty definition instead of raising
    # ValueError on unpacking.
    word, _, definition = entry.partition('\n')
    # Escape '&', '<' and '>' so dictionary text is never read as markup.
    definition = escape(definition)
    words.setdefault(word, []).extend(definition.split('\n'))

# All input has been consumed; release the file.
f.close()

# Pass 2: emit one <entry> per headword with one <tr> per definition line.
for headword, translations in words.items():
    print("<entry>")
    print("  <form>")
    # The headword comes from the same untrusted text as the definition
    # and needs the same escaping.
    print("    <orth>" + escape(headword) + "</orth>")
    print("  </form>")
    print("  <trans>")
    for line in translations:
        print("    <tr>" + line + "</tr>")
    print("  </trans>")
    print("</entry>")

print("</body>")
print("</text>")

print("</tei.2>")
