/*

  Copyright (c) 2003,2004 uim Project http://uim.freedesktop.org/

  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
  3. Neither the name of authors nor the names of its contributors
     may be used to endorse or promote products derived from this software
     without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  SUCH DAMAGE.
*/

// Locale dependent routines

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <locale.h>
#include <iconv.h>
#include <errno.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include "ximserver.h"
#ifndef __GNUC__
# ifdef HAVE_ALLOCA_H
#  include <alloca.h>
# endif
#endif

// Return code if invalid. (utf8_mbtowc, utf8_wctomb)
// Note: this is 0, the same value both functions return when they are
// handed a NULL output pointer.
#define RET_ILSEQ	0
// Return code if only a shift sequence of n bytes was read. (utf8_mbtowc)
// In this file it is only used as RET_TOOFEW(0), i.e. -1, when src_len is
// shorter than the sequence announced by the lead byte.
#define RET_TOOFEW(n)	(-1-(n))

// Convert a uString into a newly allocated, NUL-terminated UTF-8 C string.
//
// Every element of s is encoded with utf8_wctomb().  The caller owns the
// returned buffer and must release it with free().
//
// Returns NULL when s is NULL or when the buffer cannot be allocated.
static char *
ustring_to_utf8_str(uString *s)
{
    if (!s)
	return NULL;

    uString::iterator it;
    unsigned char scratch[6];	// utf8_wctomb() writes at most 6 bytes
    size_t total = 0;

    // First pass: measure the encoded length.
    for (it = s->begin(); it != s->end(); ++it)
	total += utf8_wctomb(scratch, *it);

    char *out = (char *)malloc(total + 1);
    if (!out)
	return NULL;

    // Second pass: encode straight into the output buffer.  The first pass
    // guarantees that every sequence fits in front of the terminator.
    size_t pos = 0;
    for (it = s->begin(); it != s->end(); ++it)
	pos += utf8_wctomb((unsigned char *)out + pos, *it);
    out[pos] = '\0';
    return out;
}

// Locale's own answer to supportOverTheSpot(): always false.
// The Locale subclasses defined in this file (EUCJP_Locale, UTF8_Locale)
// declare their own supportOverTheSpot() that returns true.
bool
Locale::supportOverTheSpot()
{
    return false;
}

class EUCJP_Locale : public Locale {
public:
    virtual char *uStringToCtext(uString *us, const char *encoding) {
	int len;
	char *t;
	len = us->size() * 10;
	t = (char *)alloca(len + 1);
	uString::iterator i;
	bool jp_mode = false;
	int cur, j = 0;

	for (i = us->begin(); i != us->end(); i++) {
	    cur = *i;
	    if (!(cur & 0xff00)) {
		if (jp_mode) {
		    t[j++] = 0x1b;
		    t[j++] = 0x28;
		    t[j++] = 0x42;
		    jp_mode = false;
		}
		t[j++] = cur & 0xff;
	    } else {
		if (!jp_mode) {
		    jp_mode = true;
		    t[j++] = 0x1b;
		    t[j++] = 0x24;
		    t[j++] = 0x29;
		    t[j++] = 0x42;
		}
		t[j++] = (cur >> 8) | 0x80;
		t[j++] = (cur & 0xff) | 0x80;
	    }
	}
	if (jp_mode) {
	    t[j++] = 0x1b;
	    t[j++] = 0x28;
	    t[j++] = 0x42;
	}
	t[j] = 0;
	return strdup(t);
    };
    virtual bool supportOverTheSpot() {
	return true;
    }
};

// Convert a NUL-terminated UTF-8 string to the encoding named by enc.
//
// The result is a newly allocated, NUL-terminated string that the caller
// must free().  Returns NULL when an argument is NULL, when iconv does not
// know the conversion, when the input cannot be converted (invalid or
// incomplete sequence), or when memory runs out.
//
// The output buffer starts at BUFSIZ bytes and is doubled whenever iconv
// reports E2BIG, so long inputs are converted completely instead of being
// cut off at the initial buffer size.  After the input is consumed, iconv
// is called once more with a NULL input so that stateful encodings can
// emit the sequence that returns them to their initial shift state.
char *utf8_to_native_str(char *utf8, const char *enc) {
    if (!utf8 || !enc)
	return NULL;

    iconv_t cd = iconv_open(enc, "UTF-8");
    if (cd == (iconv_t)-1) {
	perror("error in iconv_open");
	return NULL;
    }

    size_t outbufsize = BUFSIZ;
    char *outbuf = (char *)malloc(outbufsize);
    if (!outbuf) {
	iconv_close(cd);
	return NULL;
    }

    // iconv() only advances the input pointer; the bytes are not modified,
    // so no private copy of the input is needed.
    char *inchar = utf8;
    size_t inbytesleft = strlen(utf8);
    char *outchar = outbuf;
    size_t outbytesleft = outbufsize - 1;	// reserve room for the NUL
    bool flushing = false;

    for (;;) {
	size_t ret_val;
	if (flushing)
	    ret_val = iconv(cd, NULL, NULL, &outchar, &outbytesleft);
	else
	    ret_val = iconv(cd, (ICONV_CONST char **)&inchar, &inbytesleft,
			    &outchar, &outbytesleft);

	if (ret_val != (size_t)-1) {
	    if (flushing)
		break;		// input converted and shift state reset
	    flushing = true;	// input done; now flush the shift state
	    continue;
	}

	if (errno != E2BIG) {
	    //perror("error in iconv");
	    iconv_close(cd);
	    free(outbuf);
	    return NULL;
	}

	// Output buffer full: enlarge it and resume where iconv stopped.
	const size_t used = outchar - outbuf;
	const size_t grown_size = outbufsize * 2;
	char *grown = (char *)realloc(outbuf, grown_size);
	if (!grown) {
	    iconv_close(cd);
	    free(outbuf);
	    return NULL;
	}
	outbuf = grown;
	outbufsize = grown_size;
	outchar = outbuf + used;
	outbytesleft = outbufsize - 1 - used;
    }
    iconv_close(cd);

    const size_t length = outchar - outbuf;
    outbuf[length] = '\0';

    // Hand back a tightly sized block; keep the larger one if shrinking
    // fails.
    char *convstr = (char *)realloc(outbuf, length + 1);
    return convstr ? convstr : outbuf;
}

// Locale implementation that produces its text from the UTF-8 form of a
// uString and remembers a locale name derived from the IM language.
class UTF8_Locale : public Locale {
public:
    // Defined out of line; initializes mLocaleName from the language.
    UTF8_Locale(const char *lang);
    // Releases the strdup()'d locale name.
    virtual ~UTF8_Locale() {
	free(mLocaleName);
    }
    // Convert us to a text-property string in XCompoundTextStyle.
    //
    // us is first encoded as UTF-8.  Unless encoding is exactly "UTF-8",
    // the text is then converted to that encoding with
    // utf8_to_native_str().  The result is passed through
    // XmbTextListToTextProperty() and returned as a strdup()'d copy that
    // the caller must free().
    //
    // Returns NULL when any conversion step fails.
    virtual char *uStringToCtext(uString *us, const char *encoding) {
	char *text = ustring_to_utf8_str(us);
	if (!text)
	    return NULL;

	if (strcmp(encoding, "UTF-8") != 0) {
	    char *native_str = utf8_to_native_str(text, encoding);
	    free(text);
	    if (!native_str)
		return NULL;
	    text = native_str;
	}

	XTextProperty prop;
	const int status = XmbTextListToTextProperty(XimServer::gDpy, &text, 1,
			    XCompoundTextStyle, &prop);
	free(text);
	// A negative status is an Xlib error code and leaves prop unset; a
	// positive one only counts unconvertible characters.
	if (status < 0 || !prop.value)
	    return NULL;

	char *res = strdup((char *)prop.value);
	XFree(prop.value);
	return res;
    }
    // This locale reports over-the-spot support.
    virtual bool supportOverTheSpot() {
	return true;
    }
    // Replace mLocaleName according to im_lang (defined out of line).
    virtual void set_localename_from_im_lang(const char *im_lang);
private:
    // The class owns a raw heap pointer, so copying is disabled
    // (declared but intentionally not defined).
    UTF8_Locale(const UTF8_Locale &);
    UTF8_Locale &operator=(const UTF8_Locale &);

    char *mLocaleName;	// heap copy made with strdup(); may be NULL
};

// Construct the locale, seeding mLocaleName with a heap copy of the locale
// name(s) that compose_localenames_from_im_lang() returns for im_lang.
UTF8_Locale::UTF8_Locale(const char *im_lang)
    : mLocaleName(strdup(compose_localenames_from_im_lang(im_lang)))
{
}

// Map an input-method language tag to the locale name(s) used for it.
//
// The result points to static storage and must not be freed.  It may hold
// several locale names separated by ':' (see the "zh_TW:zh_HK" entry).
// Unknown tags, including "*", "" and a NULL pointer, map to "en_US".
const char *
compose_localenames_from_im_lang(const char *im_lang)
{
    // XXX Also I haven't checked m17nlib yet.
    // --ekato
    static const struct {
	const char *lang;
	const char *locales;
    } table[] = {
	{ "ja",          "ja_JP" },
	{ "ko",          "ko_KR" },
	{ "zh_CN",       "zh_CN" },
	{ "zh_TW:zh_HK", "zh_TW:zh_HK" },
	{ "vi",          "vi_VN" },
    };
    static const char fallback[] = "en_US";	// "*" and "" for now

    if (!im_lang)
	return fallback;

    for (size_t k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
	if (!strcmp(im_lang, table[k].lang))
	    return table[k].locales;
    }
    return fallback;
}

// Report whether locale is one of the entries of locales, a ':'-separated
// list of locale names.
//
// Entries must match exactly: same length, same bytes.  An empty entry
// matches an empty locale.  The list is scanned in place, so nothing is
// allocated and the function cannot fail.  A NULL argument yields false.
bool
is_locale_included(const char *locales, const char *locale)
{
    if (!locales || !locale)
	return false;

    const size_t want = strlen(locale);
    const char *entry = locales;

    for (;;) {
	const char *sep = strchr(entry, ':');
	const size_t len = sep ? (size_t)(sep - entry) : strlen(entry);

	if (len == want && !strncmp(entry, locale, len))
	    return true;
	if (!sep)
	    return false;	// that was the last entry
	entry = sep + 1;
    }
}

// Return a newly allocated copy of the first entry of locales, a
// ':'-separated list of locale names.  Without a ':' the whole string is
// copied.  The caller must free() the result.
//
// Returns NULL when locales is NULL or the allocation fails.
char *
get_first_locale(const char *locales)
{
    if (!locales)
	return NULL;

    // Length of the leading run that contains no ':'.
    const size_t len = strcspn(locales, ":");

    char *first = (char *)malloc(len + 1);
    if (!first)
	return NULL;

    memcpy(first, locales, len);
    first[len] = '\0';
    return first;
}

// Replace the stored locale name with a fresh heap copy of whatever
// compose_localenames_from_im_lang() returns for im_lang.
void
UTF8_Locale::set_localename_from_im_lang(const char *im_lang)
{
    // Look the name up before releasing the old one, in case im_lang
    // points at the string being released.
    const char *const composed = compose_localenames_from_im_lang(im_lang);

    free(mLocaleName);	// free(NULL) is a no-op
    mLocaleName = strdup(composed);
}

// Factory for Locale objects: always builds a UTF8_Locale for im_lang.
// The object is allocated with new; the caller receives ownership.
Locale *createLocale(const char *im_lang)
{
    Locale *created = new UTF8_Locale(im_lang);
    return created;
}

// True when b has the form 10xxxxxx, i.e. lies in 0x80..0xbf.
static inline bool
utf8_is_trail_byte(unsigned char b)
{
    return (b ^ 0x80) < 0x40;
}

// Decode one UTF-8 sequence (1 to 6 bytes) from src into *wc.
//
//   wc       receives the decoded value; written only on success
//   src      start of the sequence
//   src_len  number of readable bytes at src
//
// Returns the number of bytes consumed (1..6) on success.
// Returns 0 when wc is NULL, and RET_ILSEQ when the lead byte is invalid,
// a following byte is not of the form 10xxxxxx, or the sequence is an
// overlong encoding.
// Returns RET_TOOFEW(0) when src is NULL or src_len is smaller than the
// length announced by the lead byte (including src_len < 1).
int
utf8_mbtowc(uchar *wc, const unsigned char *src, int src_len)
{
    if (!wc)
	return 0;
    // Never look at src[0] unless there is at least one byte to read.
    if (!src || src_len < 1)
	return RET_TOOFEW(0);

    unsigned char c = src[0];
    if (c < 0x80) {
	*wc = c;
	return 1;
    } else if (c < 0xc2) {
	// 0x80..0xbf cannot start a sequence; 0xc0/0xc1 would be overlong.
	return RET_ILSEQ;
    } else if (c < 0xe0) {
	if (src_len < 2)
	    return RET_TOOFEW(0);
	if (!utf8_is_trail_byte(src[1]))
	    return RET_ILSEQ;
	*wc = ((uchar)(c & 0x1f) << 6) | (uchar)(src[1] ^ 0x80);
	return 2;
    } else if (c < 0xf0) {
	if (src_len < 3)
	    return RET_TOOFEW(0);
	// The last clause rejects overlong forms (values below 0x800).
	if (!(utf8_is_trail_byte(src[1]) &&
	      utf8_is_trail_byte(src[2]) &&
	      (c >= 0xe1 || src[1] >= 0xa0)))
	    return RET_ILSEQ;
	*wc = ((uchar)(c & 0x0f) << 12) |
	      ((uchar)(src[1] ^ 0x80) << 6) |
	      (uchar)(src[2] ^ 0x80);
	return 3;
    } else if (c < 0xf8) {
	if (src_len < 4)
	    return RET_TOOFEW(0);
	// The last clause rejects overlong forms (values below 0x10000).
	if (!(utf8_is_trail_byte(src[1]) &&
	      utf8_is_trail_byte(src[2]) &&
	      utf8_is_trail_byte(src[3]) &&
	      (c >= 0xf1 || src[1] >= 0x90)))
	    return RET_ILSEQ;
	*wc = ((uchar)(c & 0x07) << 18) |
	      ((uchar)(src[1] ^ 0x80) << 12) |
	      ((uchar)(src[2] ^ 0x80) << 6) |
	      (uchar)(src[3] ^ 0x80);
	return 4;
    } else if (c < 0xfc) {
	if (src_len < 5)
	    return RET_TOOFEW(0);
	// The last clause rejects overlong forms (values below 0x200000).
	if (!(utf8_is_trail_byte(src[1]) &&
	      utf8_is_trail_byte(src[2]) &&
	      utf8_is_trail_byte(src[3]) &&
	      utf8_is_trail_byte(src[4]) &&
	      (c >= 0xf9 || src[1] >= 0x88)))
	    return RET_ILSEQ;
	*wc = ((uchar)(c & 0x03) << 24) |
	      ((uchar)(src[1] ^ 0x80) << 18) |
	      ((uchar)(src[2] ^ 0x80) << 12) |
	      ((uchar)(src[3] ^ 0x80) << 6) |
	      (uchar)(src[4] ^ 0x80);
	return 5;
    } else if (c < 0xfe) {
	if (src_len < 6)
	    return RET_TOOFEW(0);
	// The last clause rejects overlong forms (values below 0x4000000).
	if (!(utf8_is_trail_byte(src[1]) &&
	      utf8_is_trail_byte(src[2]) &&
	      utf8_is_trail_byte(src[3]) &&
	      utf8_is_trail_byte(src[4]) &&
	      utf8_is_trail_byte(src[5]) &&
	      (c >= 0xfd || src[1] >= 0x84)))
	    return RET_ILSEQ;
	*wc = ((uchar)(c & 0x01) << 30) |
	      ((uchar)(src[1] ^ 0x80) << 24) |
	      ((uchar)(src[2] ^ 0x80) << 18) |
	      ((uchar)(src[3] ^ 0x80) << 12) |
	      ((uchar)(src[4] ^ 0x80) << 6) |
	      (uchar)(src[5] ^ 0x80);
	return 6;
    } else
	return RET_ILSEQ;	// 0xfe and 0xff never appear in UTF-8
}

// Encode wc as a UTF-8 sequence of 1 to 6 bytes at dest.
//
// Returns the number of bytes written.  Returns 0 when dest is NULL, and
// RET_ILSEQ when wc is larger than 0x7fffffff.  dest must have room for
// the whole sequence (up to 6 bytes).
int
utf8_wctomb(unsigned char *dest, uchar wc)
{
    if (!dest)
	return 0;

    // Sequence length needed for the value.
    int count;
    if (wc < 0x80)
	count = 1;
    else if (wc < 0x800)
	count = 2;
    else if (wc < 0x10000)
	count = 3;
    else if (wc < 0x200000)
	count = 4;
    else if (wc < 0x4000000)
	count = 5;
    else if (wc <= 0x7fffffff)
	count = 6;
    else
	return RET_ILSEQ;

    // Marker bits OR'ed into the first byte, indexed by sequence length.
    static const unsigned char lead_mark[7] = {
	0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
    };

    // Fill the trailing bytes from last to first, six payload bits each.
    for (int idx = count - 1; idx > 0; idx--) {
	dest[idx] = 0x80 | (wc & 0x3f);
	wc = wc >> 6;
    }
    // What is left of wc is the payload of the first byte.
    dest[0] = wc | lead_mark[count];
    return count;
}
