/*
Copyright (C) 2003 by Sean David Fleming

sean@power.curtin.edu.au

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

The GNU GPL can also be found at http://www.gnu.org
*/

#include <math.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <ctype.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

#ifndef __WIN32
#include <sys/param.h>
#include <sys/wait.h>
#endif

#include "gdis.h"
#include "file.h"
#include "parse.h"
#include "keywords.h"
#include "interface.h"

/* main structures */
/* both objects are only declared here (their definitions live elsewhere) */
/* NOTE(review): no routine in the visible part of this file refers to */
/* either of them - confirm against the rest of the file before removing */
extern struct sysenv_pak sysenv;
extern struct elem_pak elements[];

/*****************************************/
/* strip off path and extension (if any) */
/*****************************************/
/* The result is always a newly allocated string (release with g_free). */
/* Only the text after the rightmost '.' is removed. A name with no '.', */
/* or whose only '.' is its first character (eg ".profile"), is returned */
/* in full. NULL is returned if no basename could be obtained. */
gchar *strdup_basename(const gchar *name)
{
gchar *base, *temp, *dot;

temp = g_path_get_basename(name);
if (temp == NULL)
  return(NULL);

/* get the rightmost '.' */
dot = strrchr(temp, '.');

/* not found (or nothing in front of it) - return the whole thing */
if (dot == NULL || dot == temp)
  return(temp);

/* keep only the characters in front of the '.' */
base = g_strndup(temp, dot - temp);

/* temp came from a glib allocator, so it must go back through g_free */
g_free(temp);

return(base);
}

/**************************************/
/* search for a character in a string */
/**************************************/
/* LAST is the only search type handled: the result points at the */
/* rightmost character of text equal to target, or is NULL if there */
/* is no such character. Any other search type trips an assertion. */
gchar *find_char(const gchar *text, gint target, gint search_type)
{
const gchar *scan;

if (search_type != LAST)
  {
  g_assert_not_reached();
  return(NULL);
  }

/* walk backwards from the final character (the terminator is skipped) */
scan = text + strlen(text);
while (scan != text)
  {
  scan--;
  if (*scan == target)
    return((gchar *) scan);
  }

return(NULL);
}

/*******************************************************/
/* get a float from a string (even in fraction format) */
/*******************************************************/
/* TODO - fortran prints a bunch of *'s if the number is too big - */
/* find a way to cope with this */
/* TODO - cif files have () at the end of coords (error - std), should */
/* ignore everything inside & including the brackets */
/* A NULL string gives 0.0. Text containing a forward slash is read as */
/* numerator/denominator, split at the last slash. Otherwise any '=' */
/* characters are blanked out and the trimmed text is converted. */
#define DEBUG_STR_TO_FLOAT 0
gdouble str_to_float(const gchar *txt)
{
gchar *copy, *slash, *scan;
gdouble top, bottom, result;

if (!txt)
  return(0.0);

/* work on a private copy, as the plain number case edits the text */
copy = g_strdup(txt);
slash = find_char(copy, '/', LAST);

if (!slash)
  {
/* plain number - replace any equal signs with spaces */
  for (scan=copy ; *scan ; scan++)
    if (*scan == '=')
      *scan = ' ';

  result = g_ascii_strtod(g_strstrip(copy), NULL);
  }
else
  {
/* fraction - numerator at the start, denominator after the last slash */
#if DEBUG_STR_TO_FLOAT
printf("processing: %s -> ",copy);
#endif
  top = g_ascii_strtod(copy, NULL);
  bottom = g_ascii_strtod(slash+1, NULL);

/* a denominator that converts to zero is replaced by one */
  if (bottom == 0.0)
    bottom = 1.0;
  result = top/bottom;
#if DEBUG_STR_TO_FLOAT
printf("[%f]\n",result);
#endif
  }

g_free(copy);
return(result);
}

/* Return a list of keywords found */
/* NEW - match all keywords, not just space separated ones */
/* Every keyword label that occurs anywhere in str (strstr, so the match */
/* is case sensitive) contributes its code to the list, in keyword table */
/* order. Codes are stored in the list items with GINT_TO_POINTER. */
/* NB: the debug switch below must have the same name as the #if tests */
#define DEBUG_GET_KEYWORDS_ANYWHERE 0
GSList *get_keywords_anywhere(gchar *str)
{
gint i, j=0, n;
GSList *list=NULL;

#if DEBUG_GET_KEYWORDS_ANYWHERE
printf("extracted: ");
#endif

n = num_keys();
for(i=0 ; i<n ; i++)
  {
  if (strstr(str, keywords[i].label) != NULL)
    {
#if DEBUG_GET_KEYWORDS_ANYWHERE
printf(" %d",keywords[i].code);
#endif
    list = g_slist_prepend(list, GINT_TO_POINTER(keywords[i].code));
/* match counter (only reported by the debug output) */
    j++;
    }
  }
/* items were prepended, so restore keyword table order */
list = g_slist_reverse(list);

#if DEBUG_GET_KEYWORDS_ANYWHERE
printf("\nKeywords found: %d\n", j);
#endif

return(list);
}

/* Return a list of keywords found (iff space separated!) */
/* The string is tokenized, and each token is tested against every */
/* keyword label: a label matches if it is a case insensitive prefix */
/* of the token. The code of each match is appended to the list */
/* (stored with GINT_TO_POINTER), so one token can add several codes. */
#define DEBUG_GET_KEYWORDS 0
GSList *get_keywords(gchar *str)
{
gint t, k, found=0, num_tokens;
gchar **tokens;
const gchar *label;
GSList *codes=NULL;

#if DEBUG_GET_KEYWORDS
printf("extracting from: %s\n", str);
#endif

tokens = tokenize(str, &num_tokens);

/* the token count is tested first, so a NULL token array is never read */
for (t=0 ; t<num_tokens && tokens[t] != NULL ; t++)
  {
/* the keyword table is terminated by an entry with a code of -1 */
  for (k=0 ; keywords[k].code != -1 ; k++)
    {
    label = keywords[k].label;
    if (g_ascii_strncasecmp(tokens[t], label, strlen(label)) != 0)
      continue;

#if DEBUG_GET_KEYWORDS
printf(" %d",keywords[k].code);
#endif
    codes = g_slist_prepend(codes, GINT_TO_POINTER(keywords[k].code));
    found++;
    }
  }
/* prepending reversed the order of discovery - put it back */
codes = g_slist_reverse(codes);

g_strfreev(tokens);
#if DEBUG_GET_KEYWORDS
printf("\nKeywords found: %d\n", found);
#endif

return(codes);
}

/***********************************************************/
/* hash table function for comparing two character strings */
/***********************************************************/
/* gives TRUE when the two strings are equal (ignoring ASCII case) */
/* and FALSE otherwise */
gint hash_strcmp(gconstpointer a, gconstpointer b)
{
return((g_ascii_strcasecmp(a, b) == 0) ? TRUE : FALSE);
}

/**************************************************************/
/* return an array of keyword codes (if any) found in a string */
/**************************************************************/
/* The string is tokenized and each token is tested against every */
/* keyword label (a label matches if it is a case insensitive prefix */
/* of the token). At most max codes are stored. */
/* 1st item -> number actually found, the codes follow from item 1 on. */
/* The array holds max+1 items and is allocated with g_malloc, so the */
/* caller releases it with g_free. Items past the stored codes should */
/* not be relied on - use the count in the 1st item. */
#define DEBUG_GET_KEYWORD 0
gint *get_keyword(gchar *str, gint max)
{
gint i, j, n, len, num_tokens;
gchar **buff;
gint *list;

#if DEBUG_GET_KEYWORD
printf("extracted: ");
#endif

/* a limit below zero would give a bad allocation size */
if (max < 0)
  max = 0;

list = g_malloc((max+1) * sizeof(gint));
buff = tokenize(str, &num_tokens);

/* item 0 is reserved for the count, so the codes start at item 1 */
/* NB: n<=max keeps every write inside the array (even when max is 0) */
/* and stops the search as soon as max codes have been stored */
n=1;
for (i=0 ; i<num_tokens && n<=max ; i++)
  {
/* default keyword code - nothing */
  list[n] = -1;
  for (j=0 ; keywords[j].code != -1 && n<=max ; j++)
    {
    len = strlen(keywords[j].label);
    if (g_ascii_strncasecmp(buff[i], keywords[j].label, len) == 0)
      {
#if DEBUG_GET_KEYWORD
printf(" %d",keywords[j].code);
#endif
      list[n++] = keywords[j].code;
      }
    }
  }

g_strfreev(buff);
list[0] = n-1;
#if DEBUG_GET_KEYWORD
printf("\n");
#endif

return(list);
}

/* count the entries in the keyword table */
/* the table ends with an entry whose code is -1 (which is not counted) */
gint num_keys(void)
{
gint count;

for (count=0 ; keywords[count].code != -1 ; count++)
  ;

return(count);
}

/*****************************************/
/* get a token's keyword number (if any) */
/*****************************************/
/* The result is the position in the keyword table of the first entry */
/* whose label is a case insensitive prefix of the token, or -1 if no */
/* entry matches. */
/* NOTE(review): this is the table index, not the entry's code member - */
/* confirm that callers expect an index */
gint get_keyword_code(const gchar *token)
{
gint k;
const gchar *label;

for (k=0 ; keywords[k].code != -1 ; k++)
  {
  label = keywords[k].label;
  if (g_ascii_strncasecmp(token, label, strlen(label)) == 0)
    return(k);
  }

return(-1);
}

/*********************/
/* tokenize a string */
/*********************/
/* replacement routine for get_tokens */
/* will get as many tokens as available (no more messing with MAX_TOKENS) */
/* src must not be NULL (asserted). It is split on runs of whitespace and */
/* the number of tokens is stored in num. The result is a NULL terminated */
/* array of newly allocated strings (release with g_strfreev), or NULL if */
/* the string contains no tokens. */
#define DEBUG_TOKENIZE 0
gchar **tokenize(const gchar *src, gint *num)
{
gint i, j, n, len;
gchar *tmp, *ptr;
gchar **dest;
GSList *list=NULL, *item;

g_assert(src != NULL);

/* duplicate & replace all whitespace with a space */
/* NB: the ctype functions need an unsigned char value */
tmp = g_strdup(src);
for (ptr=tmp ; *ptr ; ptr++)
  if (isspace((unsigned char) *ptr))
    *ptr = ' ';

/* strange errors can be avoided if a strstrip is done */
g_strstrip(tmp);

#if DEBUG_TOKENIZE
printf("tokenizing [%s]:\n", tmp);
#endif

len = strlen(tmp);

i=n=0;
while (i<len)
  {
/* find end of current token */
  j=i;
  while (j<len && !isspace((unsigned char) tmp[j]))
    j++;

/* store a copy of the token */
  list = g_slist_prepend(list, g_strndup(tmp+i, j-i));
  n++;

/* find start of new token */
  i=j;
  while (i<len && isspace((unsigned char) tmp[i]))
    i++;
  }

/* the working copy is finished with, whether or not tokens were found */
g_free(tmp);

*num = n;
#if DEBUG_TOKENIZE
printf("Found %d tokens: ", n);
#endif

/* return a NULL if no tokens were found (the list is empty here) */
if (!n)
  return(NULL);

/* tokens were prepended, so restore their original order */
list = g_slist_reverse(list);

/* num+1 -> last ptr is NULL, so g_strfreev works */
dest = g_malloc((n+1)*sizeof(gchar *));

/* hand each token string over to the array (no second copy is made) */
i=0;
for (item=list ; item != NULL ; item=g_slist_next(item))
  {
  dest[i] = item->data;
#if DEBUG_TOKENIZE
printf("%s:", dest[i]);
#endif
  i++;
  }

g_assert(i == n);

/* terminate */
dest[n] = NULL;

#if DEBUG_TOKENIZE
printf("%p\n", dest[n]);
#endif

/* only the list cells are freed, as the array now owns the strings */
g_slist_free(list);

return(dest);
}

/************************************************/
/* get the next (non-trivial) line and tokenize */
/************************************************/
/* Lines are read until one yields at least one token: its token array */
/* is returned and the token count is stored in num_tokens. */
/* NULL is returned as soon as fgetline gives a non-zero result */
/* (presumably EOF - see fgetline) */
gchar **get_tokenized_line(FILE *fp, gint *num_tokens)
{
gchar line[LINELEN];
gchar **tokens;

for (;;)
  {
  if (fgetline(fp, line))
    return(NULL);

/* a line without tokens gives NULL, in which case the next line is tried */
  tokens = tokenize(line, num_tokens);
  if (tokens != NULL)
    return(tokens);
  }
}

/*********************/
/* tokenize a string */
/*********************/
/* replacement routine for copy_items */
/* aim is to have one call, rather than multiple copy_item calls */
/* The first num whitespace separated tokens of src are returned in a */
/* NULL terminated array of exactly num newly allocated strings (release */
/* with g_strfreev). If fewer than num tokens are present, the array is */
/* padded with empty strings (strlen = 0). */
#define DEBUG_GET_TOKENS 0
gchar **get_tokens(gchar *src, gint num)
{
gint i, j;
gchar **buff, **dest, *tmp, *ptr;

/* duplicate & replace all whitespace with a space */
/* NB: the ctype functions need an unsigned char value */
tmp = g_strdup(src);
for (ptr=tmp ; *ptr ; ptr++)
  if (isspace((unsigned char) *ptr))
    *ptr = ' ';
/* strange errors can be avoided if a strstrip is done */
g_strstrip(tmp);

/* g_strsplit doesn't eliminate multiple separators, so a run of spaces */
/* produces empty pieces. A piece limit below 1 splits the string */
/* completely - with a fixed limit the final piece would hold the whole */
/* unsplit remainder once the empty pieces had used the limit up. */
buff = g_strsplit(tmp, " ", 0);

/* num+1 -> last ptr is NULL, so g_strfreev works */
dest = g_malloc((num+1)*sizeof(gchar *));

i=j=0;
/* fill in the non empty tokens */
while (buff[i] != NULL && j<num)
  {
  if (*buff[i] != '\0')
    dest[j++] = g_strdup(buff[i]);
  i++;
  }

/* pad with empty strings */
while (j<num)
  dest[j++] = g_strdup("");

/* terminate */
dest[num] = NULL;

#if DEBUG_GET_TOKENS
for (i=0 ; i<num ; i++)
  printf("%s:",*(dest+i));
printf("%p\n",*(dest+num));
#endif

/* done */
g_strfreev(buff);
g_free(tmp);

return(dest);
}

/* get everything in a line from a specified token onwards */
/* (intended as a replacement for copy_items(...ALL)) */
/* Tokens are separated by spaces and tabs and are counted from 0. The */
/* result points into src at the first character of token number num, */
/* or at the terminator if the line has too few tokens. */
gchar *get_token_pos(gchar *src, gint num)
{
gint count;
gchar *start, *end;

start = end = src;
for (count=0 ; count<=num ; count++)
  {
/* skip the separators in front of the next token */
/* FIXME - use the isspace function here */
  start = end;
  while (*start == ' ' || *start == '\t')
    start++;

/* run on to the first separator (or the terminator) after it */
  end = start;
  while (*end != '\0' && *end != ' ' && *end != '\t')
    end++;
  }

/* return ptr to position */
return(start);
}

