/* relational.c:
 *
 ****************************************************************
 * Copyright (C) 2003 Tom Lord
 *
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "hackerlab/os/stdarg.h"
#include "hackerlab/vu/safe.h"
#include "hackerlab/vu/safe-vu-utils-vfdbuf.h"
#include "hackerlab/arrays/ar.h"
#include "hackerlab/sort/qsort.h"
#include "hackerlab/char/char-class.h"
#include "hackerlab/char/str.h"
#include "hackerlab/char/pika-escaping-utils.h"
#include "libawk/relational.h"


/* __STDC__ prototypes for static functions */
static int rec_cmp_by_field (void * va, void * vb, void * vdata);
static int rec_cmp_by_field_fn (void * va, void * vb, void * vdata);
static int rec_cmp_by_fields (void * va, void * vb, void * vdata);
static rel_record rel_read_record (int fd,
                                   int n_fields,
                                   char * err_name,
                                   char * err_src);
static rel_record rel_read_pika_unescape_iso8859_1_record (int fd,
                                                           int n_fields,
                                                           char * err_name,
                                                           char * err_src);
static void rel_print_record (int fd, rel_record rec);
static void rel_print_pika_escape_iso8859_1_record (int fd, int escape_classes, rel_record rec);
static void rel_print_record_sp (int fd, rel_record rec);
static void rel_print_pika_escape_iso8859_1_record_sp (int fd, int escape_classes, rel_record rec);


/************************************************************************
 *(h0 "Relational Tables")
 * 
 * Tla makes heavy use of a simple data structure for storing two
 * dimensional tables of strings: the `rel_table' type.
 * 
 * In general, these functions will cause the process to exit with 
 * non-0 status and an error message to the standard error descriptor
 * if an allocation failure occurs.
 */
/*(menu)
 */


/************************************************************************
 *(h1 "Table Types")
 * 
 * Tables should be declared to be of type `rel_table' and initialized 
 * to 0, as in:
 *  
 *    rel_table table = 0;
 * 
 * 
 * Individual records (each an array of fields) and individual fields
 * can be read using ordinary, 0-based array subscripting:
 * 
 *    table[4][1]
 * 
 * refers to the second field (or column) of the fifth row (or record)
 * of `table'.
 */

/*(c rel_field :category type)
 * typedef t_uchar * rel_field;
 * 
 * A single field within a relational table -- a 0-terminated string.
 */
/*(c rel_record :category type)
 * typedef rel_field * rel_record;
 * 
 * A single row within a relational table;  an array of fields.
 */
/*(c rel_table :category type)
 * typedef rel_record * rel_table;
 * 
 * A relational table;  an array of records.
 */


/************************************************************************
 *(h1 "Table Sizes")
 * 
 */


/*(c rel_n_records)
 * int rel_n_records (rel_table r);
 * 
 * Return the number of records (rows) within a table.
 */
int
rel_n_records (rel_table r)
{
  /* A table is an `ar' array of `rel_record' elements; its length
   * is whatever `ar_size' reports for that element size.
   */
  int n_records = ar_size ((void *)r, 0, sizeof (rel_record));

  return n_records;
}


/*(c rel_n_fields)
 * int rel_n_fields (rel_record r);
 * 
 * Return the number of fields (columns) within a record.
 */
int
rel_n_fields (rel_record r)
{
  /* A record is an `ar' array of `rel_field' elements; its length
   * is whatever `ar_size' reports for that element size.
   */
  int n_fields = ar_size ((void *)r, 0, sizeof (rel_field));

  return n_fields;
}


/************************************************************************
 *(h1 "Adding Fields and Records")
 * 
 * 
 * 
 */


/*(c rel_make_record)
 * rel_record rel_make_record (t_uchar * field0, ...);
 * 
 * Allocate a new record containing the indicated fields.
 * 
 * The list of field values may not itself contain a 0 (null) field
 * and must be terminated by a 0, as in this call:
 *  
 *     r = rel_make_record ("apples", "oranges", 0);
 * 
 * which creates a record with two fields.
 * 
 * Note that this function allocates private copies of the fields.
 * They will be freed by `rel_free_record' or `rel_free_table'.
 */
rel_record
rel_make_record (t_uchar * field0, ...)
{
  va_list args;
  rel_record rec = 0;
  t_uchar * next;

  /* An immediately terminated argument list yields the null record. */
  if (!field0)
    return 0;

  va_start (args, field0);
  for (next = field0; next; next = va_arg (args, t_uchar *))
    {
      /* Every field is stored as a `str_save' copy of the caller's string. */
      *(rel_field *)ar_push ((void **)&rec, 0, sizeof (rel_field)) = str_save (0, next);
    }
  va_end (args);

  return rec;
}


/*(c rel_add_records)
 * void rel_add_records (rel_table * table, ...);
 * 
 * Append records to `*table'.
 * 
 * The list of records must be terminated by 0 (null).
 * 
 * This procedure may move the table itself in memory.   If 
 * it does, `*table' will be updated to point to the relocated
 * table.
 * 
 * This procedure does ^not^ copy its argument records but uses
 * them directly.   If the table is later passed to `rel_free_table',
 * those records will be freed.   (Thus, in general, tables should  
 * not share records.)
 * 
 * A typical usage, creating a table of the form:
 * 
 *      apples    trees
 *      oranges   trees
 *      grapes    vines
 * 
 * is the call (note with care the 0 values passed to terminate 
 * argument lists):
 * 
 *    rel_table t = 0;
 * 
 *    rel_add_records (&t, rel_make_record ("apples", "trees", 0),
 *                         rel_make_record ("oranges", "trees", 0),
 *                         rel_make_record ("grapes", "vines", 0),
 *                         0);
 */
void
rel_add_records (rel_table * table, ...)
{
  va_list args;

  va_start (args, table);
  while (1)
    {
      rel_record next = va_arg (args, rel_record);

      /* A null record marks the end of the argument list. */
      if (!next)
        break;

      /* The record pointer itself (not a copy) becomes the new last row. */
      *(rel_record *)ar_push ((void **)table, 0, sizeof (rel_record)) = next;
    }
  va_end (args);
}


/*(c rel_add_field)
 * void rel_add_field (rel_record * r, t_uchar * field);
 * 
 * Append a single field (the string `field') to the record `*r'.
 * 
 * A private copy of `field' is allocated.
 * 
 * The record may be relocated in memory in which case the value of
 * `*r' will be updated.
 */
void
rel_add_field (rel_record * r, t_uchar * field)
{
  /* Take the private copy first, then grow the record by one slot
   * and store the copy there.
   */
  t_uchar * copy = str_save (0, field);
  t_uchar ** slot = (t_uchar **)ar_push ((void **)r, 0, sizeof (t_uchar *));

  *slot = copy;
}


/*(c rel_singleton_record_n)
 * rel_record rel_singleton_record_n (t_uchar * start, size_t len);
 * 
 * Create a new record containing a single field which is a copy of
 * the `len' characters beginning at `start' with a final 0 appended.
 */
rel_record
rel_singleton_record_n (t_uchar * start, size_t len)
{
  rel_record rec;
  rel_field * slot;

  /* Start from the empty record and append exactly one field. */
  rec = 0;
  slot = (rel_field *)ar_push ((void **)&rec, 0, sizeof (rel_field));
  *slot = str_save_n (0, start, len);

  return rec;
}


/************************************************************************
 *(h1 "Freeing Records and Tables")
 * 
 * 
 * 
 */


/*(c rel_free_table)
 * void rel_free_table (rel_table t);
 * 
 * Free the entire table `t'. 
 * 
 * This function will also free all records which are part
 * of `t' -- there is no need to separately call `rel_free_record'.
 */
void
rel_free_table (rel_table t)
{
  int n_records = rel_n_records (t);
  int row = 0;

  /* Release every record (and, through `rel_free_record', its fields)... */
  while (row < n_records)
    {
      rel_free_record (t[row]);
      ++row;
    }

  /* ...then hand the array of record pointers itself to `ar_free'. */
  ar_free ((void **)&t, 0);
}


/*(c rel_free_record)
 * void rel_free_record (rel_record r);
 * 
 * Free record `r'.   This function will also individually free
 * the fields of record `r'.
 */
void
rel_free_record (rel_record r)
{
  int n_fields = rel_n_fields (r);
  int f = 0;

  /* Release each field string... */
  while (f < n_fields)
    {
      lim_free (0, r[f]);
      ++f;
    }

  /* ...then hand the array of field pointers itself to `ar_free'. */
  ar_free ((void **)&r, 0);
}



/************************************************************************
 *(h1 "Parsing Tables from Strings")
 * 
 * 
 * 
 */

/* Separator predicate used by `rel_callback_split'.  It is called with
 * the caller-supplied context and one character of the input; a non-0
 * return value means the character is a separator.
 */
typedef int split_callback(void const * , char);

/*(c rel_callback_split)
 * static rel_table rel_callback_split (t_uchar const * string,
 *                                      split_callback * split_fn,
 *                                      void const * split_context);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by runs of one or more characters for which
 * `split_fn (split_context, ch)' returns non-0; empty rows are
 * never produced.
 * 
 * Returns 0 if `string' is 0 or contains nothing but separators.
 */
static rel_table
rel_callback_split (t_uchar const * string, split_callback * split_fn, void const *split_context)
{
  rel_table answer = 0;
  t_uchar const * start;
  t_uchar const * end;

  if (!string)
    return 0;

  start = string;

  while (1)
    {
      /* Skip the separators in front of the next row.  The explicit
       * `*start' test keeps the scan inside the string even if
       * `split_fn' were to classify the terminating 0 as a separator.
       */
      while (*start && split_fn (split_context, *start))
        ++start;

      if (!*start)
        return answer;

      /* Find the end of the row: the next separator or the end of input. */
      end = start;

      while (*end && !split_fn (split_context, *end))
        ++end;

      rel_add_records (&answer, rel_singleton_record_n ((t_uchar *)start, end - start), 0);

      start = end;
    }
}

/* Separator predicate for `rel_delim_split': `context' is the
 * 0-terminated list of delimiter characters.  Returns 1 if `ch' is one
 * of them and 0 otherwise; the terminating 0 itself never matches.
 *
 * Both sides of the comparison are taken as `unsigned char'.  Comparing
 * an unsigned delimiter byte directly against a plain `char' fails for
 * bytes >= 0x80 wherever `char' is signed, because the two promote to
 * different `int' values.
 * NOTE(review): this assumes `t_uchar' is `unsigned char' -- confirm.
 */
static int
split_delim (void const * context, char ch)
{
  unsigned char const * delimiters = (unsigned char const *) context;
  unsigned char const wanted = (unsigned char) ch;

  for (; *delimiters != '\0'; ++delimiters)
    {
      if (*delimiters == wanted)
        return 1;
    }
  return 0;
}

/*(c rel_delim_split)
 * rel_table rel_delim_split (t_uchar * string, t_uchar * delimiters);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by any char in delimiters.
 */
rel_table
rel_delim_split (t_uchar const * string, t_uchar const * delimiters)
{
  /* The delimiter list travels to `split_delim' as the callback context. */
  rel_table rows = rel_callback_split (string, split_delim, delimiters);

  return rows;
}

/* Separator predicate for `rel_ws_split': reports whatever
 * `char_is_space' says about `ch'.  The context argument is ignored.
 */
static int 
split_is_space (void const *unused, char ch)
{
  int is_space = char_is_space (ch);

  return is_space;
}

/*(c rel_ws_split)
 * rel_table rel_ws_split (t_uchar * string);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by arbitrary whitespace.
 */
rel_table
rel_ws_split (t_uchar * string)
{
  /* `split_is_space' needs no context, hence the NULL. */
  rel_table rows = rel_callback_split (string, split_is_space, NULL);

  return rows;
}


/*(c rel_nl_split)
 * rel_table rel_nl_split (t_uchar * string);
 * 
 * Allocate and return a new table formed by parsing rows,
 * each containing a single field, from `string'.   Rows 
 * are separated by newlines.
 * FIXME: audit the callers to this to see if it can be integrated
 * into the callback method above, which would strip empty lines.
 */
rel_table
rel_nl_split (t_uchar * string)
{
  rel_table lines = 0;
  t_uchar * line_start;

  if (!string)
    return 0;

  /* One row per line.  Unlike the callback-based splitters, an empty
   * line yields a row holding an empty field.
   */
  for (line_start = string; *line_start; )
    {
      t_uchar * line_end = line_start;

      while (*line_end && (*line_end != '\n'))
        ++line_end;

      rel_add_records (&lines, rel_singleton_record_n (line_start, line_end - line_start), 0);

      /* Step over the newline, if there is one, so that the next row
       * begins just after it.
       */
      line_start = (*line_end ? line_end + 1 : line_end);
    }

  return lines;
}



/************************************************************************
 *(h1 "Copying Tables")
 * 
 * 
 * 
 */



/*(c rel_copy_table)
 * rel_table rel_copy_table (rel_table t);
 * 
 * Return a freshly allocated copy of table `t'.
 */
rel_table
rel_copy_table (rel_table t)
{
  rel_table copy = 0;
  int n_records = rel_n_records (t);
  int row = 0;

  /* Size the result up front, then fill each slot with a copy
   * of the corresponding record.
   */
  ar_setsize ((void **)&copy, 0, n_records, sizeof (rel_record));
  while (row < n_records)
    {
      copy[row] = rel_copy_record (t[row]);
      ++row;
    }

  return copy;
}


/*(c rel_copy_record)
 * rel_record rel_copy_record (rel_record r);
 * 
 * Return a freshly allocated copy of record `r'.
 */
rel_record
rel_copy_record (rel_record r)
{
  rel_record copy = 0;
  int n_fields = rel_n_fields (r);
  int f = 0;

  /* Size the result up front, then fill each slot with a
   * `str_save' copy of the corresponding field.
   */
  ar_setsize ((void **)&copy, 0, n_fields, sizeof (rel_field));
  while (f < n_fields)
    {
      copy[f] = str_save (0, r[f]);
      ++f;
    }

  return copy;
}


/************************************************************************
 *(h1 "Appending Tables")
 * 
 * 
 * 
 */



/*(c rel_append_x)
 * void rel_append_x (rel_table * out, rel_table t);
 * 
 * Append copies of all records in table `t' to the 
 * table `*out'.
 * 
 * This procedure may move the output table in memory.   If 
 * it does, `*out' will be updated to point to the relocated
 * table.
 */
void
rel_append_x (rel_table * out, rel_table t)
{
  int n_records = rel_n_records (t);
  int row = 0;

  while (row < n_records)
    {
      /* `*out' receives its own copy; the records of `t' are not shared. */
      rel_record copy = rel_copy_record (t[row]);

      *(rel_record *)ar_push ((void **)out, 0, sizeof (rel_record)) = copy;
      ++row;
    }
}


/************************************************************************
 *(h1 "Reordering Tables")
 * 
 * 
 * 
 */


/*(c rel_reverse_table)
 * void rel_reverse_table (rel_table t);
 * 
 * Reverse the order of records in table `t'.
 */
void
rel_reverse_table (rel_table t)
{
  int lo;
  int hi;

  /* Swap record pointers pairwise, walking inward from both ends. */
  for (lo = 0, hi = rel_n_records (t) - 1; lo < hi; ++lo, --hi)
    {
      rel_record held = t[lo];

      t[lo] = t[hi];
      t[hi] = held;
    }
}


/* Sort parameters that `rel_sort_table_by_field' hands to
 * `rec_cmp_by_field' through the last argument of `quicksort'.
 */
struct rel_sort_spec
{
  int reverse_p;                /* non-0: negate the comparison result */
  int field;                    /* index of the field compared in each record */
};


/*(c rel_sort_table_by_field)
 * void rel_sort_table_by_field (int reverse_p,
 *                               rel_table t,
 *                               int field_n);
 * 
 * Sort table `t' lexically according the contents of 
 * `field_n' within each record.
 * 
 * If `reverse_p' is not 0, then sort in descending rather
 * than ascending order.
 */
void
rel_sort_table_by_field (int reverse_p,
                         rel_table t,
                         int field_n)
{
  struct rel_sort_spec how;
  int n_records;

  /* Bundle the sort parameters for the comparison function. */
  how.field = field_n;
  how.reverse_p = reverse_p;

  n_records = rel_n_records (t);
  quicksort ((void *)t, n_records, sizeof (rel_record), rec_cmp_by_field, (void *)&how);
}


static int
rec_cmp_by_field (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_sort_spec * spec;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_sort_spec *)vdata;

  if (spec->reverse_p)
    {
      return -str_cmp ((*a)[spec->field], (*b)[spec->field]);
    }
  else
    {
      return str_cmp ((*a)[spec->field], (*b)[spec->field]);
    }
}


/* Sort parameters that `rel_sort_table_by_field_fn' hands to
 * `rec_cmp_by_field_fn' through the last argument of `quicksort'.
 */
struct rel_sort_by_fn_spec
{
  int reverse_p;                /* non-0: negate the comparison result */
  int field;                    /* index of the field compared in each record */
  int (*fn) (t_uchar * va, t_uchar * vb);   /* caller-supplied field comparison */
};


/*(c rel_sort_table_by_field_fn)
 * void rel_sort_table_by_field_fn (int reverse_p,
 *                                  rel_table t,
 *                                  int field_n, 
 *                                  int (*fn)(t_uchar *, t_uchar *));
 * 
 * Sort table `t' according the contents of 
 * `field_n' within each record.
 * 
 * If `reverse_p' is not 0, then sort in descending rather
 * than ascending order.
 * 
 * The ordering is determined by `fn' which should accept 
 * two arguments, both field values, and return -1, 0, or 1
 * depending on whether the first is less than, equal to, or
 * greater than the second.
 */
void
rel_sort_table_by_field_fn (int reverse_p,
                            rel_table t,
                            int field_n, 
                            int (*fn)(t_uchar *, t_uchar *))
{
  struct rel_sort_by_fn_spec how;
  int n_records;

  /* Bundle the sort parameters, including the caller's comparison,
   * for the comparison function.
   */
  how.fn = fn;
  how.field = field_n;
  how.reverse_p = reverse_p;

  n_records = rel_n_records (t);
  quicksort ((void *)t, n_records, sizeof (rel_record), rec_cmp_by_field_fn, (void *)&how);
}

static int
rec_cmp_by_field_fn (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_sort_by_fn_spec * spec;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_sort_by_fn_spec *)vdata;

  if (spec->reverse_p)
    {
      return -spec->fn ((*a)[spec->field], (*b)[spec->field]);
    }
  else
    {
      return spec->fn ((*a)[spec->field], (*b)[spec->field]);
    }
}



/* Sort parameters that `rel_sort_table_by_fields' hands to
 * `rec_cmp_by_fields' through the last argument of `quicksort'.
 */
struct rel_nsort_spec
{
  int reverse_p;                /* non-0: negate each comparison result */
  int * fields;                 /* field indexes, ended by a negative entry */
};


/*(c rel_sort_table_by_fields)
 * void rel_sort_table_by_fields (int reverse_p,
 *                                rel_table t,
 *                                int * fields);
 * 
 * Sort table `t' lexically, according the contents of the indicated
 * fields.
 * 
 * If `reverse_p' is not 0, then sort in descending rather
 * than ascending order.
 *
 * `fields' is a list of fields created by `rel_sort_fields' (see below)
 *  and lists the sort keys, from highest to lowest priority.
 */
void
rel_sort_table_by_fields (int reverse_p,
                          rel_table t,
                          int * fields)
{
  struct rel_nsort_spec how;
  int n_records;

  /* Bundle the sort parameters for the comparison function. */
  how.fields = fields;
  how.reverse_p = reverse_p;

  n_records = rel_n_records (t);
  quicksort ((void *)t, n_records, sizeof (rel_record), rec_cmp_by_fields, (void *)&how);
}


/*(c rel_sort_fields)
 * int * rel_sort_fields (int f, ...);
 * 
 * Construct a list of fields suitable for use with
 * `rel_sort_table_by_fields'.
 * 
 * The arguments should be terminated by an argument which is less
 * than 0.
 * 
 * It is not necessary to free the value returned by this
 * function (but ^note^ that, at the moment, the table is 
 * simply space-leaked).
 */
int *
rel_sort_fields (int f, ...)
{
  va_list fp;
  int * answer;

  answer = 0;

  va_start (fp, f);
  while (1)
    {
      /* Store every argument, including the negative terminator,
       * which is what `rec_cmp_by_fields' stops on.
       */
      *(int *)ar_push ((void **)&answer, 0, sizeof (int)) = f;

      /* Stop *before* fetching another argument: when the first
       * argument is already the terminator (an empty key list) there
       * is no further argument, and calling `va_arg' would be
       * undefined behaviour.
       */
      if (f < 0)
        break;

      f = va_arg (fp, int);
    }
  va_end (fp);
  return answer;
}


static int
rec_cmp_by_fields (void * va, void * vb, void * vdata)
{
  rel_record * a;
  rel_record * b;
  struct rel_nsort_spec * spec;
  int nth;

  a = (rel_record *)va;
  b = (rel_record *)vb;
  spec = (struct rel_nsort_spec *)vdata;

  for (nth = 0; spec->fields[nth] >= 0; ++nth)
    {
      int cmp;

      if (spec->reverse_p)
        {
          cmp = -str_cmp ((*a)[spec->fields[nth]], (*b)[spec->fields[nth]]);
        }
      else
        {
          cmp = str_cmp ((*a)[spec->fields[nth]], (*b)[spec->fields[nth]]);
        }

      if (cmp)
        return cmp;
    }

  return 0;
}


/************************************************************************
 *(h1 "Eliminating Duplicate Fields")
 * 
 * 
 * 
 */


/*(c rel_uniq_by_field)
 * void rel_uniq_by_field (rel_table * table,
 *                         int field);
 * 
 * Within each run of adjacent records of `table' sharing an
 * equal value for the indicated `field', keep the first record
 * and discard (and free) the second and subsequent ones.
 * 
 * This procedure may move the `table' in memory.   If 
 * it does, `*table' will be updated to point to the 
 * relocated table.
 */
void
rel_uniq_by_field (rel_table * table,
                   int field)
{
  int n_records = rel_n_records (*table);
  int kept = 0;
  int scan = 0;

  while (scan < n_records)
    {
      /* The first record of each run of equal keys survives; it is
       * moved down to the next free slot.
       */
      (*table)[kept] = (*table)[scan];

      /* Free the records that follow it for as long as their key
       * equals the survivor's.
       */
      while ((scan + 1 < n_records) && !str_cmp ((*table)[kept][field], (*table)[scan + 1][field]))
        {
          ++scan;
          rel_free_record ((*table)[scan]);
        }

      ++kept;
      ++scan;
    }

  /* Trim the table down to the survivors. */
  ar_setsize ((void **)table, 0, kept, sizeof (rel_record));
}



/************************************************************************
 *(h1 "Table Cuts")
 * 
 * 
 * 
 */


/*(c rel_cut)
 * rel_table rel_cut (rel_cut_spec fields, rel_table t);
 * 
 * Create a new, freshly allocated table formed by extracting
 * from each record of `t' copies of the fields indicated by
 * `fields' (and only those fields, in the order listed).
 * 
 * `fields' should be a value returned by `rel_cut_list' (see below).
 */
rel_table
rel_cut (rel_cut_spec fields, rel_table t)
{
  rel_table result = 0;
  int n_records = ar_size ((void *)t, 0, sizeof (rel_record));
  int row = 0;

  while (row < n_records)
    {
      /* Each input record contributes one freshly built output record. */
      rel_record picked = rel_cut_record (fields, t[row]);

      rel_add_records (&result, picked, 0);
      ++row;
    }

  return result;
}


/*(c rel_cut_record)
 * rel_record rel_cut_record (rel_cut_spec fields, rel_record r);
 * 
 * Create a new, freshly allocated record formed by extracting
 * from `r' copies of the fields indicated by `fields' (and only
 * those fields, in the order listed).
 * 
 * `fields' should be a value returned by `rel_cut_list' (see below).
 */
rel_record
rel_cut_record (rel_cut_spec fields, rel_record r)
{
  rel_record picked = 0;
  int nth = 0;

  /* The field list ends at its first negative entry. */
  while (fields[nth] >= 0)
    {
      t_uchar ** slot = (t_uchar **)ar_push ((void **)&picked, 0, sizeof (t_uchar *));

      *slot = str_save (0, r[fields[nth]]);
      ++nth;
    }

  return picked;
}


/*(c rel_cut_list)
 * rel_cut_spec rel_cut_list (int field, ...);
 * 
 * Construct a list of fields suitable for use with
 * `rel_cut'.
 * 
 * The arguments should be terminated by an argument which is less
 * than 0.
 * 
 * It is not necessary to free the value returned by this
 * function (but ^note^ that, at the moment, the table is 
 * simply space-leaked).
 */
rel_cut_spec
rel_cut_list (int field, ...)
{
  va_list fp;
  rel_cut_spec answer;

  answer = 0;

  va_start (fp, field);
  while (1)
    {
      /* Store every argument, including the negative terminator,
       * which is what `rel_cut_record' stops on.
       */
      *(int *)ar_push ((void **)&answer, 0, sizeof (int)) = field;

      /* Stop *before* fetching another argument: when the first
       * argument is already the terminator (an empty field list)
       * there is no further argument, and calling `va_arg' would be
       * undefined behaviour.
       */
      if (field < 0)
        break;

      field = va_arg (fp, int);
    }
  va_end (fp);
  return answer;
}

/* Release the field list `*spec' by handing it to `ar_free'. */
void
rel_cut_spec_finalise (rel_cut_spec *spec)
{
  void ** storage = (void **) spec;

  ar_free (storage, 0);
}





/************************************************************************
 *(h1 "The Relational Join Operation")
 * 
 * 
 * 
 */


/*(c rel_join)
 * rel_table rel_join (int absence_table,
 *                     struct rel_join_output_spec * output,
 *                     int table1_field,
 *                     int table2_field,
 *                     rel_table table1,
 *                     rel_table table2);
 * 
 * Perform a relational join on `table1' and `table2' as
 * specified by the other arguments.
 * 
 * `table1_field' and `table2_field' indicate the fields to compare
 * for the join.  Both tables should be lexically sorted by that
 * field, in increasing order.
 * 
 * If `absence_table' is -1 (any negative value is accepted), then the
 * output table contains an entry for each row of `table1' and `table2'
 * having the indicated fields in common.  If `absence_table' is 1, then
 * output is produced only for rows unique to `table1'; if 2 (or any
 * other non-negative value), only for rows unique to `table2'.
 * 
 * `output' describes which field values to copy into the output table.
 * See `rel_join_output' below.
 */
rel_table
rel_join (int absence_table,
          struct rel_join_output_spec * output,
          int table1_field,
          int table2_field,
          rel_table table1,
          rel_table table2)
{
  rel_table joined = 0;
  int n1 = rel_n_records (table1);
  int n2 = rel_n_records (table2);
  int i1 = 0;
  int i2 = 0;
  int n_out = 0;

  /* Count the output columns: the spec ends at the entry whose
   * `table' is -1.
   */
  while (output[n_out].table != -1)
    ++n_out;

  /* A single merge pass over both tables, one cursor in each. */
  while ((i1 < n1) || (i2 < n2))
    {
      int cmp;
      int emit;

      /* An exhausted table compares as if its key were greater than
       * anything left in the other one.
       */
      if (i2 == n2)
        cmp = -1;
      else if (i1 == n1)
        cmp = 1;
      else
        cmp = str_cmp (table1[i1][table1_field], table2[i2][table2_field]);

      /* Which relation between the two current keys produces output
       * depends on the mode: equal keys for a join, table1's key
       * smaller for mode 1, table2's key smaller otherwise.
       */
      if (absence_table < 0)
        emit = (cmp == 0);
      else if (absence_table == 1)
        emit = (cmp < 0);
      else
        emit = (cmp > 0);

      if (emit)
        {
          rel_record row = 0;
          rel_record rec1 = 0;
          rel_record rec2 = 0;
          int col;

          if (i1 < n1)
            rec1 = table1[i1];
          if (i2 < n2)
            rec2 = table2[i2];

          /* Note that a spec entry naming a table whose cursor is
           * exhausted indexes a null record here.
           */
          for (col = 0; col < n_out; ++col)
            {
              rel_record from = ((output[col].table == 1) ? rec1 : rec2);

              *(t_uchar **)ar_push ((void **)&row, 0, sizeof (char *)) = str_save (0, from[output[col].field]);
            }
          *(rel_record *)ar_push ((void **)&joined, 0, sizeof (rel_record)) = row;
        }

      /* Advance whichever cursor holds the smaller key -- both when
       * the keys are equal.
       */
      if ((i1 < n1) && (cmp <= 0))
        ++i1;

      if ((i2 < n2) && (cmp >= 0))
        ++i2;
    }

  return joined;
}


/*(c rel_join_output)
 * struct rel_join_output_spec * rel_join_output (int table,
 *                                                int field, ...);
 * 
 * Construct a list of fields to be included in the output of
 * a `rel_join' call.
 * 
 * The argument list is a sequence of pairs, terminated by a single -1.
 * 
 * Each pair names a `table' (1 or 2) and a `field' (0 based).
 */
/* Build, or fetch from a cache, the output spec described by the
 * argument list.
 *
 * Specs are memoized in the function-local static array `cache'.  A call
 * whose arguments match a previously built spec entry for entry returns
 * that same array again, so the result is shared between callers and
 * stays referenced by the cache.
 *
 * The returned array always ends with an entry whose `table' and
 * `field' are both -1.
 */
struct rel_join_output_spec *
rel_join_output (int table,
                 int field, ...)
{
  va_list ap;
  struct rel_join_output_spec * answer;
  struct rel_join_output_spec * item;
  int x;

  /* Every spec ever built by this function; never shrunk or freed here. */
  static struct rel_join_output_spec ** cache = 0;

  answer = 0;

  /* Cache lookup: stop at the first cached spec equal to the arguments. */
  for (x = 0; !answer && x < ar_size (cache, 0, sizeof (struct rel_join_output_spec *)); ++x)
    {
      item = cache[x];

      /* Cheap rejection on the first (fixed-argument) pair. */
      if (item->table != table || item->field != field)
        continue;

      /* Walk the variable arguments and the cached entries in lockstep. */
      va_start (ap, field);
      while (1)
        {
          int tmp_table;
          int tmp_field;

          ++item;
          tmp_table = va_arg (ap, int);
          /* A negative table number ends the argument list, so no
           * field follows it.  It can only equal the stored terminator
           * if it is exactly -1.
           */
          if (tmp_table < 0)
            tmp_field = -1;
          else
            tmp_field = va_arg (ap, int);

          if (item->table != tmp_table || item->field != tmp_field)
            break;
          /* Both sequences reached their terminator together: a hit. */
          if (item->table == -1)
            {
              answer = cache[x];
              break;
            }
        }
      va_end (ap);
    }
    
  /* Cache miss: build a fresh spec from the arguments and remember it. */
  if (!answer)
    {
      item = (struct rel_join_output_spec *)ar_push ((void **)&answer, 0, sizeof (struct rel_join_output_spec));
      item->table = table;
      item->field = field;

      va_start (ap, field);
      while (1)
        {
          /* Any negative table number ends the list. */
          table = va_arg (ap, int);
          if (table < 0)
            break;
          field = va_arg (ap, int);

          item = (struct rel_join_output_spec *)ar_push ((void **)&answer, 0, sizeof (struct rel_join_output_spec));
          item->table = table;
          item->field = field;
        }
      va_end (ap);

      /* Terminating entry: this is what `rel_join' counts up to. */
      item = (struct rel_join_output_spec *)ar_push ((void **)&answer, 0, sizeof (struct rel_join_output_spec));
      item->table = -1;
      item->field = -1;

      *(struct rel_join_output_spec **)ar_push ((void **)&cache, 0, sizeof (struct rel_join_output_spec *)) = answer;
    }

  return answer;
}




/************************************************************************
 *(h1 "Reading Tables from Streams")
 * 
 * 
 * 
 */


/*(c rel_read_table)
 * rel_table rel_read_table (int fd,
 *                           int n_fields,
 *                           char * err_name,
 *                           char * err_src);
 * 
 * Read a table with `n_fields' per row from descriptor
 * `fd'.  (Fields are whitespace-separated strings, rows
 * are separated by newlines.)
 * 
 * In the event of an I/O or syntax error, report an error 
 * from program `err_name' concerning input from `err_src'
 * and exit with status 2.
 */
rel_table
rel_read_table (int fd,
                int n_fields,
                char * err_name,
                char * err_src)
{
  rel_table rows = 0;
  rel_record row;

  /* Keep appending records until `rel_read_record' returns a null one. */
  for (row = rel_read_record (fd, n_fields, err_name, err_src);
       row;
       row = rel_read_record (fd, n_fields, err_name, err_src))
    {
      *(rel_record *)ar_push ((void **)&rows, 0, sizeof (rel_record)) = row;
    }

  return rows;
}


/* Read one line from `fd' with `safe_next_line' and split it into
 * `n_fields' whitespace-separated fields.  Returns 0 when no line is
 * available.
 *
 * A field is reported as ill-formed when it is empty or when it runs
 * to the very end of the line buffer.  The report (and `exit (2)')
 * happens only if `err_name' is non-0; otherwise parsing carries on.
 */
static rel_record
rel_read_record (int fd,
                 int n_fields,
                 char * err_name,
                 char * err_src)
{
  t_uchar * field_start;
  t_uchar * cursor;
  long remaining;
  rel_record rec = 0;
  int nth;

  safe_next_line (&field_start, &remaining, fd);
  if (!field_start)
    return 0;

  ar_setsize ((void **)&rec, 0, n_fields, sizeof (rel_field));

  cursor = field_start;
  for (nth = 0; nth < n_fields; ++nth)
    {
      /* Scan to the end of the field. */
      for (; remaining && !char_is_space (*cursor); --remaining)
        ++cursor;

      if (err_name && (!remaining || (cursor == field_start)))
        {
          safe_printfmt (2, "%s: ill formated input\n", err_name);
          safe_printfmt (2, "   input source: %s\n", err_src);
          exit (2);
        }

      rec[nth] = str_save_n (0, field_start, cursor - field_start);

      /* Skip the whitespace in front of the next field. */
      for (; remaining && char_is_space (*cursor); --remaining)
        ++cursor;

      field_start = cursor;
    }

  return rec;
}

/*(c rel_read_pika_unescape_iso8859_1_table)
 * rel_table rel_read_pika_unescape_iso8859_1_table (int fd,
 *                                                   int n_fields,
 *                                                   char * err_name,
 *                                                   char * err_src);
 * 
 * Read an escaped table with `n_fields' per row from descriptor
 * `fd'.  (Fields are whitespace-separated strings, rows
 * are separated by newlines.)
 *
 * Escape sequences will be unescaped
 *
 * In the event of an I/O or syntax error, report an error 
 * from program `err_name' concerning input from `err_src'
 * and exit with status 2.
 */
rel_table
rel_read_pika_unescape_iso8859_1_table (int fd,
                                        int n_fields,
                                        char * err_name,
                                        char * err_src)
{
  rel_table rows = 0;
  rel_record row;

  /* Keep appending records until the record reader returns a null one. */
  for (row = rel_read_pika_unescape_iso8859_1_record (fd, n_fields, err_name, err_src);
       row;
       row = rel_read_pika_unescape_iso8859_1_record (fd, n_fields, err_name, err_src))
    {
      *(rel_record *)ar_push ((void **)&rows, 0, sizeof (rel_record)) = row;
    }

  return rows;
}


/* Read one line from `fd' with `safe_next_line' and split it into
 * `n_fields' whitespace-separated fields, storing each field as the
 * result of `pika_save_unescape_iso8859_1_n'.  Returns 0 when no line
 * is available.
 *
 * A field is reported as ill-formed when it is empty or when it runs
 * to the very end of the line buffer.  The report (and `exit (2)')
 * happens only if `err_name' is non-0; otherwise parsing carries on.
 */
static rel_record
rel_read_pika_unescape_iso8859_1_record (int fd,
                                         int n_fields,
                                         char * err_name,
                                         char * err_src)
{
  t_uchar * field_start;
  t_uchar * cursor;
  long remaining;
  rel_record rec = 0;
  int nth;

  safe_next_line (&field_start, &remaining, fd);
  if (!field_start)
    return 0;

  ar_setsize ((void **)&rec, 0, n_fields, sizeof (rel_field));

  cursor = field_start;
  for (nth = 0; nth < n_fields; ++nth)
    {
      /* Scan to the end of the (still escaped) field. */
      for (; remaining && !char_is_space (*cursor); --remaining)
        ++cursor;

      if (err_name && (!remaining || (cursor == field_start)))
        {
          safe_printfmt (2, "%s: ill formated input\n", err_name);
          safe_printfmt (2, "   input source: %s\n", err_src);
          exit (2);
        }

      rec[nth] = pika_save_unescape_iso8859_1_n (0, 0, field_start, cursor - field_start);

      /* Skip the whitespace in front of the next field. */
      for (; remaining && char_is_space (*cursor); --remaining)
        ++cursor;

      field_start = cursor;
    }

  return rec;
}


/************************************************************************
 *(h1 "Printing Tables to Streams")
 * 
 * 
 * 
 */


/*(c rel_print_table)
 * void rel_print_table (int fd, rel_table table);
 * 
 * Print `table' on descriptor `fd' as one record per
 * line, with fields separated by tabs.
 * 
 * In the event of an I/O error, exit with non-0, non-1 status.
 */
void
rel_print_table (int fd, rel_table table)
{
  int n_records = ar_size ((void *)table, 0, sizeof (rel_record));
  int row = 0;

  /* One call to the tab-separated record printer per row. */
  while (row < n_records)
    {
      rel_print_record (fd, table[row]);
      ++row;
    }
}

/* Print `table' on descriptor `fd', one record per line, by calling
 * the escaping, tab-separated record printer on every row with the
 * given `escape_classes'.
 */
void
rel_print_pika_escape_iso8859_1_table (int fd, int escape_classes, rel_table table)
{
  int n_records = ar_size ((void *)table, 0, sizeof (rel_record));
  int row = 0;

  while (row < n_records)
    {
      rel_print_pika_escape_iso8859_1_record (fd, escape_classes, table[row]);
      ++row;
    }
}


/*(c rel_print_table_sp)
 * void rel_print_table_sp (int fd, rel_table file);
 * 
 * Print the table `file' on descriptor `fd' as one record per
 * line, with fields separated by single spaces.
 * 
 * In the event of an I/O error, exit with non-0, non-1 status.
 */
void
rel_print_table_sp (int fd, rel_table file)
{
  int n_records = ar_size ((void *)file, 0, sizeof (rel_record));
  int row = 0;

  /* One call to the space-separated record printer per row. */
  while (row < n_records)
    {
      rel_print_record_sp (fd, file[row]);
      ++row;
    }
}

/* Print the table `file' on descriptor `fd', one record per line, by
 * calling the escaping, space-separated record printer on every row
 * with the given `escape_classes'.
 */
void
rel_print_pika_escape_iso8859_1_table_sp (int fd, int escape_classes, rel_table file)
{
  int n_records = ar_size ((void *)file, 0, sizeof (rel_record));
  int row = 0;

  while (row < n_records)
    {
      rel_print_pika_escape_iso8859_1_record_sp (fd, escape_classes, file[row]);
      ++row;
    }
}


/* Print the fields of `rec' on `fd', separated by tabs and followed by
 * a newline.  A record with no fields prints nothing at all.
 */
static void
rel_print_record (int fd, rel_record rec)
{
  int n_fields = ar_size ((void *)rec, 0, sizeof (rel_field));
  char * separator = "";
  int nth;

  for (nth = 0; nth < n_fields; ++nth)
    {
      safe_printfmt (fd, "%s%s", separator, rec[nth]);
      /* Every field after the first is preceded by a tab. */
      separator = "\t";
    }

  if (n_fields > 0)
    safe_printfmt (fd, "\n");
}

/* Print the fields of `rec' on `fd', each passed through
 * `pika_save_escape_iso8859_1' with `escape_classes', separated by
 * tabs and followed by a newline.  A record with no fields prints
 * nothing at all.
 */
static void
rel_print_pika_escape_iso8859_1_record (int fd, int escape_classes, rel_record rec)
{
  int n_fields = ar_size ((void *)rec, 0, sizeof (rel_field));
  char * separator = "";
  int nth;

  for (nth = 0; nth < n_fields; ++nth)
    {
      t_uchar * escaped = pika_save_escape_iso8859_1 (0, 0, escape_classes, rec[nth]);

      safe_printfmt (fd, "%s%s", separator, escaped);
      /* The escaped copy is only needed for this one print. */
      lim_free (0, escaped);
      separator = "\t";
    }

  if (n_fields > 0)
    safe_printfmt (fd, "\n");
}



/* Print the fields of `rec' on `fd', separated by single spaces and
 * followed by a newline.  A record with no fields prints nothing at all.
 */
static void
rel_print_record_sp (int fd, rel_record rec)
{
  int n_fields = ar_size ((void *)rec, 0, sizeof (rel_field));
  char * separator = "";
  int nth;

  for (nth = 0; nth < n_fields; ++nth)
    {
      safe_printfmt (fd, "%s%s", separator, rec[nth]);
      /* Every field after the first is preceded by a space. */
      separator = " ";
    }

  if (n_fields > 0)
    safe_printfmt (fd, "\n");
}

/* Print the fields of `rec' on `fd', each passed through
 * `pika_save_escape_iso8859_1' with `escape_classes', separated by
 * single spaces and followed by a newline.  A record with no fields
 * prints nothing at all.
 */
static void
rel_print_pika_escape_iso8859_1_record_sp (int fd, int escape_classes, rel_record rec)
{
  int n_fields = ar_size ((void *)rec, 0, sizeof (rel_field));
  char * separator = "";
  int nth;

  for (nth = 0; nth < n_fields; ++nth)
    {
      t_uchar * escaped = pika_save_escape_iso8859_1 (0, 0, escape_classes, rec[nth]);

      safe_printfmt (fd, "%s%s", separator, escaped);
      /* The escaped copy is only needed for this one print. */
      lim_free (0, escaped);
      separator = " ";
    }

  if (n_fields > 0)
    safe_printfmt (fd, "\n");
}

/* rel_set_subtract:
 * return a new table with one row for each record of `left' whose
 * first column has no equal first column in `right'.
 * identity is considered the first column of the rel tables.
 * only that first column is copied into the answer: the output
 * spec passed to `rel_join' is the single pair (table 1, field 0).
 * left and right are mutated: both are sorted in place by their
 * first column before the join.
 * the answer is not sorted separately; its rows come out in the
 * order in which the join visits the sorted `left'.
 */
rel_table
rel_set_subtract (rel_table left, rel_table right)
{
  struct rel_join_output_spec * first_column_of_left;

  /* `rel_join' needs both inputs sorted on the join field. */
  rel_sort_table_by_field (0, left, 0);
  rel_sort_table_by_field (0, right, 0);

  first_column_of_left = rel_join_output (1,0, -1);

  /* Mode 1: emit only the rows unique to the first table. */
  return rel_join (1, first_column_of_left, 0, 0, left, right);
}




/* tag: Tom Lord Mon May  5 12:50:00 2003 (relational.c)
 */
