//  crm_css_maintenance_.c  - Controllable Regex Mutilator,  version v1.0
//  Copyright 2001-2004  William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 2.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org, and a copy is included in this distribution.  
//
//  Other licenses may be negotiated; contact the 
//  author for details.  
//
//  include some standard files
#include "crm114_sysincludes.h"

//  include any local crm114 configuration file
#include "crm114_config.h"

//  include the crm114 data structures file
#include "crm114_structs.h"

//  and include the routine declarations file
#include "crm114.h"

//    the command line argc, argv
extern int prog_argc;
extern char **prog_argv;

//    the auxilliary input buffer (for WINDOW input)
extern char *newinputbuf;

//    the globals used when we need a big buffer  - allocated once, used 
//    wherever needed.  These are sized to the same size as the data window.
extern char *inbuf;
extern char *outbuf;
extern char *tempbuf;

//     How to microgroom a .css file that's getting full
//
//     NOTA BENE NOTA BENE NOTA BENE NOTA BENE
//      
//         This whole section of code is under intense develoment; right now
//         it "works" but not any better than nothing at all.  Be warned
//         that any patches issued on it may well never see the light of
//         day, as intense testing and comparison may show that the current
//         algorithms are, well, suckful.
//
//
//     There are two steps to microgrooming - first, since we know we're
//     already too full, we execute a 'zero unity bins'.  Then, we see
//     how the file looks, and if necessary, we do a "scale by R", where
//     R is the "MICROGROOM_RESCALE_FACTOR"
//
void crm_microgroom (FEATUREBUCKET_TYPE *h, long hs, unsigned long hindex)
{
  long i, j, k;
  static long microgroom_count = 0;
  long steps;
  long packstart;
  long packlen;
  //  for stochastic grooming we need a place for the random...
  unsigned long randy;
  long zeroed_countdown;
  long force_rescale;
  j = 0;
  k = 0;
  zeroed_countdown = MICROGROOM_STOP_AFTER;

  //

  i = j = k = 0;
  microgroom_count++;

  if (user_trace)
    {
      if (microgroom_count == 1)
	fprintf (stderr, "CSS file too full: microgrooming this css chain: ");
      fprintf (stderr, " %ld ",
	   microgroom_count);
    };

  //   micropack - start at initial chain start, move to back of 
  //   chain that overflowed, then scale just that chain.
  i = j = hindex % hs;
  if (i == 0) i = 1;
  while (h[i].value != 0)
    {
      i--;
      if (i < 1) i = hs - 1;
      if (i == j)  break;        // don't hang if we have a 100% full .css file
      // fprintf (stderr, "-");
    }

  //     now, move our index to point to the first bucket in this chain.
  i++;
  if (i >= hs) i = 1;
  packstart = i;

  //   set our stochastic amnesia matcher - note that we add 
  //   our microgroom count so that we _eventually_ can knock out anything
  //   even if we get a whole string of buckets with hash keys that all alias
  //   to the same value.
  //
  //   We also keep track of how many buckets we've zeroed and we stop
  //   zeroing additional buckets after that point.   NO!  BUG!  That 
  //   messes up the tail length, and if we don't repack the tail, then
  //   features in the tail can become permanently inaccessible!   Therefore,
  //   we really can't stop in the middle of the tail (well, we could 
  //   stop zeroing, but we need to pass the full length of the tail in.
  //
  //   Note that we can't do this "adaptively" in packcss, because zeroes
  //   there aren't necessarily overflow chain terminators (because -we-
  //   might have inserted them here.
  //
  steps = 0;
  force_rescale = 0;
  while (h[i].value != 0 )
    {
      //      fprintf (stderr, "=");
      randy = rand() + microgroom_count;
      if ( force_rescale || 
	   (( h[i].key + randy ) & MICROGROOM_STOCHASTIC_MASK ) 
	   == MICROGROOM_STOCHASTIC_KEY)
	{
	  h[i].value = h[i].value * MICROGROOM_RESCALE_FACTOR;
	};
	  if (h[i].value == 0) zeroed_countdown--;
      i++;
      if (i >= hs ) i = 1;
      steps++;
    }
  packlen = steps;

  //   now we pack the buckets
  crm_packcss (h, hs, packstart, packlen);

}

void crm_packcss (FEATUREBUCKET_TYPE *h, long hs, long packstart, long packlen)
{
  //    How we pack...
  //   
  //    We look at each bucket, and attempt to reinsert it at the "best"
  //    place.  We know at worst it will end up where it already is, and
  //    at best it will end up lower (at a lower index) in the file, except
  //    if it's in wraparound mode, in which case we know it will not get
  //    back up past us (since the file must contain at least one empty)
  //    and so it's still below us in the file.

  //fprintf (stderr, "Packing %ld len %ld total %ld", 
  //	   packstart, packlen, packstart+packlen);
  //  if (packstart+packlen >= hs)
  //  fprintf (stderr, " BLORTTTTTT ");
  if (packstart+packlen <= hs)   //  no wraparound in this case
    {
      crm_packseg (h, hs, packstart, packlen);
    }
  else    //  wraparound mode - do it as two separate repacks
    {
      crm_packseg (h, hs, packstart, (hs - packstart));
      crm_packseg (h, hs, 1, (packlen - (hs - packstart)));
    };
}
      
void crm_packseg (FEATUREBUCKET_TYPE *h, long hs, long packstart, long packlen)
{
  unsigned long ifrom, ito;
  unsigned long thash, tkey, tvalue;

  if (internal_trace) fprintf (stderr, " < %ld %ld >", packstart, packlen);

  for (ifrom = packstart; ifrom < packstart + packlen; ifrom++)
    {
      //  Is it an empty bucket?  (remember, we're compressing out 
      //  all placeholder buckets, so any bucket that's zero-valued
      //  is a valid target.)
      if ( h[ifrom].value == 0)
	{
	  //    Empty bucket - turn it from marker to empty
	 if (internal_trace) fprintf (stderr, "x");
	  h[ifrom].key = 0;
	  h[ifrom].hash = 0;
	}
      else 
	{ if (internal_trace) fprintf (stderr, "-");};
    }

  //  Our slot values are now somewhat in disorder because empty
  //  buckets may now have been inserted into a chain where there used
  //  to be placeholder buckets.  We need to re-insert slot data in a
  //  bucket where it will be found.
  //
  ito = 0;
  for (ifrom = packstart; ifrom < packstart+packlen; ifrom++)
    {
      //    Now find the next bucket to place somewhere
      //
      thash  = h[ifrom].hash;
      tkey   = h[ifrom].key;
      tvalue = h[ifrom].value;

      if (tvalue == 0)
	{
	  if (internal_trace) fprintf (stderr, "X");
	}
      else
	{
	  ito = thash % hs;
	  if (ito == 0) ito = 1;
	  // fprintf (stderr, "a %ld", ito);

	  while ( ! ( (h[ito].value == 0)
		      || (  h[ito].hash == thash 
			    && h[ito].key == tkey )))
	    {
	      ito++;
	      if (ito >= hs) ito = 1;
	      // fprintf (stderr, "a %ld", ito);
	    };
	  
	  //
	  //    found an empty slot, put this value there, and zero the
	  //    original one.  Sometimes this is a noop.  We don't care.

	  if (internal_trace)
	    {
	      if ( ifrom == ito ) fprintf (stderr, "=");
	      if ( ito < ifrom) fprintf (stderr, "<");
	      if ( ito > ifrom ) fprintf (stderr, ">");
	    };

	  h[ifrom].hash  = 0;
	  h[ifrom].key   = 0;
	  h[ifrom].value = 0;
	  
	  h[ito].hash  = thash;
	  h[ito].key   = tkey;
	  h[ito].value = tvalue;
	};
    };
}

int crm_create_cssfile(char *cssfile, long buckets, 
		       long major, long minor, long spectrum_start)
{
  FILE *f;
  long i;
  FEATUREBUCKET_STRUCT feature = {0, 0, 0};
  
  f = fopen (cssfile, "w");
  if (!f)
    {
      fprintf (stderr, "\n Couldn't open file %s for writing; errno=%d .\n",
	       cssfile, errno);
      return (EXIT_FAILURE);
    }
  //  Initialize CSS file - zero all buckets
  feature.hash =  major;
  feature.key = minor;
  feature.value = spectrum_start;
  for (i=0; i < buckets; i++)
    {
      if (fwrite(&feature, sizeof(feature), 1, f) != 1)
        {
          fprintf (stderr, "\n Couldn't initialize .CSS file %s, "
		   "errno=%d.\n",
		   cssfile,
                   errno);
          return (EXIT_FAILURE);
        }
      //
      //   HACK ALERT HACK ALERT HACK ALERT
      //
      //  yeah,there's more efficient ways to do this, but this will
      //  stay in cache; an IF-statement will need at least three ops as
      //  well.   Probably six of one...
      feature.hash = 0;
      feature.key = 0;
      feature.value = 0;
    }
  fclose (f);
  return (EXIT_SUCCESS);
}



