//  crm_winnow_maintenance.c  - Controllable Regex Mutilator,  version v1.0
//  Copyright 2001-2004  William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 2.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org, and a copy is included in this distribution.  
//
//  Other licenses may be negotiated; contact the 
//  author for details.  
//
//  include some standard files
#include "crm114_sysincludes.h"

//  include any local crm114 configuration file
#include "crm114_config.h"

//  include the crm114 data structures file
#include "crm114_structs.h"

//  and include the routine declarations file
#include "crm114.h"

//    the command line argc, argv
extern int prog_argc;
extern char **prog_argv;

//    the auxilliary input buffer (for WINDOW input)
extern char *newinputbuf;

//    the globals used when we need a big buffer  - allocated once, used 
//    wherever needed.  These are sized to the same size as the data window.
extern char *inbuf;
extern char *outbuf;
extern char *tempbuf;

//     How to microgroom a .css file that's getting full
//
//     NOTA BENE NOTA BENE NOTA BENE NOTA BENE
//      
//         This whole section of code is under intense develoment; right now
//         it "works" but not any better than nothing at all.  Be warned
//         that any patches issued on it may well never see the light of
//         day, as intense testing and comparison may show that the current
//         algorithms are, well, suckful.
//
//
//     There are two steps to microgrooming - first, since we know we're
//     already too full, we execute a 'zero unity bins'.  Then, we see
//     how the file looks, and if necessary, we do a "scale by R", where
//     R is the "MICROGROOM_RESCALE_FACTOR"
//
void crm_winnow_microgroom (WINNOW_FEATUREBUCKET_STRUCT *h, 
			    unsigned char *xhashes, 
			    unsigned long hfsize, unsigned long hindex)
{
  long i, j, k;
  static long microgroom_count = 0;
  long steps;
  long packstart;
  long packlen;
  //  for stochastic grooming we need a place for the random...
  unsigned long randy;
  long zeroed_countdown;
  long force_rescale;
  j = 0;
  k = 0;
  zeroed_countdown = MICROGROOM_STOP_AFTER;

  //

  //   micropack - start at initial chain start, move to back of 
  //   chain that overflowed, then scale just that chain.
  k = 0;
  i = j = hindex;
  if (i == 0) i = 1;
  if (j == 0) j = 1; 
  microgroom_count++;

  //  if (user_trace)
  //if (microgroom_count == 1)
  //  fprintf (stderr, "=");


  if (0)
    {
      if (microgroom_count == 1)
	fprintf (stderr, "  OSBW file too full: microgrooming ");
      fprintf (stderr, " .. pass %ld",
	       microgroom_count);
      fprintf (stderr, " at %lu ",
	       i);
      fprintf (stderr, "(val) %f ", 
	       h[i].value);
    };

  if (h[i].value == 0) 
    { 
      nonfatalerror ("Your program has thrown a willie during microgrooming",
		     "... which should never happen in polite company.  "
		     " Please file a bug report.");
    };
  while (h[i].value != 0)
    {
      i--;
      if (i < 1) i = hfsize - 1;
      if (i == j)  break;        // don't hang if we have a 100% full .css file
    }

  //     now, move our index to point to the first bucket in this chain.
  i++;
  if (i >= hfsize ) i = 1;
  packstart = i;

  //  fprintf (stderr, "start was %ld, backstep to %ld ",
  //   hindex, packstart);

  
  //   set our stochastic amnesia matcher - note that we add 
  //   our microgroom count so that we _eventually_ can knock out anything
  //   even if we get a whole string of buckets with hash keys that all alias
  //   to the same value.
  //
  //   We also keep track of how many buckets we've zeroed and we stop
  //   zeroing additional buckets after that point.  NO!  THAT'S A BUG!
  //   FEATURES IN THE TAIL CAN NO LONGER BE ACCESSED UNLESS THE WHOLE TAIL
  //   IS PACKED!
  //
  steps = 0;
  force_rescale = 0;
  while (h[i].value != 0 )
    {
      //  fprintf (stderr, "=");
      randy = rand() + microgroom_count;
      if ( (( h[i].key + randy ) & MICROGROOM_STOCHASTIC_MASK ) 
	   == MICROGROOM_STOCHASTIC_KEY)
	{
	  //    for winnow, we normalize toward 1.0 by averaging with 1.
	  h[i].value = (h[i].value + 1.0 )/ 2.0;
	  if ( (h[i].value > 0.9 && h[i].value < 1.1)
	       || h[i].value == 0.0 ) 
	    {
	      //      fprintf (stderr, "|");
	      zeroed_countdown--;
	      h[i].value = 0.0;
	      h[i].key = 0;
	      h[i].hash = 0;
	    };
	};
      i++;
      if ( i >= hfsize ) i = 1;
      steps++;
    };
  packlen = steps;

  //fprintf (stderr, "with steps %ld ", packlen);
  //   now we pack the buckets
  crm_packcss_winnow ((FEATUREBUCKET_TYPE *)h, xhashes, hfsize, packstart, packlen);

}

void crm_packcss_winnow (FEATUREBUCKET_TYPE *h, unsigned char *xhashes,
			 long hs, long packstart, long packlen)
{
  //    How we pack...
  //   
  //    We look at each bucket, and attempt to reinsert it at the "best"
  //    place.  We know at worst it will end up where it already is, and
  //    at best it will end up lower (at a lower index) in the file, except
  //    if it's in wraparound mode, in which case we know it will not get
  //    back up past us (since the file must contain at least one empty)
  //    and so it's still below us in the file.

  if (packstart+packlen <= hs)   //  no wraparound in this case
    {
      crm_packseg_winnow (h, xhashes, hs, packstart, packlen);
    }
  else    //  wraparound mode - do it as two separate repacks
    {
      crm_packseg_winnow (h, xhashes, hs, packstart, (hs - packstart));
      crm_packseg_winnow (h, xhashes, hs, 1, (packlen - (hs - packstart)));
    };
}
      
void crm_packseg_winnow (FEATUREBUCKET_TYPE *h, 
		  unsigned char *xhashes,
		  long hs, long packstart, long packlen)
{
  unsigned long ifrom, ito;
  unsigned long thash, tkey, tvalue, txhash;

  if (internal_trace) fprintf (stderr, " < %ld %ld >", packstart, packlen);

  for (ifrom = packstart; ifrom < packstart + packlen; ifrom++)
    {
      //  Is it an empty bucket?  (remember, we're compressing out 
      //  all placeholder buckets, so any bucket that's zero-valued
      //  is a valid target.)
      if ( h[ifrom].value == 0)
	{
	  //    Empty bucket - turn it from marker to empty
	  h[ifrom].key = 0;
	  h[ifrom].hash = 0;
	  xhashes[ifrom] = 0;
	}
    }

  //  Our slot values are now somewhat in disorder because empty
  //  buckets may now have been inserted into a chain where there used
  //  to be placeholder buckets.  We need to re-insert slot data in a
  //  bucket where it will be found.
  //
  for (ifrom = packstart; ifrom < packstart+packlen; ifrom++)
    {
      //    Now find the next bucket to place somewhere
      //
      thash  = h[ifrom].hash;
      tkey   = h[ifrom].key;
      tvalue = h[ifrom].value;
      txhash = xhashes[ifrom];

      if (tvalue != 0)
	{
	  ito = thash % hs;
	  if (ito == 0) ito = 1;
	  // fprintf (stderr, "a %ld", ito);

	  while ( ! (h[ito].value == 0 || 
		     (h[ito].hash == thash && h[ito].key == tkey )))
	    {
	      ito++;
	      if (ito >= hs) ito = 1;
	      //  fprintf (stderr, "A %ld", ito);
	    };
	  
	  //
	  //    found an empty slot, put this value there, and zero the
	  //    original one.  Sometimes this is a noop.  We don't care.

	  // fprintf (stderr, "b %ld", ito);
	  
	  h[ifrom].hash  = 0;
	  h[ifrom].key   = 0;
	  h[ifrom].value = 0;
	  xhashes[ifrom] = 0;

	  // fprintf (stderr, "c %ld ", ito );

	  h[ito].hash  = thash;
	  h[ito].key   = tkey;
	  h[ito].value = tvalue;
	  xhashes[ito] = txhash;
	  // fprintf (stderr, "d %ld", ito);
	};
    };
}

