//  crm114_.c  - Controllable Regex Mutilator,  version v1.0
//  Copyright 2001-2004  William S. Yerazunis, all rights reserved.
//  
//  This software is licensed to the public under the Free Software
//  Foundation's GNU GPL, version 2.  You may obtain a copy of the
//  GPL by visiting the Free Software Foundations web site at
//  www.fsf.org, and a copy is included in this distribution.  
//
//  Other licenses may be negotiated; contact the 
//  author for details.  
//
//  include some standard files
#include "crm114_sysincludes.h"

//  include any local crm114 configuration file
#include "crm114_config.h"

//  include the crm114 data structures file
#include "crm114_structs.h"

//  and include the routine declarations file
#include "crm114.h"

//    the command line argc, argv (defined in another translation unit)
extern int prog_argc;
extern char **prog_argv;

//    the auxiliary input buffer (for WINDOW input)
extern char *newinputbuf;

//    the globals used when we need a big buffer - allocated once, used
//    wherever needed.  These are sized to the same size as the data window.
extern char *inbuf;
extern char *outbuf;
extern char *tempbuf;
//
//
//     crm_nexpandvar - given a counted string, expand the backslash
//     escapes and the :*: variable references it contains, in place.
//
//     The inputs are a buffer with the NUL-safe string in it (the
//     string is counted, so it may contain NUL bytes), the length of
//     this string, and the maximum allocated length of the buffer.
//     The function returns the new length of the string.
//
//     The text produced by :*: expansion never grows past maxlen-1
//     characters, so there is always room for the NUL that is stored
//     after the last character; an expansion that would not fit is
//     cut short.  (When no :*: expansion is needed the string can
//     only shrink, and the NUL is stored only if it fits in maxlen.)
//
//     If q_expansion_mode is 2 or more, all of the work is handed to
//     crm_qexpandvar instead.
//
//     Algorithm:
//     1) do the backslash escapes (\n, \t, \xHH, \oOOO and friends).
//     2) efficiency check - with no '*' in the text there can be no
//        :*: operator, so we are done.
//     3) walk the text; wherever :*:name: appears, emit the value of
//        the variable :name: (or the text ":name:" itself if there is
//        no such variable); copy every other character unchanged.
//

//     Convert the (up to) 'width' digit characters found at buf[pos]
//     into a number in the given base.  'width' must be 3 or less.
//     Nothing at or beyond buf[inlen] is ever read; the digits are
//     first copied into a small NUL-terminated scratch string because
//     buf itself is a counted string with no reliable terminator.
//     Missing or malformed digits yield 0.
static unsigned int crm_nexpand_digits (const char *buf, long pos,
                                        long inlen, int width, int base)
{
  char digits[4];
  long avail = inlen - pos;
  int n;

  if (avail > width)
    avail = width;
  for (n = 0; n < avail; n++)
    digits[n] = buf[pos + n];
  digits[n] = '\000';
  return ((unsigned int) strtoul (digits, NULL, base));
}

//     Decode the backslash escapes in buf[0..inlen) in place and
//     return the new (never larger) length.  The write index never
//     passes the read index, so the rewrite is safe to do in place.
static long crm_nexpand_backslashes (char *buf, long inlen)
{
  long src;            //  read position
  long dst = 0;        //  write position
  unsigned int value;

  for (src = 0; src < inlen; src++)
    {
      if (buf[src] != '\\')
        {
          buf[dst] = buf[src];
          dst++;
          continue;
        }

      //   A backslash that is the very last character has nothing to
      //   escape; keep it as a literal rather than looking past the
      //   end of the string.
      if (src + 1 >= inlen)
        {
          buf[dst] = '\\';
          dst++;
          break;
        }

      //   step onto the character that follows the backslash
      src++;
      switch (buf[src])
        {
        case '0':  buf[dst] = '\0';  dst++;  break;   //  NUL
        case 'b':  buf[dst] = '\b';  dst++;  break;   //  backspace
        case 't':  buf[dst] = '\t';  dst++;  break;   //  tab
        case 'n':  buf[dst] = '\n';  dst++;  break;   //  newline
        case 'v':  buf[dst] = '\v';  dst++;  break;   //  vertical tab
        case 'f':  buf[dst] = '\f';  dst++;  break;   //  form feed
        case 'r':  buf[dst] = '\r';  dst++;  break;   //  carriage return
        case 'a':  buf[dst] = '\a';  dst++;  break;   //  bell
        case 'x':
        case 'X':
          //   \xHH - hex character constant; always uses up two
          //   characters after the 'x'.
          value = crm_nexpand_digits (buf, src + 1, inlen, 2, 16);
          buf[dst] = value;
          dst++;
          src += 2;
          break;
        case 'o':
        case 'O':
          //   \oOOO - octal character constant; always uses up three
          //   characters after the 'o'.
          value = crm_nexpand_digits (buf, src + 1, inlen, 3, 8);
          buf[dst] = value;
          dst++;
          src += 3;
          break;
        case '>':
        case ')':
        case ']':
        case '/':
        case ';':
        case '{':
        case '}':
        case '#':
        case '\\':
          //   >, ), ], ;, {, }, #, / and \ are themselves after a '\',
          //   but need the \ escape to pass thru the parser
          //   without terminating their enclosed args
          buf[dst] = buf[src];
          dst++;
          break;
        default:
          //   if it's "none of the above" characters, then
          //   the '\' character _stays_ as a literal
          buf[dst] = '\\';
          dst++;
          buf[dst] = buf[src];
          dst++;
          break;
        }
    }
  return (dst);
}

long crm_nexpandvar (char *buf, long inlen, long maxlen)
{
  long is, id;         //  input and destination position indexes
  long limit;          //  most characters the expanded text may hold
  long vht_index;
  long q;
  //  a temporary work buffer...
  char *tbuf;
  //  and another for variable names...
  char *vname;
  long vlen;

  if (inlen == 0)
    return (0);

  if (q_expansion_mode >= 2)
    return (crm_qexpandvar (buf, inlen, maxlen, NULL));

  //  GROT GROT GROT must fix this for 8-bit safe error messages
  if (inlen > maxlen)
    {
      //   whatever fatalerror answers, there is nothing sane to
      //   expand; hand the string back untouched.
      (void) fatalerror (
                      "You have blown the gaskets while building a string.  Orig string was: ",
                      buf);
      return (inlen);
    }

  //   First thing- do the \-escapes
  inlen = crm_nexpand_backslashes (buf, inlen);
  if (inlen < maxlen)
    buf[inlen] = '\000';    // needed because slimy old GNU REGEX needs it.

  //    efficiency check - if no *, then no :*: and so no expansions needed
  if (memchr (buf, '*', inlen) == NULL)
    return (inlen);

  //    OK, we might have a :*: substitution operator, so we actually have
  //    to do some work.
  //    allocate some memory for tbuf and vname;
  tbuf = (char *) malloc (maxlen);
  vname = (char *) malloc (maxlen);
  if (tbuf == NULL || vname == NULL)
    {
      //   free whichever one we did get; we cannot go on either way.
      free (tbuf);
      free (vname);
      (void) fatalerror ("Couldn't allocate memory for variable expansion!",
                         "Try making the window set smaller with the -w option");
      return (inlen);
    }

  //    leave room in buf for the NUL that follows the expanded text
  limit = maxlen - 1;

  id = 0;
  for (is = 0; is < inlen && id < limit; is++)
    {
      if (is > inlen - 5         //  check only if :*:c: possible
          || buf[is] != ':'
          || buf[is+1] != '*'
          || buf[is+2] != ':')
        {
          //   not looking at :*:c: - it's just an ordinary character
          tbuf[id] = buf[is];
          id++;
          continue;
        }

      //   yes, it's a probable variable.
      //    copy everything from the colon to the second colon
      //    (or to the end of the string) into the vname buffer.
      is = is + 2;
      vname[0] = buf[is];
      vlen = 1;
      is++;
      while (is < inlen && buf[is] != ':')
        {
          vname[vlen] = buf[is];
          is++;
          vlen++;
        }
      //    if we stopped on the second colon, it is part of the name
      if (is < inlen)
        {
          vname[vlen] = ':';
          vlen++;
        }
      vname[vlen] = '\000';

      //
      //      Now we've got the variable name in vname, we can
      //      go get its value and copy _that_ into tbuf as well.
      if (internal_trace)
        fprintf (stderr, "looking up variable >%s<\n", vname);
      vht_index = crm_vht_lookup (vht, vname, vlen);

      if (vht[vht_index] == NULL)
        {
          //      there was no variable by that name, just put the
          //     name itself there.  Note that we retain the :'s
          //     but that the :* prefix goes away.
          for (q = 0; q < vlen && id < limit; q++)
            {
              tbuf[id] = vname[q];
              id++;
            }
        }
      else
        {
          //     There really was a variable value by that name.
          //     suck it out, and splice it in!

          //   if this was :_iso:, update iso's length
          if (strncmp (
                 (char *) &vht[vht_index]->nametxt[vht[vht_index]->nstart],
                 ":_iso:", 6) == 0)
            {
              vht[vht_index]->vlen = tdw->nchars;
            }

          for (q = 0; q < vht[vht_index]->vlen && id < limit; q++)
            {
              tbuf[id] = vht[vht_index]->valtxt
                [(vht[vht_index]->vstart)+q];
              id++;
            }
        }
    }

  //    That's all, folks!  Copy the result back and NUL-terminate it
  //    in case we need to do stupid non-8-bit-clean IO on it, then
  //    clean up the temporary buffers.
  memcpy (buf, tbuf, id);
  buf[id] = '\000';
  free (tbuf);
  free (vname);
  if (internal_trace)
    fprintf (stderr, " Returned length from nexpandvar is %ld\n", id);
  return (id);
}


//     crm_qexpandvar - "expanded" expandvar.  Like nexpandvar, but moreso.
//
//     nexpandvar just does \ and :*: expansion.  qexpandvar also does
//     :#: (string length) and :@: (math evaluation), each in its own
//     pass over the text.  (not recursively though.)
//
//     The inputs are a buffer with the NUL-safe string in it (the
//     string is counted, so it may contain NUL bytes), the length of
//     this string, and the maximum allocated length of the buffer.
//     The function returns the new length of the string.
//
//     retstat is handed on to strmath for :@: evaluations and may be
//     NULL; when it is not NULL and holds a negative value after an
//     evaluation, a fatalerror is raised and expansion stops, with
//     the text as it stood before the math pass left in buf.
//
//     The text produced by the expansion passes never grows past
//     maxlen-1 characters, so there is always room for the NUL that
//     is stored after the last character; an expansion that would
//     not fit is cut short.
//
//     Algorithm:
//     1) do the backslash escapes (\n, \t, \xHH, \oOOO and friends).
//     2) efficiency check - with no ':' in the text there can be no
//        operator, so we are done.
//     3) first pass:  replace :*:name: by the value of :name:
//     4) second pass: replace :#:name: by the length of that value
//     5) third pass:  replace :@:expr: by the result of the math
//     In each pass every other character is copied unchanged, and a
//     name that is not a variable is used as literal text instead.
//

//     Convert the (up to) 'width' digit characters found at buf[pos]
//     into a number in the given base.  'width' must be 3 or less.
//     Nothing at or beyond buf[inlen] is ever read; the digits are
//     first copied into a small NUL-terminated scratch string because
//     buf itself is a counted string with no reliable terminator.
//     Missing or malformed digits yield 0.
static unsigned int crm_qexpand_digits (const char *buf, long pos,
                                        long inlen, int width, int base)
{
  char digits[4];
  long avail = inlen - pos;
  int n;

  if (avail > width)
    avail = width;
  for (n = 0; n < avail; n++)
    digits[n] = buf[pos + n];
  digits[n] = '\000';
  return ((unsigned int) strtoul (digits, NULL, base));
}

//     Decode the backslash escapes in buf[0..inlen) in place and
//     return the new (never larger) length.  The write index never
//     passes the read index, so the rewrite is safe to do in place.
static long crm_qexpand_backslashes (char *buf, long inlen)
{
  long src;            //  read position
  long dst = 0;        //  write position
  unsigned int value;

  for (src = 0; src < inlen; src++)
    {
      if (buf[src] != '\\')
        {
          buf[dst] = buf[src];
          dst++;
          continue;
        }

      //   A backslash that is the very last character has nothing to
      //   escape; keep it as a literal rather than looking past the
      //   end of the string.
      if (src + 1 >= inlen)
        {
          buf[dst] = '\\';
          dst++;
          break;
        }

      //   step onto the character that follows the backslash
      src++;
      switch (buf[src])
        {
        case '0':  buf[dst] = '\0';  dst++;  break;   //  NUL
        case 'b':  buf[dst] = '\b';  dst++;  break;   //  backspace
        case 't':  buf[dst] = '\t';  dst++;  break;   //  tab
        case 'n':  buf[dst] = '\n';  dst++;  break;   //  newline
        case 'v':  buf[dst] = '\v';  dst++;  break;   //  vertical tab
        case 'f':  buf[dst] = '\f';  dst++;  break;   //  form feed
        case 'r':  buf[dst] = '\r';  dst++;  break;   //  carriage return
        case 'a':  buf[dst] = '\a';  dst++;  break;   //  bell
        case 'x':
        case 'X':
          //   \xHH - hex character constant; always uses up two
          //   characters after the 'x'.
          value = crm_qexpand_digits (buf, src + 1, inlen, 2, 16);
          buf[dst] = value;
          dst++;
          src += 2;
          break;
        case 'o':
        case 'O':
          //   \oOOO - octal character constant; always uses up three
          //   characters after the 'o'.
          value = crm_qexpand_digits (buf, src + 1, inlen, 3, 8);
          buf[dst] = value;
          dst++;
          src += 3;
          break;
        case '>':
        case ')':
        case ']':
        case '/':
        case ';':
        case '{':
        case '}':
        case '#':
        case '\\':
          //   >, ), ], ;, {, }, #, / and \ are themselves after a '\',
          //   but need the \ escape to pass thru the parser
          //   without terminating their enclosed args
          buf[dst] = buf[src];
          dst++;
          break;
        default:
          //   if it's "none of the above" characters, then
          //   the '\' character _stays_ as a literal
          buf[dst] = '\\';
          dst++;
          buf[dst] = buf[src];
          dst++;
          break;
        }
    }
  return (dst);
}

//     Append the n characters at src to tbuf at position *id, moving
//     *id along and stopping early once *id reaches limit.
static void crm_qexpand_emit (char *tbuf, long *id, long limit,
                              const char *src, long n)
{
  long q;

  for (q = 0; q < n && *id < limit; q++)
    {
      tbuf[*id] = src[q];
      (*id)++;
    }
}

//     Run one expansion pass over buf[0..inlen) for the operator
//     :<opchar>: where opchar is '*', '#' or '@'.  The expanded text
//     is built in tbuf, then copied back into buf and NUL-terminated;
//     the return value is its length (maxlen-1 at the most).
//
//     tbuf and vname are scratch buffers of at least maxlen bytes.
//     If a math evaluation fails, *aborted is set to 1, buf is left
//     exactly as it was on entry, and inlen is returned.
static long crm_qexpand_pass (char *buf, long inlen, long maxlen,
                              char *tbuf, char *vname, char opchar,
                              long *retstat, int *aborted)
{
  long limit = maxlen - 1;   //  leave room for the trailing NUL
  long is, id;               //  input and destination position indexes
  long vlen;                 //  length of the name, colons included
  long textlen;              //  length of the text between the colons
  long vht_index;
  long q;

  id = 0;
  for (is = 0; is < inlen && id < limit; is++)
    {
      if (is > inlen - 5         //  check only if :*:c: possible
          || buf[is] != ':'
          || buf[is+1] != opchar
          || buf[is+2] != ':')
        {
          //   not looking at an operator - just an ordinary character
          tbuf[id] = buf[is];
          id++;
          continue;
        }

      //   yes, it's probably an expansion of some sort.
      //    copy everything from the colon to the second colon
      //    ( or the end of the string) into the vname buffer.
      is = is + 2;
      vname[0] = buf[is];
      vlen = 1;
      is++;
      while (is < inlen && buf[is] != ':')
        {
          vname[vlen] = buf[is];
          is++;
          vlen++;
        }
      textlen = vlen - 1;        //  everything after the opening colon
      //    if we stopped on the second colon, it is part of the name
      if (is < inlen)
        {
          vname[vlen] = ':';
          vlen++;
        }
      vname[vlen] = '\000';

      //
      //      Now we've got the variable name in vname, we can
      //      go look it up.
      if (internal_trace)
        fprintf (stderr, "looking up variable >%s<\n", vname);
      vht_index = crm_vht_lookup (vht, vname, vlen);

      //   if this was :_iso:, update iso's length
      if (vht[vht_index] != NULL
          && strncmp (
                (char *) &vht[vht_index]->nametxt[vht[vht_index]->nstart],
                ":_iso:", 6) == 0)
        {
          vht[vht_index]->vlen = tdw->nchars;
        }

      switch (opchar)
        {
        case '*':
          {
            if (vht[vht_index] == NULL)
              {
                //   no variable by that name - use the name itself,
                //   colons and all (only the :* prefix goes away)
                crm_qexpand_emit (tbuf, &id, limit, vname, vlen);
              }
            else
              {
                //   splice in the variable's text value
                for (q = 0; q < vht[vht_index]->vlen && id < limit; q++)
                  {
                    tbuf[id] = vht[vht_index]->valtxt
                      [(vht[vht_index]->vstart)+q];
                    id++;
                  }
              }
          }
          break;

        case '#':
          {
            //
            //   We want the _length_: of the variable's value if
            //   there is such a variable, otherwise of the text
            //   between the colons.
            //
            char lentext[32];    //  ample for any long in decimal
            long len;

            if (vht[vht_index] == NULL)
              len = textlen;
            else
              len = (long) vht[vht_index]->vlen;
            sprintf (lentext, "%ld", len);
            crm_qexpand_emit (tbuf, &id, limit,
                              lentext, (long) strlen (lentext));
          }
          break;

        case '@':
          {
            //
            //   Math: evaluate the variable's value if there is such
            //   a variable, otherwise the text between the colons.
            //
            char mathtext[MAX_VARNAME];
            long mathlen;

            if (vht[vht_index] == NULL)
              mathlen = textlen;
            else
              mathlen = (long) vht[vht_index]->vlen;
            if (mathlen < 0)
              mathlen = 0;
            if (mathlen >= MAX_VARNAME)
              {
                //   it would not fit in mathtext; refuse rather than
                //   run off the end of the array.
                (void) fatalerror ("Math expression is too long to evaluate: ",
                                   vname);
                *aborted = 1;
                return (inlen);
              }

            if (vht[vht_index] == NULL)
              {
                memcpy (mathtext, &vname[1], mathlen);
              }
            else
              {
                for (q = 0; q < mathlen; q++)
                  mathtext[q] = vht[vht_index]->valtxt
                    [(vht[vht_index]->vstart)+q];
              }
            mathtext[mathlen] = '\000';

            if (internal_trace)
              fprintf (stderr, "In-Mathtext is -'%s'-\n", mathtext);
            //   strmath leaves its result, as text, in mathtext
            (void) strmath (mathtext, mathlen, MAX_VARNAME, retstat);
            if (internal_trace)
              fprintf (stderr, "Out-Mathtext is -'%s'-\n", mathtext);
            if (retstat && *retstat < 0)
              {
                (void) fatalerror ("Problem during math evaluation of ",
                                   mathtext);
                *aborted = 1;
                return (inlen);
              }
            crm_qexpand_emit (tbuf, &id, limit,
                              mathtext, (long) strlen (mathtext));
          }
          break;

        default:
          break;
        }
    }

  //    Copy the result back with memcpy, not strncpy: the text is
  //    counted and may legitimately contain NUL bytes.
  memcpy (buf, tbuf, id);
  buf[id] = '\000';
  return (id);
}

long crm_qexpandvar (char *buf, long inlen, long maxlen, long *retstat)
{
  //  a temporary work buffer...
  char *tbuf;
  //  and another for variable names...
  char *vname;
  //  set by a pass that had to give up (failed math evaluation)
  int aborted;

  if (inlen == 0)
    return (0);

  if (internal_trace)
    fprintf (stderr, "qexpandvar on =%s= len %ld\n", buf, inlen);

  //  GROT GROT GROT must fix this for 8-bit safe error messages
  if (inlen > maxlen)
    {
      //   whatever fatalerror answers, there is nothing sane to
      //   expand; hand the string back untouched.
      (void) fatalerror (
                      "You have blown the gaskets while building a string.  Orig string was: ",
                      buf);
      return (inlen);
    }

  //   First thing- do the \-escapes
  inlen = crm_qexpand_backslashes (buf, inlen);
  if (inlen < maxlen)
    buf[inlen] = '\000';    // needed because slimy old GNU REGEX needs it.

  if (internal_trace)
    fprintf (stderr, "backslash expansion yields: =%s= len %ld\n", buf, inlen);

  //    efficiency check - if no :, then no operators possible.
  if (memchr (buf, ':', inlen) == NULL)
    return (inlen);

  //    OK, we might have a substitution operator, so we actually have
  //    to do some work.
  //    allocate some memory for tbuf and vname;
  tbuf = (char *) malloc (maxlen);
  vname = (char *) malloc (maxlen);
  if (tbuf == NULL || vname == NULL)
    {
      //   free whichever one we did get; we cannot go on either way.
      free (tbuf);
      free (vname);
      (void) fatalerror ("Couldn't allocate memory for Q-variable expansion!",
                         "Try making the window set smaller with the -w option");
      return (inlen);
    }

  aborted = 0;

  //
  //   First time through - :*: (variable expansion)
  //
  inlen = crm_qexpand_pass (buf, inlen, maxlen, tbuf, vname, '*',
                            retstat, &aborted);
  if (internal_trace)
    fprintf (stderr, " var-expand yields: =%s= len %ld\n", buf, inlen);

  //
  //   Second time through - expand :#: (string lengths)
  //
  inlen = crm_qexpand_pass (buf, inlen, maxlen, tbuf, vname, '#',
                            retstat, &aborted);
  if (internal_trace)
    fprintf (stderr, " length-expand yields: =%s= len %ld\n", buf, inlen);

  //
  //   Third pass - handle :@:  (math evaluations)
  //
  inlen = crm_qexpand_pass (buf, inlen, maxlen, tbuf, vname, '@',
                            retstat, &aborted);
  if (!aborted && internal_trace)
    fprintf (stderr, " math-expand yields: =%s= len %ld\n", buf, inlen);

  //    That's all, folks!  Clean up the temporary buffers - on the
  //    failure path too.
  free (tbuf);
  free (vname);

  if (!aborted && internal_trace)
    {
      fprintf (stderr, " Returned length from qexpandvar is %ld\n", inlen);
      if (retstat) fprintf (stderr, "retstat was: %ld\n", *retstat);
    }
  return (inlen);
}
