/*
 * Perform the basic line parsing of input performed by awk.
 */

#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define LINEBUF 2048

/*
 * Per-machine state for the awkemu line parser. The actions in the awkemu
 * machine specification read and write these fields through the `fsm` pointer.
 */
struct awkemu
{
	/* Raw text of the current line, filled one character at a time by the
	 * linechar action and null-terminated by termline. */
	char lineBuf[LINEBUF];
	/* The current line broken into words: each word's characters (wordchar)
	 * followed by a null byte (termword), stored back to back. */
	char blineBuf[LINEBUF];
	/* Number of characters currently stored in lineBuf. */
	int lineLen;
	/* Number of bytes currently used in blineBuf, including the nulls. */
	int blineLen;
	/* Number of words terminated so far on the current line. */
	int words;

	/* Not referenced by the hand-written code in this file; presumably the
	 * current-state variable used by the generated machine code -- confirm
	 * against the Ragel output. */
	int curs;
};

%% awkemu
{
	# Machine specification for the awk-style line parser. Every input line is
	# processed by two machines at once: one buffers the whole line verbatim
	# and one breaks it up into whitespace separated words. At the end of each
	# line the captured text and its words are printed.
	#
	# Both capture buffers hold LINEBUF bytes. The capture actions below check
	# the remaining capacity before every store, so a line that is too long is
	# truncated instead of being written past the end of the buffers.

	# Starts a line. Will initialize all the data necessary for capturing the line.
	action startline {
		fsm->lineLen = 0;
		fsm->blineLen = 0;
		fsm->words = 0;
	}

	# Will be executed on every character seen in a word. Captures the word
	# to the broken up line buffer. One byte is always held back for the
	# terminating null, so characters that do not fit are dropped.
	action wordchar {
		if ( fsm->blineLen < LINEBUF - 1 ) {
			fsm->blineBuf[fsm->blineLen] = fc;
			fsm->blineLen += 1;
		}
	}

	# Terminate a word. Adds the null after the word and increments the word count
	# for the line. A word is only counted when its null fits in the buffer,
	# which keeps the word count in step with the number of strings termline
	# can walk. On an over-long line the last counted word may be cut short or
	# empty.
	action termword {
		if ( fsm->blineLen < LINEBUF ) {
			fsm->blineBuf[fsm->blineLen] = 0;
			fsm->blineLen += 1;
			fsm->words += 1;
		}
	}

	# Will be executed on every character seen in a line (not including
	# the newline itself). One byte is held back for the null that termline
	# appends, so characters that do not fit are dropped.
	action linechar {
		if ( fsm->lineLen < LINEBUF - 1 ) {
			fsm->lineBuf[fsm->lineLen] = fc;
			fsm->lineLen += 1;
		}
	}

	# Terminate a line. Print out what was captured: first the word count and
	# the raw line, then each word on its own line. The words are stored back
	# to back in blineBuf, so the cursor steps over each string and its null.
	action termline {
		int i;
		char *pword = fsm->blineBuf;
		fsm->lineBuf[fsm->lineLen] = 0;
		printf("endline(%i): %s\n", fsm->words, fsm->lineBuf );
		for ( i = 0; i < fsm->words; i++ ) {
			printf("  word: %s\n", pword );
			pword += strlen(pword) + 1;
		}
	}


	# This section of the machine deals with breaking up lines into fields.
	# Lines are separated by the whitespace and put in an array of words.

	# Words in a line.
	word = (extend - [ \t\n])+;

	# The whitespace separating words in a line.
	whitespace = [ \t];

	# The components in a line to break up. Either a word or a single char of
	# whitespace. On the word capture characters.
	blineElements = word $wordchar %termword | whitespace;

	# Star the break line elements. Just be careful to decrement the leaving
	# priority as we don't want multiple character identifiers to be treated as
	# multiple single char identifiers.
	breakLine = ( blineElements $1 %0 )* . '\n';

	# This machine lets us capture entire lines. We do it separate from the words
	# in a line.
	bufLine = (extend - '\n')* $linechar %termline . '\n';

	# A line can then consist of the machine that will break up the line into
	# words and a machine that will buffer the entire line.
	line = ( breakLine | bufLine ) > startline;

	# Any number of lines.
	main := line*;
}

/* Number of bytes requested from stdin per fread() call in main(). */
#define BUFSIZE 2048
/* Chunk buffer that main() fills from stdin and hands to awkemu_execute(). */
char buf[BUFSIZE];
/* The single parser instance that main() initializes, runs and finishes. */
struct awkemu awkEmu;

/*
 * Program entry point. Reads stdin in BUFSIZE-byte chunks and pushes every
 * chunk through the awkemu parser, then prints "fail" when awkemu_finish()
 * reports a value <= 0. Always returns 0.
 *
 * awkemu_init(), awkemu_execute() and awkemu_finish() are not defined in the
 * hand-written code here; presumably they are generated from the awkemu
 * machine specification -- confirm against the Ragel output.
 */
int main()
{
	int got;

	awkemu_init( &awkEmu );

	/* Each chunk is executed before the loop test, so the final short (or
	 * empty) chunk is still handed to the parser. A read that does not fill
	 * the whole buffer ends the loop. */
	do {
		got = fread( buf, 1, BUFSIZE, stdin );
		awkemu_execute( &awkEmu, buf, got );
	} while ( got == BUFSIZE );

	if ( awkemu_finish( &awkEmu ) <= 0 )
		printf("fail\n");

	return 0;
}
