#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;

// Capacity, in bytes, of the shared input buffer below.
#define BUFSIZE 4096

// Input buffer: main() reads standard input into it, and the scanner
// actions print tokstart as an offset relative to its start.
char buf[BUFSIZE];

// State carried by the Ragel-generated scanner between calls.
struct Scanner
{
	// Current machine state; compared against Scanner_error and
	// Scanner_first_final to classify the machine.
	int cs;

	// Scanner bookkeeping maintained by the generated code
	// (presumably the id of the last matched pattern -- confirm against
	// the Ragel documentation).
	int act;

	// Start of the token currently being matched; 0 when no token is in
	// progress.
	char *tokstart;

	// Token end pointer maintained by the generated code (presumably one
	// past the last matched character -- confirm against the Ragel
	// documentation).
	char *tokend;

	// Initialize the machine by running the generated init code. Returns
	// nothing.
	void init( );

	// Execute the machine on a block of data. Returns the number of bytes
	// from tokstart to the end of the data, i.e. the size of the unfinished
	// token that must be kept for the next call, or 0 when no token is in
	// progress. Error detection is left to the caller, which inspects cs.
	int execute( char *data, int len );

	// Indicate that there is no more data. Returns -1 if the machine
	// finishes in the error state, 1 if it finishes in an accepting state
	// and 0 if it finishes in any other (non-accepting) state.
	int finish( );
};

%%{
	machine Scanner;

	# Tracing actions. Each prints the current character (fc) and the offset
	# of tokstart within buf, or -1 when tokstart is unset. They are embedded
	# below as to-state ($~) and from-state ($*) actions so that the moments
	# at which tokstart is set and cleared become visible in the output.
	action to_act { 
		cout << "to:   fc = " << fc << " tokstart = " << 
				( tokstart == 0 ? -1 : tokstart-buf ) << endl;
	} 
	action from_act {
		cout << "from: fc = " << fc << " tokstart = " << 
				( tokstart == 0 ? -1 : tokstart-buf ) << endl;
	}

	# Comment bodies. Entered with fgoto from the main scanner; each consumes
	# input up to and including its terminator and then jumps back to main.
	# The $0 / @1 priorities favour the transition that completes the
	# terminator over remaining in the any* loop.
	c_comm := ( any* $0 '*/' @1 @{ fgoto main; } ) $~to_act $*from_act;
	cxx_comm := ( any* $0 '\n' @1 @{ fgoto main; } ) $~to_act $*from_act;

	# The scanner proper. Every pattern must be terminated by ';', otherwise
	# it is concatenated with the pattern that follows it.
	main := |*

	# Single and double literals.
	( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) $~ to_act $* from_act;
	( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) $~ to_act $* from_act;

	# Identifiers
	( [a-zA-Z_] [a-zA-Z0-9_]* ) $~ to_act $* from_act;

	# Floating literals.
	fract_const = digit* '.' digit+ | digit+ '.';
	exponent = [eE] [+\-]? digit+;
	float_suffix = [flFL];

	( fract_const exponent? float_suffix? |
		digit+ exponent float_suffix? ) $~ to_act $* from_act;
	
	# Integer decimal. Leading part buffered by float.
	( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) $~ to_act $* from_act;

	# Integer octal. Leading part buffered by float.
	( '0' [0-9]+ [ulUL]{0,2} ) $~ to_act $* from_act;

	# Integer hex. Leading 0 buffered by float.
	( '0x' [0-9a-fA-F]+ [ulUL]{0,2} ) $~ to_act $* from_act;

	# Three char compounds, first item already buffered.
	( '...' ) $~ to_act $* from_act;

	# Single char symbols.
	( punct - [_"'] ) $~ to_act $* from_act;

	# Comments and whitespace.
	( '/*' ) $~ to_act $* from_act { fgoto c_comm; };
	( '//' ) $~ to_act $* from_act { fgoto cxx_comm; };

	# Runs of characters outside the printable range 33..126 (whitespace,
	# control characters and the like).
	( any - 33..126 )+ $~ to_act $* from_act;

	*|;
}%%

// Ragel expands this directive into the static data of the Scanner machine,
// which is where the Scanner_error and Scanner_first_final constants used
// further down come from.
%% write data;

// Prepare the scanner for a fresh run. The body is entirely generated:
// Ragel expands the directive into the code that initializes the machine
// state (per the Ragel guide, cs plus the scanner variables tokstart,
// tokend and act).
void Scanner::init( )
{
	%% write init;
}

int Scanner::execute( char *data, int len )
{
	char *p = data;
	char *pe = data + len;

	%% write exec;

	int have = 0;
	if ( tokstart != 0 ) {
		have = pe - tokstart;
		memmove( data, tokstart, have );
	}
	return have;
}

// Signal end of input to the machine and report how it finished:
// -1 for the error state, 1 for an accepting state, 0 for anything else.
int Scanner::finish( )
{
	%% write eof;

	// The error state is checked first, then the accepting range.
	return ( cs == Scanner_error ) ? -1
			: ( cs >= Scanner_first_final ) ? 1
			: 0;
}


// Driver: feeds standard input to the scanner in buffer-sized pieces,
// keeping any unfinished token at the front of buf between reads.
int main()
{
	std::ios::sync_with_stdio(false);

	Scanner scanner;
	scanner.init();

	// kept = number of bytes of an unfinished token held at the start of buf.
	for ( int kept = 0; ; ) {
		// Append fresh input behind the bytes that were carried over.
		char *fill = buf + kept;
		cin.read( fill, BUFSIZE - kept );

		int got = cin.gcount();
		if ( got == 0 )
			break;

		kept = scanner.execute( fill, got );

		const char *failure = 0;
		if ( scanner.cs == Scanner_error ) {
			/* Machine failed before finding a token. */
			failure = "PARSE ERROR";
		}
		else if ( kept == BUFSIZE ) {
			/* Buffer is full. */
			failure = "TOKEN TOO BIG";
		}

		if ( failure != 0 ) {
			cout << failure << endl;
			exit(1);
		}
	}

	/* FIXME: Last token may get lost. */
	scanner.finish();

	return 0;
}
