#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;

/*
 * Token codes handed to Scanner::token(). Every value lies above 126, so none
 * of them can collide with the single-character symbols, which the scanner
 * reports using the character itself as the token code.
 */
#define TK_Dlit 192
#define TK_Slit 193
#define TK_Float 194
#define TK_Id 195
#define TK_NameSep 197
#define TK_Arrow 211
#define TK_PlusPlus 212
#define TK_MinusMinus 213
#define TK_ArrowStar 214
#define TK_DotStar 215
#define TK_ShiftLeft 216
#define TK_ShiftRight 217
#define TK_IntegerDecimal 218
#define TK_IntegerOctal 219
#define TK_IntegerHex 220
#define TK_EqualsEquals 223
#define TK_NotEquals 224
#define TK_AndAnd 225
#define TK_OrOr 226
#define TK_MultAssign 227
#define TK_DivAssign 228
#define TK_PercentAssign 229
#define TK_PlusAssign 230
#define TK_MinusAssign 231
#define TK_AmpAssign 232
#define TK_CaretAssign 233
#define TK_BarAssign 234
#define TK_DotDotDot 240
#define TK_Whitespace 241
#define TK_Comment 242

/*
 * State of the tokenizer. The fields are plain data that the scanner code
 * reads and updates while Scanner::run() is executing.
 */
struct Scanner
{
	/* Current machine state; run() compares it against Scanner_error. */
	int cs;
	/* Not referenced directly in this file; presumably bookkeeping for the
	 * generated scanner code -- confirm against the Ragel output. */
	int act;

	/* Start of the token being matched; null when no token is pending. */
	char *tokstart;
	/* One past the last character of the token being matched. */
	char *tokend;

	/* Selects the comment style being consumed: true for line comments,
	 * false for block comments. */
	bool isCxx;

	/* Print the token code followed by the text in [tokstart, tokend). */
	void token( int tok );

	/* Read standard input in chunks and scan it to the end. */
	void run();
};


%%{
	machine Scanner;

	# Process all comments, relies on isCxx being set.
	# The main scanner sets isCxx before jumping here: true for C++ style
	# line comments, false for C style block comments.
	comment := |*
		# The block comment terminator ends a block comment and returns to
		# main. Inside a line comment it is ordinary text and both of its
		# characters are printed.
		'*/' {
			if ( ! isCxx )
				fgoto main;
			else {
				cout << "comm char: " << tokstart[0] << endl;
				cout << "comm char: " << tokstart[1] << endl;
			}
		};

		# A newline ends a line comment and returns to main. Inside a block
		# comment it is printed like any other character.
		'\n' {
			if ( isCxx )
				fgoto main;
			else
				cout << "comm char: " << tokstart[0] << endl;
		};
		
		# Every other character of the comment is printed.
		any {
			cout << "comm char: " << tokstart[0] << endl;
		};
	*|;
	
	# Token scanner. Each pattern reports its match through token().
	main := |*

	# Single and double literals.
	( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) { token( TK_Slit );};
	( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) { token( TK_Dlit );};

	# Identifiers
	( [a-zA-Z_] [a-zA-Z0-9_]* ) { token( TK_Id ); };

	# Floating literals.
	fract_const = digit* '.' digit+ | digit+ '.';
	exponent = [eE] [+\-]? digit+;
	float_suffix = [flFL];

	( fract_const exponent? float_suffix? |
		digit+ exponent float_suffix? ) { token( TK_Float );};
	
	# Integer decimal. Leading part buffered by float.
	( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) { token( TK_IntegerDecimal );};

	# Integer octal. Leading part buffered by float.
	( '0' [0-9]+ [ulUL]{0,2} ) { token( TK_IntegerOctal );};

	# Integer hex. Leading 0 buffered by float.
	( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) { token( TK_IntegerHex );};

	# Only buffer the second item, first buffered by symbol.
	'::' {token( TK_NameSep );};
	'==' {token( TK_EqualsEquals );};
	'!=' {token( TK_NotEquals );};
	'&&' {token( TK_AndAnd );};
	'||' {token( TK_OrOr );};
	'*=' {token( TK_MultAssign );};
	'/=' {token( TK_DivAssign );};
	'%=' {token( TK_PercentAssign );};
	'+=' {token( TK_PlusAssign );};
	'-=' {token( TK_MinusAssign );};
	'&=' {token( TK_AmpAssign );};
	'^=' {token( TK_CaretAssign );};
	'|=' {token( TK_BarAssign );};
	'++' {token( TK_PlusPlus );};
	'--' {token( TK_MinusMinus );};
	'->' {token( TK_Arrow );};
	'->*' {token( TK_ArrowStar );};
	'.*' {token( TK_DotStar );};

	# Three char compounds, first item already buffered.
	'...' { token( TK_DotDotDot );};

	# Single char symbols. The character itself is used as the token code.
	( punct - [_"'] ) { token( tokstart[0] );};

	# Comments and whitespace. Handle these outside of the machine so that we
	# don't end up buffering the comments.
	# The comment opener only records the comment style in isCxx and hands
	# control to the comment scanner above.
	'/*' { isCxx = false; fgoto comment; };
	'//' { isCxx = true; fgoto comment; };

	# A run of characters outside the range 33..126 is reported as one
	# whitespace token.
	( any - 33..126 )+ { token( TK_Whitespace );};

	*|;
}%%

// Ragel directive: emit the generated static data for the Scanner machine here.
%% write data nofinal;

/*
 * Report one token on standard output as "<code> text" followed by a newline.
 * The text is the character range [tokstart, tokend); nothing is printed for
 * it while tokstart is null or the range is empty.
 */
void Scanner::token( int tok )
{
	cout << "<" << tok << "> ";

	if ( tokstart != 0 ) {
		/* Emit the matched characters one at a time, in order. */
		for ( const char *ch = tokstart; ch < tokend; ++ch )
			cout << *ch;
	}

	cout << '\n';
}

/* Size of the input window. A single token can never be longer than this. */
#define BUFSIZE 8
static char buf[BUFSIZE];

/*
 * Scan standard input to the end.
 *
 * Input is read into the fixed window buf in chunks. When a chunk ends in the
 * middle of a token, the partial token is moved to the front of buf and the
 * next chunk is read in behind it, so the scanner always sees each token as
 * one contiguous range.
 *
 * Prints "PARSE ERROR" and exits with status 1 when the machine enters its
 * error state. Prints "TOKEN TOO BIG" and exits with status 1 when a single
 * token fills the whole window.
 */
void Scanner::run()
{
	%% write init;

	/* Number of bytes at the front of buf that belong to a pending token. */
	int have = 0;
	while ( true ) {
		/* Read the next chunk behind the preserved token prefix, if any. */
		char *data = buf + have;
		cin.read( data, BUFSIZE - have );
		int len = static_cast<int>( cin.gcount() );
		if ( len == 0 )
			break;

		char *p = data;
		char *pe = data + len;
		%% write exec;

		if ( cs == Scanner_error ) {
			/* Machine failed before finding a token. */
			cout << "PARSE ERROR" << endl;
			std::exit(1);
		}
		else if ( tokstart == 0 ) {
			/* No token is pending. Give the whole window back to the next
			 * read instead of keeping a stale prefix length. */
			have = 0;
		}
		else if ( tokstart == buf && pe == buf + BUFSIZE ) {
			/* There is a prefix, but no room to shift over. Buffer is full.
			 * A prefix at the front of a partly filled window (a short final
			 * read) is not an error and is handled by the branch below. */
			cout << "TOKEN TOO BIG" << endl;
			std::exit(1);
		}
		else {
			/* There is a prefix to preserve, shift it over. */
			have = static_cast<int>( pe - tokstart );
			std::memmove( buf, tokstart, have );
			tokend = buf + (tokend-tokstart);
			tokstart = buf;
		}
	}

	%% write eof;
}

int main()
{
	std::ios::sync_with_stdio(false);

	Scanner scanner;
	scanner.run();
	return 0;
}
