#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;

/*
 * Token kind codes. The scanner actions store one of these in the global
 * `tok`, and token() prints that number in front of the token text.
 * Single-character symbols do not get a named code: the `symbol` rule stores
 * the character's own code in `tok`. Every named code below is 192 or higher.
 * NOTE(review): TK_ShiftLeft and TK_ShiftRight are not assigned by any rule
 * in the visible source.
 */
#define TK_Dlit 192
#define TK_Slit 193
#define TK_Float 194
#define TK_Id 195
#define TK_NameSep 197
#define TK_Arrow 211
#define TK_PlusPlus 212
#define TK_MinusMinus 213
#define TK_ArrowStar 214
#define TK_DotStar 215
#define TK_ShiftLeft 216
#define TK_ShiftRight 217
#define TK_IntegerDecimal 218
#define TK_IntegerOctal 219
#define TK_IntegerHex 220
#define TK_EqualsEquals 223
#define TK_NotEquals 224
#define TK_AndAnd 225
#define TK_OrOr 226
#define TK_MultAssign 227
#define TK_DivAssign 228
#define TK_PercentAssign 229
#define TK_PlusAssign 230
#define TK_MinusAssign 231
#define TK_AmpAssign 232
#define TK_CaretAssign 233
#define TK_BarAssign 234
#define TK_DotDotDot 240
#define TK_Whitespace 241
#define TK_Comment 242

/*
 * Size in bytes of the input buffer `buf`. main() reports "TOKEN TOO BIG"
 * when a single pending token fills the whole buffer, so this also bounds
 * the length of one token.
 */
#define BUFSIZE 4096

/* Kind of the most recently recognized token; the scanner's init sets it to 0. */
int tok;
/*
 * buf is the input buffer filled by main(). tokstart and tokend point at the
 * first and last character of the current token (inclusive: callers compute
 * the length as tokend - tokstart + 1).
 */
char buf[BUFSIZE], *tokstart, *tokend;
/* Prints one token (its kind and its text); defined further down. */
void token( char *data, int len );
/*
 * Set by the scanner's discard action when it enters whitespace or a comment
 * and cleared by its error action once that run has ended. main() uses it to
 * decide whether buffered text has to be kept across reads.
 */
bool discard = false;

/*
 * Scanner object driven by main() through init(), execute() and finish().
 * The embedded Ragel section selects char as the element type and asks for
 * the interface; presumably that is what declares those three member
 * functions - confirm against the Ragel version in use.
 */
struct Scanner
{
	/* NOTE(review): not referenced in the visible source; presumably used by
	 * the generated code - confirm. */
	int curs;
	%% {
		element char;
		interface;
	}
};

/*
 * Ragel definition of the Scanner machine: a tokenizer for C++ source text.
 * Each token rule stores its kind in the global tok on its last character.
 * The main rule tracks the token boundaries in tokstart and tokend, and the
 * onError action emits the finished token and restarts the machine.
 */
%% Scanner
{
	# Reset the token bookkeeping globals.
	init { 
		tok = 0;
		tokstart = 0;
		tokend = 0;
	}

	# Single and double literals.
	# Both take an optional L prefix. The body is any run of characters other
	# than the closing quote, backslash and newline, or a backslash followed
	# by any one character.
	slit = ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) @{tok = TK_Slit;};
	dlit = ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) @{tok = TK_Dlit;};

	# Identifiers
	id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) @{tok = TK_Id;};

	# Floating literals.
	# Either a fractional constant with an optional exponent, or plain digits
	# with a mandatory exponent. Both forms accept one optional suffix letter.
	fract_const = digit* '.' digit+ | digit+ '.';
	exponent = [eE] [+\-]? digit+;
	float_suffix = [flFL];
	float = 
		( fract_const exponent? float_suffix? |
		digit+ exponent float_suffix? ) @{tok = TK_Float;};
	
	# Integer decimal. Leading part buffered by float.
	# Accepts up to three suffix letters out of u, l, U, L.
	integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) @{tok = TK_IntegerDecimal;};

	# Integer octal. Leading part buffered by float.
	# Accepts up to two suffix letters.
	# NOTE(review): the digits after the leading 0 are matched with 0-9, so
	# 8 and 9 are accepted here as well - confirm that this is intended.
	integer_octal = ( '0' [0-9]+ [ulUL]{0,2} ) @{tok = TK_IntegerOctal;};

	# Integer hex. Leading 0 buffered by float.
	# NOTE(review): only a lowercase x introduces the hex digits - confirm
	# whether an uppercase X prefix should match too.
	integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) @{tok = TK_IntegerHex;};

	# Two char compounds (and the three char arrow_star).
	# Only buffer the second item, first buffered by symbol.
	# NOTE(review): no rule in this list assigns TK_ShiftLeft or TK_ShiftRight.
	namesep = '::' @{tok = TK_NameSep;};
	deqs = '==' @{tok = TK_EqualsEquals;};
	neqs = '!=' @{tok = TK_NotEquals;};
	and_and = '&&' @{tok = TK_AndAnd;};
	or_or = '||' @{tok = TK_OrOr;};
	mult_assign = '*=' @{tok = TK_MultAssign;};
	div_assign = '/=' @{tok = TK_DivAssign;};
	percent_assign = '%=' @{tok = TK_PercentAssign;};
	plus_assign = '+=' @{tok = TK_PlusAssign;};
	minus_assign = '-=' @{tok = TK_MinusAssign;};
	amp_assign = '&=' @{tok = TK_AmpAssign;};
	caret_assign = '^=' @{tok = TK_CaretAssign;};
	bar_assign = '|=' @{tok = TK_BarAssign;};
	plus_plus = '++' @{tok = TK_PlusPlus;};
	minus_minus = '--' @{tok = TK_MinusMinus;};
	arrow = '->' @{tok = TK_Arrow;};
	arrow_star = '->*' @{tok = TK_ArrowStar;};
	dot_star = '.*' @{tok = TK_DotStar;};

	# Three char compounds, first item already buffered.
	dot_dot_dot = '...' @{tok = TK_DotDotDot;};

	# All compounds
	compound = namesep | deqs | neqs | and_and | or_or | mult_assign |
			div_assign | percent_assign | plus_assign | minus_assign |
			amp_assign | caret_assign | bar_assign | plus_plus | minus_minus |
			arrow | arrow_star | dot_star | dot_dot_dot;

	# Single char symbols.
	# Any punctuation character except underscore and the two quote
	# characters. The token kind is the character code itself.
	symbol = ( punct - [_"'] ) @{tok = fc;};

	# Records that the scanner has entered whitespace or a comment.
	action discard {
		discard = true;
	}

	# Comments and whitespace.
	# The discard action runs on the last character of a comment opener and on
	# the first whitespace character. Inside a comment body the any*
	# transitions carry priority 0 and the last character of the terminator
	# carries priority 1, so the terminator closes the comment rather than
	# being consumed as more body. Whitespace is every character outside the
	# range 33..126.
	commc = '/*' @discard ( any* $0 '*/' @1 ) @{tok = TK_Comment;};
	commcc = '//' @discard ( any* $0 '\n' @1 ) @{tok = TK_Comment;};
	whitespace = ( any - 33..126 )+ >discard @{tok = TK_Whitespace;};

	# All outside code tokens.
	tokens = ( 
		id | slit | dlit | float | integer_decimal | 
		integer_octal | integer_hex | compound | symbol |
		commc | commcc | whitespace );

	# Error action, attached to main below. When a token has been recognized
	# (tok is nonzero) it is emitted through token(), or dropped if it is a
	# comment or whitespace, and the machine is restarted at main. When tok is
	# zero nothing is done here.
	action onError {
		/* NOTE(review): tok is not cleared in this action, so it keeps its
		 * previous value until a later rule assigns it again - confirm that
		 * this is intended. */
		if ( tok != 0 ) {
			char *rst_data;
			int rst_len;

			if ( tok == TK_Comment || tok == TK_Whitespace ) {
				/* Reset comment status, don't send. */
				discard = false;

				/* Restart right at the error point if consuming whitespace or
				 * a comment. Consume may have spanned multiple buffers. */
				rst_data = fpc;
				rst_len = (fbuf + fblen) - fpc;
			}
			else {
				/* Send the token. */
				token( tokstart, tokend - tokstart + 1 );

				/* Restart right after the token. */
				rst_data = tokend+1;
				rst_len = (fbuf + fblen) - tokend - 1;
			}

			/* Forget the old token start, then continue on the rest of the
			 * current block from the main entry point. */
			tokstart = 0;
			fexec( rst_data, rst_len );
			fgoto main;
		}
	}

	# Entry point. tokstart is set on the first character of a token and
	# tokend on each character that completes a match. onError is embedded as
	# the error action.
	main := tokens >{tokstart=fpc;} @{tokend=fpc;} $!onError;
}

/*
 * Writes one token to standard output as a single line: the current value of
 * the global tok in angle brackets, one space, then the first len characters
 * starting at data. A len of zero or less prints no token text.
 */
void token( char *data, int len )
{
	cout << "<" << tok << "> ";

	/* Emit the token text in one call instead of character by character. */
	if ( len > 0 )
		cout.write( data, len );

	cout << '\n';
}

/*
 * Driver. Reads standard input in chunks into buf and feeds each chunk to
 * the scanner. The text of a token that is still in progress at the end of a
 * chunk is moved to the front of buf so the next chunk is appended after it.
 *
 * Exits with status 1 after printing "PARSE ERROR" when execute() returns a
 * negative value, or "TOKEN TOO BIG" when one pending token occupies the
 * whole buffer. Returns 0 otherwise.
 */
int main()
{
	std::ios::sync_with_stdio(false);
	Scanner scanner;
	scanner.init();

	/* Number of bytes of an unfinished token kept at the front of buf. */
	int have = 0;

	while ( true ) {
		/* Append new input after the retained bytes. */
		cin.read( buf+have, BUFSIZE-have );

		/* At most BUFSIZE-have bytes were requested, so the count fits an int. */
		int newd = static_cast<int>( cin.gcount() );
		if ( newd == 0 )
			break;

		int len = have + newd;
		int rtn = scanner.execute( buf+have, newd );
		if ( rtn < 0 ) {
			/* Machine failed before finding a token. */
			cerr << "PARSE ERROR" << endl;
			exit(1);
		}
		else if ( discard ) {
			/* No failure yet, end of buf in whitespace or comment. None of
			 * the buffered text is kept; the next read starts at buf again. */
			have = 0;
			tokend -= (tokstart-buf);
			tokstart = buf;
		}
		else if ( tokstart == buf && len == BUFSIZE ) {
			/* No failure yet, buffer is full. */
			cerr << "TOKEN TOO BIG" << endl;
			exit(1);
		}
		else {
			/* No failure yet, room still left in buffer. Shift over data and
			 * read more. */
			have = len - (tokstart-buf);
			memmove( buf, tokstart, have );
			tokend -= (tokstart-buf);
			tokstart = buf;
		}
	}

	scanner.finish();

	/* Emit the token that was pending when input ended. If discard is still
	 * set, input ended inside whitespace or a comment: the branch above has
	 * dropped the buffered text and rebased tokstart/tokend, so they no
	 * longer delimit token text and nothing must be printed. */
	if ( !discard && tok != 0 && tok != TK_Comment && tok != TK_Whitespace )
		token( tokstart, tokend - tokstart + 1 );

	return 0;
}
