/*
 * Lexes Ragel input files.
 */

#include <iostream>
#include <stdlib.h>
#include <stdio.h>

using namespace std;

#define BUFSIZE 2048

/*
 * A growable character buffer. In this file it accumulates the text of the
 * token currently being scanned. Storage is obtained through upAllocate and
 * released by empty, which the destructor calls.
 */
struct Buffer
{
	Buffer() : data(0), allocated(0), length(0) { }
	~Buffer() { empty(); }

	/* Add one character at the end, enlarging the storage first when the
	 * new length would not fit. The request is twice the new length. */
	void append( char p ) {
		const int grown = length + 1;
		if ( grown > allocated )
			upAllocate( grown * 2 );
		data[length] = p;
		length = grown;
	}

	/* Forget the contents but keep the storage for reuse. */
	void clear() { length = 0; }

	/* Resize the storage to len bytes. */
	void upAllocate( int len );

	/* Release the storage and reset all fields. */
	void empty();

	char *data;      /* Start of the storage, or 0 when nothing is allocated. */
	int allocated;   /* Size of the storage in bytes. */
	int length;      /* Number of bytes currently in use. */
};

/*
 * Scanner state shared by the actions of the RagelScan machine: the text of
 * the token being collected and the brace depth of the inline code block
 * currently being read.
 */
struct RagelScan
{
	/* Characters collected for the current token. */
	Buffer tokBuf;
	/* Brace nesting depth inside an inline block. The start_inline action
	 * sets it to 1; inline_open and inline_close adjust it. */
	int inline_depth;

	/* Print the collected token on a line of its own and reset the buffer. */
	inline void emit() {
		/* A zero byte is appended so that data can be streamed as a C string. */
		tokBuf.append(0); 
		cout << "token: " << tokBuf.data << endl;
		tokBuf.clear();
	}

	/* NOTE(review): not referenced by the code visible in this file;
	 * presumably the current machine state used by generated code -- confirm. */
	int curs;
	/* Ragel directive. The init, execute and finish members that main calls
	 * presumably come from its expansion -- confirm against the Ragel version
	 * in use. */
	%% interface;
};

%% RagelScan
{
	# Three machines are instantiated below. main passes code outside of a
	# machine spec through to standard output, rlMain splits a machine spec
	# into tokens and ilMain collects one inline code block. The actions
	# move between the machines with fgoto.

	# Write the current character (fc) straight to standard output.
	action pass { cout << fc; }
	# Print the buffered token and reset the buffer.
	action emit { emit(); }
	# Add the current character to the token buffer.
	action buf { tokBuf.append(fc); }
	# Discard the buffered text.
	# NOTE(review): not referenced by any expression visible in this spec.
	action clear { tokBuf.clear(); }
	# Diagnostics written to standard error.
	action lit_eof { cerr << "rlscan: EOF in literal" << endl; }
	action com_eof { cerr << "rlscan: EOF in comment" << endl; }
	action rl_eof { cerr << "rlscan: EOF in machine spec" << endl; }

	#
	# Inline code blocks.
	#

	# Start and finish inline code blocks.
	# A nested opening brace deepens the count and is kept in the token.
	action inline_open {
		inline_depth += 1;
		tokBuf.append( fc );
	}
	# A closing brace is kept in the token as well. When the count reaches
	# zero the whole block is emitted as one token and scanning resumes in
	# rlMain.
	action inline_close {
		/* If dropping down to zero braces finish with inline. */
		tokBuf.append( fc );
		if ( --inline_depth == 0 ) {
			emit();
			fgoto rlMain;
		}
	}

	# Inline code tokens.
	# Literals and comments have their own expressions; presumably so that
	# braces inside them do not change the depth count -- confirm.
	ilSlit = "'" ( [^'\\] | /\\./ )* $!lit_eof "'";
	ilDlit = '"' ( [^"\\] | /\\./ )* $!lit_eof '"';
	ilCommC = '/*' any* $!com_eof $0 '*/' @1;
	ilCommCC = '//' any* $!com_eof $0 '\n' @1;
	ilElse = [^'"{}];
	ilOpen = '{' @inline_open;
	ilClose = '}'@inline_close;

	# All inline tokens.
	# Everything except the braces is buffered by the buf action here; the
	# braces are buffered by their own actions.
	ilTokens = ( ilSlit | ilDlit | ilCommC | ilCommCC | ilElse ) $buf | 
			ilOpen | ilClose;
	ilMain := ilTokens**;

	# 
	# Ragel Input.
	#

	# Ragel Tokens
	# Numbers, identifiers, the literal forms, multi-character operators and
	# single punctuation symbols.
	uint = /[0-9][0-9]*/;
	pint = /+[0-9][0-9]*/;
	nint = /-[0-9][0-9]*/;
	hex = /0x[0-9a-fA-F][0-9a-fA-F]*/;
	word = /[A-Za-z_][A-Za-z_0-9]*/;
	comment = '#' [^\n]* $!com_eof '\n';
	slit = "'" ( [^'\\] | /\\./ )* $!lit_eof "'";
	dlit = '"' ( [^"\\] | /\\./ )* $!lit_eof '"';
	orlit = '[' ( [^\]\\] | /\\./ )* $!lit_eof ']';
	relit = '/' ( [^/\\] | /\\./ ) * $!lit_eof '/';
	ops = ( '..' | ':=' | '->' | 
		'>!' | '@!' | '$!' | '%!' |
		'>~' | '@~' | '$~' | '%~' );
	# Any run of percent signs other than exactly two.
	otherp = '%'+ - '%%';
	# Punctuation that does not begin one of the literal forms, a comment or
	# an inline block.
	symbols = punct - ['"\[\/#{];

	# The first brace of an inline block is buffered here and the rest of the
	# block is read by ilMain.
	action start_inline {
		/* Init the bracket depth and go to the inline main. */
		inline_depth = 1; 
		tokBuf.append(fc);
		fgoto ilMain;
	}

	# Ragel special tokens.
	# term is two percent signs and sends the scanner back to main.
	# whitespace is any run of characters outside the range 0x21 to 0x7e.
	term = '%%' %{ fgoto main;};
	ilopen = '{' @start_inline;
	whitespace = (any - 0x21..0x7e)+;

	# All Ragel tokens.
	# The alternatives in the parenthesised group are buffered character by
	# character and emitted through the emit action; the remaining
	# alternatives are not buffered by this expression.
	rlTokens = (
			uint | pint | nint | hex | word | slit | 
			dlit | orlit | relit | ops | otherp | symbols
		) $buf %emit | term | ilopen | comment | whitespace;
	rlMain := rlTokens** %rl_eof;

	#
	# Outside code.
	#

	# Outside code tokens.
	# These mirror the inline token forms; ocElse excludes the percent sign
	# and both quote characters.
	ocSlit = "'" ( [^'\\] | /\\./ )* $!lit_eof "'";
	ocDlit = '"' ( [^"\\] | /\\./ )* $!lit_eof '"';
	ocCommC = '/*' any* $!com_eof $0 '*/' @1;
	ocCommCC = '//' any* $!com_eof $0 '\n' @1;
	ocElse = [^%'"];

	# Pass single and 3+ percent signs through, 2 percents moves to rl.
	ocPercent = 
			'%' %{cout << '%';} | 
			'%%' %{ fhold; fgoto rlMain; } |
			'%%%' @{cout << "%%%";} '%'* $pass;

	# All outside code tokens.
	# Every character of an outside token is copied to standard output by the
	# pass action.
	ocTokens = ( ocSlit | ocDlit | ocCommC | ocCommCC | ocElse );
	main := ( ocTokens $pass | ocPercent )**;
}

/*
 * Release the storage and return the buffer to its freshly constructed
 * state. When no storage is held the fields are left exactly as they are.
 */
void Buffer::empty()
{
	if ( data == 0 )
		return;

	free( data );
	data = 0;
	allocated = 0;
	length = 0;
}

/*
 * Resize the storage to len bytes, creating it on first use. Existing
 * contents are preserved by realloc.
 *
 * len is the new size in bytes. The only caller in this file, append,
 * always passes a positive value.
 *
 * If the allocation fails, a message is written to standard error and the
 * program exits with status 1. The original code stored the null result and
 * let append write through it.
 */
void Buffer::upAllocate( int len )
{
	/* realloc on a null pointer allocates a fresh block, so a single call
	 * covers both the first allocation and later growth. */
	char *grown = (char*) realloc( data, len );
	if ( grown == 0 ) {
		/* data still owns the old block at this point; it is deliberately
		 * not overwritten with the null result. */
		std::cerr << "rlscan: out of memory" << std::endl;
		exit( 1 );
	}

	data = grown;
	allocated = len;
}

/* The scanner instance that main initializes, feeds and finishes. */
RagelScan scan;
/* Read buffer that main fills from stdin, at most BUFSIZE bytes at a time. */
char buf[BUFSIZE];

/*
 * Feed standard input to the scanner in chunks of up to BUFSIZE bytes.
 * A message is written to standard error when finish reports a value of
 * zero or less. The exit status is always 0.
 */
int main()
{
	scan.init();

	/* Every chunk is handed to the scanner, including the final short (or
	 * empty) one; a read that does not fill the buffer ends the loop. */
	int got;
	do {
		got = fread( buf, 1, BUFSIZE, stdin );
		scan.execute( buf, got );
	} while ( got == BUFSIZE );

	const bool parsedOk = scan.finish() > 0;
	if ( !parsedOk )
		cerr << "rlscan: error parsing input" << endl;

	return 0;
}
