/*
 *  Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#include "ragel.h"
#include "fsmcodegen.h"
#include "parsetree.h"
#include "redfsm.h"

/* Report whether a NUL-terminated string is made up solely of whitespace
 * characters (space, tab, newline, vertical tab, form feed, carriage
 * return). An empty string counts as whitespace only. Code blocks that are
 * only whitespace need not be output. */
bool onlyWhitespace( char *str )
{
	for ( const char *cur = str; *cur != 0; cur++ ) {
		switch ( *cur ) {
		case ' ': case '\t': case '\n':
		case '\v': case '\f': case '\r':
			/* Still whitespace, keep scanning. */
			continue;
		default:
			/* Found something that must be output. */
			return false;
		}
	}
	return true;
}

using std::ostream;

/* Construct a code generator. Stores the machine name, the parse data, the
 * reduced machine and the output stream, and clears every analysis flag.
 * The flags are raised later by analyzeAction(). */
FsmCodeGen::FsmCodeGen( char *fsmName, ParseData *parseData, 
		RedFsmAp *redFsm, ostream &out )
:
	/* What to generate and where to write it. */
	fsmName(fsmName), 
	parseData(parseData), 
	redFsm(redFsm),
	out(out),

	/* Nothing is known about the actions until the machine is analyzed. */
	bAnyEofActions(false),
	bAnyActionCalls(false),
	bAnyActionRets(false),
	bAnyRegActionRets(false),
	bAnyRegActionByValControl(false),
	bAnyRegNextStmt(false),
	bAnyRegCurStateRef(false),
	bAnyEofActionControl(false),
	bAnyEofActionCharRef(false),
	bAnyEofActionHold(false)
{
}

/* Emit a #line directive that names the output file. */
void FsmCodeGen::startCodeGen()
{
	/* The directive carries the output filter's current line plus one. */
	out << "#line " << outFilter->line + 1 << " \"";

	/* The file name goes through LDIR_PATH so that backslashes are escaped;
	 * the closing quote and newline finish the directive. */
	LDIR_PATH( outputFile ) << "\"\n";
}

/* Emit a #line directive that names the input file, using the line number
 * of the supplied location. */
void FsmCodeGen::endCodeGen( const InputLoc &loc )
{
	/* Directive keyword, line number and opening quote. */
	out << "#line " << loc.line  << " \"";

	/* Escaped input file name, closing quote and newline. */
	LDIR_PATH( parseData->fileName ) << "\"\n";
}

/* Does the machine have any actions? True when the action map of the
 * reduced machine holds at least one entry. */
bool FsmCodeGen::anyActions()
{
	const bool haveActionTables = redFsm->actionMap.length() > 0;
	return haveActionTables;
}

void FsmCodeGen::findFinalActionRefs()
{
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Rerence count out of single transitions. */
		for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) {
			if ( rtel->value->action != 0 ) {
				for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
					item->value->numTransRefs += 1;
			}
		}

		/* Reference count out of range transitions. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			if ( rtel->value->action != 0 ) {
				for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
					item->value->numTransRefs += 1;
			}
		}

		/* Reference count default transition. */
		if ( st->defTrans != 0 && st->defTrans->action != 0 ) {
			for ( ActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ )
				item->value->numTransRefs += 1;
		}

		/* Reference count out of eof actions. */
		if ( st->eofAction != 0 ) {
			for ( ActionTable::Iter item = st->eofAction->key; item.lte(); item++ )
				item->value->numEofRefs += 1;
		}
	}
}

/* Assign ids to referenced actions. */
void FsmCodeGen::assignActionIds()
{
	int nextActionId = 0;
	for ( ActionList::Iter act = parseData->actionList; act.lte(); act++ ) {
		/* Only ever interested in referenced actions. */
		if ( act->numTransRefs > 0 || act->numEofRefs > 0 )
			act->actionId = nextActionId++;
	}
}

/* Compute the largest values that the generated tables will have to hold.
 * One pass over the states collects the limits that depend on transitions
 * and key spans, a second pass over the action map collects the limits
 * that depend on action tables. The action ids read here must already have
 * been assigned. */
void FsmCodeGen::setValueLimits()
{
	/* Start every limit from scratch. Only maxIndex and maxState are known
	 * without walking the machine. */
	maxSingleLen = 0;
	maxRangeLen = 0;
	maxKeyOffset = 0;
	maxIndexOffset = 0;
	maxIndex = redFsm->transSet.length();
	maxActListId = 0;
	maxActionLoc = 0;
	maxActArrItem = 0;
	maxSpan = 0;
	maxFlatIndexOffset = 0;

	/* The largest state id is one below the next id to be handed out. */
	maxState = redFsm->nextStateId - 1;

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Maximum single length. */
		if ( st->outSingle.length() > maxSingleLen )
			maxSingleLen = st->outSingle.length();

		/* Maximum range length. */
		if ( st->outRange.length() > maxRangeLen )
			maxRangeLen = st->outRange.length();

		/* The key and index offsets accumulate over every state but the
		 * last: the offset that would follow the last state is never used.
		 * A range contributes two keys (hence the *2) but one index entry;
		 * each state contributes one extra index entry. */
		if ( ! st.last() ) {
			maxKeyOffset += st->outSingle.length() + st->outRange.length()*2;
			maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1;
		}

		/* Max key span. Only states with a transition list have one. */
		if ( st->transList != 0 ) {
			unsigned long long span = redFsm->keyOps->span( st->lowKey, st->highKey );
			if ( span > maxSpan )
				maxSpan = span;
		}

		/* Max flat index offset. Again the last state is left out. Each
		 * state adds its key span, if it has a transition list, plus one. */
		if ( ! st.last() ) {
			if ( st->transList != 0 )
				maxFlatIndexOffset += redFsm->keyOps->span( st->lowKey, st->highKey );
			maxFlatIndexOffset += 1;
		}
	}

	for ( ActionTableMap::Iter at = redFsm->actionMap; at.lte(); at++ ) {
		/* Maximum id of action lists. The stored value is the id plus one. */
		if ( at->actListId+1 > maxActListId )
			maxActListId = at->actListId+1;

		/* Maximum location of items in action array, also stored plus one. */
		if ( at->location+1 > maxActionLoc )
			maxActionLoc = at->location+1;

		/* Maximum values going into the action array. Both the length of a
		 * table and the ids of the actions in it are candidates. */
		if ( at->key.length() > maxActArrItem )
			maxActArrItem = at->key.length();
		for ( ActionTable::Iter item = at->key; item.lte(); item++ ) {
			if ( item->value->actionId > maxActArrItem )
				maxActArrItem = item->value->actionId;
		}
	}
}

/* Walk the inline items of an action, children included, and raise the
 * generator-wide flags describing what the referenced actions contain.
 * Items of an action that is referenced neither by a transition nor at eof
 * raise no flag. */
void FsmCodeGen::analyzeAction( Action *act, InlineList *inlineList )
{
	for ( InlineList::Iter node = *inlineList; node.lte(); node++ ) {
		const bool usedByTrans = act->numTransRefs > 0;
		const bool usedAtEof = act->numEofRefs > 0;

		/* Calls and returns matter wherever the action is used. */
		if ( usedByTrans || usedAtEof ) {
			switch ( node->type ) {
			case InlineItem::Call:
				bAnyActionCalls = true;
				break;
			case InlineItem::Ret:
				bAnyActionRets = true;
				break;
			default:
				break;
			}
		}

		/* Properties of regular actions, the ones run on transitions. */
		if ( usedByTrans ) {
			switch ( node->type ) {
			case InlineItem::Ret:
				/* A return in a regular action. */
				bAnyRegActionRets = true;
				break;
			case InlineItem::Next:
			case InlineItem::NextE:
				/* A next statement in a regular action. */
				bAnyRegNextStmt = true;
				break;
			case InlineItem::CallE:
			case InlineItem::GotoE:
				/* Control flow by value in a regular action. */
				bAnyRegActionByValControl = true;
				break;
			case InlineItem::Curs:
				/* A reference to the current state in a regular action. */
				bAnyRegCurStateRef = true;
				break;
			default:
				break;
			}
		}

		/* Properties of eof actions. */
		if ( usedAtEof ) {
			/* Seeing any item at all means there are eof actions. */
			bAnyEofActions = true;

			switch ( node->type ) {
			case InlineItem::Goto:
			case InlineItem::Call:
			case InlineItem::Ret:
			case InlineItem::GotoE:
			case InlineItem::CallE:
				/* Control flow out of an eof action. */
				bAnyEofActionControl = true;
				break;
			case InlineItem::Hold:
				/* A hold also counts as a current character reference. */
				bAnyEofActionHold = true;
				bAnyEofActionCharRef = true;
				break;
			case InlineItem::Char:
			case InlineItem::PChar:
				/* A reference to the current character. */
				bAnyEofActionCharRef = true;
				break;
			default:
				break;
			}
		}

		/* Descend into nested items on behalf of the same action. */
		if ( node->children != 0 )
			analyzeAction( act, node->children );
	}
}

/* Walk an inline list, children included, and record on the reduced action
 * whether it contains a next statement or a reference to the current
 * state. */
void FsmCodeGen::analyzeActionList( RedAction *redAct, InlineList *inlineList )
{
	for ( InlineList::Iter node = *inlineList; node.lte(); node++ ) {
		switch ( node->type ) {
		case InlineItem::Next:
		case InlineItem::NextE:
			/* The action table contains a next statement. */
			redAct->bAnyNextStmt = true;
			break;
		case InlineItem::Curs:
			/* The action table references the current state. */
			redAct->bAnyCurStateRef = true;
			break;
		default:
			break;
		}

		/* Nested items are charged to the same reduced action. */
		if ( node->children != 0 )
			analyzeActionList( redAct, node->children );
	}
}

/* Gather various info on the machine. */
void FsmCodeGen::analyzeMachine()
{
	/* Find the true count of action references.  */
	findFinalActionRefs();

	/* Check if there are any calls in action code. */
	for ( ActionList::Iter act = parseData->actionList; act.lte(); act++ )
		analyzeAction( act, act->inlineList );

	/* Analyze reduced action lists. */
	for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) {
		for ( ActionTable::Iter act = redAct->key; act.lte(); act++ )
			analyzeActionList( redAct, act->value->inlineList );
	}

	/* Find states that have transitions with actions that have next
	 * statements. */
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Check any actions out of outSinge. */
		for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) {
			if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() )
				st->bAnyRegCurStateRef = true;
		}

		/* Check any actions out of outRange. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() )
				st->bAnyRegCurStateRef = true;
		}

		/* Check any action out of default. */
		if ( st->defTrans != 0 && st->defTrans->action != 0 && 
				st->defTrans->action->anyCurStateRef() )
			st->bAnyRegCurStateRef = true;
	}

	/* Set up the label needed flag for each state */
	setLabelsNeeded();

	/* Assign ids to actions that are referenced. */
	assignActionIds();

	/* Set the maximums of various values used for deciding types. */
	setValueLimits();

	/* Determine if we should use indicies. */
	calcIndexSize();
}

/* Give the element size used for an array whose largest value is maxVal:
 * 1 when the value fits below RL_UCHAR_MAX, 2 when it fits below
 * RL_USHORT_MAX and 4 otherwise. The thresholds are the ones ARRAY_TYPE
 * uses to pick a type name. */
int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
{
	if ( maxVal <= RL_UCHAR_MAX )
		return 1;

	if ( maxVal <= RL_USHORT_MAX )
		return 2;

	return 4;
}

/* Emit the name of the unsigned type used for an array whose largest value
 * is maxVal: unsigned char, unsigned short or unsigned int. Returns the
 * output stream. */
std::ostream &FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
{
	/* Assume the widest type and narrow it when the value allows. */
	const char *typeName = "unsigned int";
	if ( maxVal <= RL_UCHAR_MAX )
		typeName = "unsigned char";
	else if ( maxVal <= RL_USHORT_MAX )
		typeName = "unsigned short";

	return out << typeName;
}

/* Emit the prototypes of the C interface: <name>_init, <name>_execute and
 * <name>_finish, each preceded by a comment describing its return value.
 * Returns the output stream. */
std::ostream &FsmCodeGen::C_INTERFACE()
{
	/* The text is not one expression. Each ';' ends a statement so that
	 * FSM_NAME() or EL_TYPE() can write into the stream themselves; the
	 * literals that follow are chained onto the stream those return. */
	out <<
		"/* Initialize the machine. Invokes any init statement blocks. Returns 0\n"
		" * if the machine begins in a non-accepting state and 1 if the machine\n"
		" * begins in an accepting state. */\n"
		"int "; FSM_NAME() << "_init( struct "; FSM_NAME() << " *fsm );\n"
		"\n"
		"/* Execute the machine on a block of data. Returns -1 if after processing\n"
		" * the data, the machine is in the error state and can never accept, 0 if\n"
		" * the machine is in a non-accepting state and 1 if the machine is in an\n"
		" * accepting state. */\n"
		"int "; FSM_NAME() << "_execute( struct "; FSM_NAME() << " *fsm, ";
				EL_TYPE() << " *data, int len );\n"
		"\n"
		"/* Indicate that there is no more data. Returns -1 if the machine finishes\n"
		" * in the error state and does not accept, 0 if the machine finishes\n"
		" * in any other non-accepting state and 1 if the machine finishes in an\n"
		" * accepting state. */\n"
		"int "; FSM_NAME() << "_finish( struct "; FSM_NAME() << " *fsm );\n";
	return out;
}

std::ostream &FsmCodeGen::C_HEADER()
{
	for ( ContextMap::Iter ctx = parseData->contextMap; ctx.lte(); ctx++ ) {
		out << 
			"#define "; FSM_NAME() << "_ctx_" << ctx->key << "(state) \\\n"
			"		(_"; FSM_NAME() << "_ctxdata_" << ctx->key << "[(state)>>3]&(1<<((state)&7)))\n";
			
		out << "extern unsigned char _"; FSM_NAME() << "_ctxdata_" << ctx->key << "[];\n";
	}
	out << "\n";
	return out;
}


/* Emit the member declarations of the C++ interface: init(), execute() and
 * finish(), each preceded by a comment describing its return value, then
 * for every context a static ctx_<key>() bit test together with the
 * declaration of the _ctxdata_<key> array it reads. Returns the output
 * stream. */
std::ostream &FsmCodeGen::CPP_INTERFACE()
{
	/* The ';' before EL_TYPE() ends the statement so that the element type
	 * can be written by that function; the remaining literals are chained
	 * onto the stream it returns. */
	out <<
		"	// Initialize the machine. Invokes any init statement blocks. Returns 0\n"
		"	// if the machine begins in a non-accepting state and 1 if the machine\n"
		"	// begins in an accepting state.\n"
		"	int init( );\n"
		"\n"
		"	// Execute the machine on a block of data. Returns -1 if after processing\n"
		"	// the data, the machine is in the error state and can never accept, 0 if\n"
		"	// the machine is in a non-accepting state and 1 if the machine is in an\n"
		"	// accepting state.\n"
		"	int execute( "; EL_TYPE() << " *data, int len );\n"
		"\n"
		"	// Indicate that there is no more data. Returns -1 if the machine finishes\n"
		"	// in the error state and does not accept, 0 if the machine finishes\n"
		"	// in any other non-accepting state and 1 if the machine finishes in an\n"
		"	// accepting state.\n"
		"	int finish( );\n"
		"\n";

	/* One bit test and one data array declaration per context. */
	for ( ContextMap::Iter ctx = parseData->contextMap; ctx.lte(); ctx++ ) {
		out << 
			"	static bool ctx_" << ctx->key << "(int state) \n"
			"		{ return _ctxdata_" << ctx->key << "[state>>3]&(1<<(state&7)); }\n"
			"	static unsigned char _ctxdata_" << ctx->key << "[];\n"
			"\n";
	}
	return out;
}

/* The C++ header section. Nothing is written here; the output stream is
 * returned untouched. */
std::ostream &FsmCodeGen::CPP_HEADER()
{
	return out;
}

/* Emit the method declarations of the Objective-C interface: initFsm,
 * executeWithData:len: and finish, each preceded by a comment describing
 * its return value. Returns the output stream. */
std::ostream &FsmCodeGen::OBJC_INTERFACE()
{
	/* The ';' before EL_TYPE() ends the statement so that the element type
	 * can be written by that function; the remaining literals are chained
	 * onto the stream it returns. */
	out <<
		"// Initialize the machine. Invokes any init statement blocks. Returns 0\n"
		"// if the machine begins in a non-accepting state and 1 if the machine\n"
		"// begins in an accepting state.\n"
		"- (int) initFsm;\n"
		"\n"
		"// Execute the machine on a block of data. Returns -1 if after processing\n"
		"// the data, the machine is in the error state and can never accept, 0 if\n"
		"// the machine is in a non-accepting state and 1 if the machine is in an\n"
		"// accepting state.\n"
		"- (int) executeWithData:("; EL_TYPE() << " *)data\n len:(int)len;\n"
		"\n"
		"// Indicate that there is no more data. Returns -1 if the machine finishes\n"
		"// in the error state and does not accept, 0 if the machine finishes\n"
		"// in any other non-accepting state and 1 if the machine finishes in an\n"
		"// accepting state.\n"
		"- (int) finish;\n";
	return out;
}

std::ostream &FsmCodeGen::OBJC_HEADER()
{
	for ( ContextMap::Iter ctx = parseData->contextMap; ctx.lte(); ctx++ ) {
		out << 
			"#define "; FSM_NAME() << "_ctx_" << ctx->key << "(state) \\\n"
			"		(_"; FSM_NAME() << "_ctxdata_" << ctx->key << "[(state)>>3]&(1<<((state)&7)))\n";
			
		out << "extern unsigned char _"; FSM_NAME() << "_ctxdata_" << ctx->key << "[];\n";
	}

	out << "\n";
	return out;
}


/* Emit the name of the machine and hand back the output stream so that the
 * caller can keep appending to it. */
std::ostream &FsmCodeGen::FSM_NAME()
{
	return out << fsmName;
}

/* The base clause. Nothing is written here; the output stream is returned
 * untouched. NOTE(review): presumably a hook for generators that need a
 * base clause -- confirm against the class declaration. */
std::ostream &FsmCodeGen::BASE_CLAUSE()
{
	return out;
}

/* Emit the code of one inline block wrapped in braces and terminated by a
 * newline. The inline list is written with a target state of 0 and outside
 * of finish code. Returns the output stream. */
std::ostream &FsmCodeGen::USER_INIT( InlineBlock *ilBlock )
{
	/* Open the block. */
	out << "{";

	/* The user's code. */
	INLINE_LIST( ilBlock->inlineList, 0, false );

	/* Close the block. */
	return out << "}\n";
}

std::ostream &FsmCodeGen::INIT_CODE()
{
	bool anyBlocksWritten = false;

	/* Walk the list of pre funcs, printing the sections. */
	InlineBlock *ilBlock = parseData->initCodeList.head;
	while ( ilBlock != NULL ) {
		/* Remember that we wrote a blcok so that we know to write a line
		 * directive back to the output file. */
		anyBlocksWritten = true;

		/* Write the preprocessor line info for going into the 
		 * source file. */
		out << "#line " << ilBlock->loc.line << " \"";
		LDIR_PATH(parseData->fileName) << "\"\n";

		/* Write the block. */
		out << "{";
		INLINE_LIST( ilBlock->inlineList, 0, false );
		out << "}\n";
		ilBlock = ilBlock->next;
	}

	/* If any blocks were written, then write the directive for going back
	 * into the output file. The line number is for the next line, so add one. */
	if ( anyBlocksWritten ) {
		out << "#line " << outFilter->line + 1 << " \"";
		LDIR_PATH(outputFile) << "\"\n";
	}

	return out;
}

/* Emit the id of the reduced machine's start state. Returns the output
 * stream so that the caller can keep appending to it. */
std::ostream &FsmCodeGen::START_STATE_ID()
{
	out << redFsm->startState->id;
	return out;
}

/* Write out the array of actions. */
std::ostream &FsmCodeGen::ACTIONS_ARRAY()
{
	out << "\t0, ";
	int totalActions = 1;
	for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
		/* Write out the length, which will never be the last character. */
		out << act->key.length() << ", ";
		/* Put in a line break every 8 */
		if ( totalActions++ % 8 == 7 )
			out << "\n\t";

		for ( ActionTable::Iter item = act->key; item.lte(); item++ ) {
			out << item->value->actionId;
			if ( ! (act.last() && item.last()) )
				out << ", ";

			/* Put in a line break every 8 */
			if ( totalActions++ % 8 == 7 )
				out << "\n\t";
		}
	}
	out << "\n";
	return out;
}


/* Emit the name of the alphabet data type selected in the parse data.
 * Nothing is written for an alphabet type that is not listed below.
 * Returns the output stream. */
std::ostream &FsmCodeGen::ALPH_TYPE()
{
	/* Stays null when the alphabet type matches none of the cases. */
	const char *typeName = 0;

	switch ( parseData->alphType ) {
	case AT_Char:
		typeName = "char";
		break;
	case AT_UnsignedChar:
		typeName = "unsigned char";
		break;
	case AT_Short:
		typeName = "short";
		break;
	case AT_UnsignedShort:
		typeName = "unsigned short";
		break;
	case AT_Int:
		typeName = "int";
		break;
	case AT_UnsignedInt:
		typeName = "unsigned int";
		break;
	}

	if ( typeName != 0 )
		out << typeName;
	return out;
}

/* Emit the element type: the user supplied one if the parse data has it,
 * otherwise the alphabet type qualified with const. Returns the output
 * stream. */
std::ostream &FsmCodeGen::EL_TYPE()
{
	if ( parseData->elementType == 0 ) {
		/* Nothing was specified, fall back on a const alphabet type. */
		out << "const ";
		ALPH_TYPE();
		return out;
	}

	/* Write the element type that was given. */
	INLINE_LIST( parseData->elementType, 0, false );
	return out;
}

/* Emit the expression that yields the current key: the user supplied
 * expression in parentheses if the parse data has one, otherwise a plain
 * dereference of _p. Returns the output stream. */
std::ostream &FsmCodeGen::GET_KEY()
{
	if ( parseData->getKeyExpr == 0 ) {
		/* No expression was supplied, use a simple dereference. */
		out << "(*_p)";
		return out;
	}

	/* Wrap the user's way of retrieving the key in parentheses. */
	out << "(";
	INLINE_LIST( parseData->getKeyExpr, 0, false );
	out << ")";
	return out;
}

/* Write out as many tabs as the level asks for; nothing is written for a
 * level of zero or less. Makes the nested binary search nice looking.
 * Returns the output stream. */
std::ostream &FsmCodeGen::TABS( int level )
{
	for ( int written = 0; written < level; written++ )
		out << "\t";
	return out;
}

/* Write out a key. With a signed alphabet the key is printed as it is;
 * otherwise it is converted to unsigned long and printed with a 'u'
 * suffix. Returns the output stream. */
std::ostream &FsmCodeGen::KEY( long key )
{
	if ( ! parseData->isAlphSigned() ) {
		/* Unsigned alphabet: print the unsigned value and the suffix. */
		out << static_cast<unsigned long>( key ) << 'u';
		return out;
	}

	/* Signed alphabet: the value goes out unchanged. */
	out << key;
	return out;
}

/* Emit a switch on act with one case per part of the longest match that is
 * marked inLmSelect and has an action. Each case is labelled with the
 * part's longestMatchId, runs the part's action and ends with a break.
 * Returns the output stream. */
std::ostream &FsmCodeGen::LM_SWITCH( LongestMatch *longestMatch, 
		int targState, int inFinish )
{
	out << "\tswitch( act ) {\n";

	for ( LongestMatchList::Iter part = *longestMatch->longestMatchList; 
			part.lte(); part++ )
	{
		/* Parts outside of the selection or without an action get no case. */
		if ( !part->inLmSelect || part->action == 0 )
			continue;

		/* The case label, the action and the case break. */
		out << "\tcase " << part->longestMatchId << ":\n";
		ACTION( part->action, targState, inFinish );
		out << "\tbreak;\n";
	}

	return out << "\t}\n";
}

/* Write out an inline tree structure. Walks the list and possibly calls out
 * to virtual functions that handle language specific items in the tree.
 * targState and inFinish are handed on to the functions that take them and
 * to every recursive call. Returns the output stream. */
std::ostream &FsmCodeGen::INLINE_LIST( InlineList *inlineList, int targState, bool inFinish )
{
	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
		switch ( item->type ) {
		case InlineItem::Text:
			/* Literal text is copied through as it is. */
			out << item->data;
			break;
		case InlineItem::Goto:
			GOTO( item->nameTarg, inFinish );
			break;
		case InlineItem::Call:
			CALL( item->nameTarg, targState, inFinish );
			break;
		case InlineItem::Ret:
			RET( inFinish );
			break;
		case InlineItem::PChar:
			/* The pointer to the current character. */
			out << "_p";
			break;
		case InlineItem::Char:
			/* The current character, retrieved with the key expression. */
			GET_KEY();
			break;
		case InlineItem::Hold:
			/* A hold backs the pointer up by one. */
			out << "_p--;";
			break;
		case InlineItem::Curs:
			CURS( inFinish );
			break;
		case InlineItem::Targs:
			TARGS( inFinish, targState );
			break;
		case InlineItem::Entry:
			GET_ENTRY( item->nameTarg, inFinish );
			break;
		case InlineItem::GotoE:
			GOTOE( item, inFinish );
			break;
		case InlineItem::CallE:
			CALLE( item, targState, inFinish );
			break;
		case InlineItem::Next:
			NEXT( item->nameTarg, inFinish );
			break;
		case InlineItem::NextE:
			NEXTE( item, inFinish );
			break;
		case InlineItem::Exec:
			/* In finish code only an empty block is written. */
			if ( inFinish )
				out << "{}";
			else {
				/* The parser gives fexec two children. The first becomes the
				 * new _p (less one), the second is added to _p to give the
				 * new _pe (plus one). */
				out << "{" << "_p = (";
				INLINE_LIST( item->children->head->children, targState, inFinish );
				out << ")-1; " << "_pe = _p+(";
				INLINE_LIST( item->children->tail->children, targState, inFinish );
				out << ")+1;}";
			}
			break;
		case InlineItem::Buf:
			/* In finish code the buffer is a null pointer cast to a pointer
			 * to the element type, otherwise it is _data. */
			if ( inFinish ) {
				out << "((";
				EL_TYPE() << "*)0)";
			}
			else
				out << "_data";
			break;
		case InlineItem::BufLen:
			/* In finish code the length is 0, otherwise it is _len. */
			if ( inFinish )
				out << "0";
			else
				out << "_len";
			break;
		case InlineItem::LmSwitch:
			LM_SWITCH( item->longestMatch, targState, inFinish );
			break;
		case InlineItem::LmAct:
			/* Write the code of the longest match part's action in place. */
			INLINE_LIST( item->longestMatchPart->action->inlineList, targState, inFinish );
			break;
		case InlineItem::Node:
			/* Recurse on the list of children. */
			INLINE_LIST( item->children, targState, inFinish );
			break;
		}
	}
	return out;
}

/* Write out a path for use in a line directive. Every backslash is doubled
 * so that it survives inside the quoted file name; all other characters
 * are copied through. Returns the output stream. */
std::ostream &FsmCodeGen::LDIR_PATH( char *path )
{
	const char *cur = path;
	while ( *cur != 0 ) {
		if ( *cur != '\\' )
			out << *cur;
		else
			out << "\\\\";
		cur += 1;
	}
	return out;
}

/* Emit the code of an action: a #line directive naming the action's place
 * in the input file, then the action's inline list wrapped in braces.
 * Returns the output stream. */
std::ostream &FsmCodeGen::ACTION( Action *action, int targState, bool inFinish )
{
	/* Point the compiler at the action's location in the source file. */
	out << "#line " << action->loc.line << " \"";
	LDIR_PATH( parseData->fileName ) << "\"\n";

	/* Open the block, write the action and close the block off. */
	out << "\t{";
	INLINE_LIST( action->inlineList, targState, inFinish ) << "}\n";

	return out;
}

/* Emit the name by which generated C code refers to the machine. Returns
 * the output stream. */
std::ostream &CCodeGen::SELF()
{
	return out << "fsm";
}

/* Emit the name by which generated C++ code refers to the machine. Returns
 * the output stream. */
std::ostream &CppCodeGen::SELF()
{
	return out << "this";
}

/* Emit the name by which generated Objective-C code refers to the machine.
 * Returns the output stream. */
std::ostream &ObjCCodeGen::SELF()
{
	return out << "self";
}
