/*
 *  Copyright 2001-2005 Adrian Thurston <thurston@cs.queensu.ca>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#ifndef _PARSEDATA_H
#define _PARSEDATA_H

#include <limits.h>
#include "avlmap.h"
#include "bstmap.h"
#include "astring.h"
#include "vectsimp.h"
#include "dlist.h"
#include "fsmgraph.h"
#include "compare.h"
#include "vector.h"
#include "rlparse.h"

/* Forwards. NameInst is defined further down in this header; it is named
 * here so the typedefs that precede its definition can refer to it. */
struct NameInst;
/* Lets declarations in this header write ostream without the std:: prefix.
 * NOTE(review): no standard stream header is included here directly, so
 * std::ostream presumably arrives through one of the includes above --
 * confirm. */
using std::ostream;

/* Value limits of the integer types, taken from <limits.h>. Each limit of a
 * signed type is widened to long and each limit of an unsigned type to
 * unsigned long, so every limit is expressed in one of those two types.
 *
 * NOTE(review): the minimums expand to the plain signed limits (negative
 * values), not to their absolute values. Confirm that any caller which keeps
 * the sign of a parsed integer separately expects this.
 */
#define RL_CHAR_MIN    ((long)((char)CHAR_MIN))
#define RL_CHAR_MAX    ((long)((char)CHAR_MAX))
#define RL_UCHAR_MIN   ((unsigned long)((unsigned char)0))
#define RL_UCHAR_MAX   ((unsigned long)((unsigned char)UCHAR_MAX))
#define RL_SHORT_MIN   ((long)((short)SHRT_MIN))
#define RL_SHORT_MAX   ((long)((short)SHRT_MAX))
#define RL_USHORT_MIN  ((unsigned long)((unsigned short)0))
#define RL_USHORT_MAX  ((unsigned long)((unsigned short)USHRT_MAX))
#define RL_INT_MIN     ((long)((int)INT_MIN))
#define RL_INT_MAX     ((long)((int)INT_MAX))
#define RL_UINT_MIN    ((unsigned long)((unsigned int)0))
#define RL_UINT_MAX    ((unsigned long)((unsigned int)UINT_MAX))
#define RL_LONG_MIN    ((long)LONG_MIN)
#define RL_LONG_MAX    ((long)LONG_MAX)
#define RL_ULONG_MIN   ((unsigned long)0)
/* The unsigned maximum must come from ULONG_MAX; LONG_MAX would cover only
 * the lower half of the unsigned long range. */
#define RL_ULONG_MAX   ((unsigned long)ULONG_MAX)


/* Kinds of builtin machine. The enumerators are numbered consecutively,
 * starting at zero, in the order they are listed. */
enum BuiltinMachine
{
	BT_Any = 0,
	BT_Ascii, BT_Extend,
	BT_Alpha, BT_Digit, BT_Alnum,
	BT_Lower, BT_Upper,
	BT_Cntrl, BT_Graph, BT_Print,
	BT_Punct, BT_Space, BT_Xdigit,
	BT_Lambda, BT_Empty
};

/* Only named here; InputLoc takes it by reference. */
struct BISON_YYLTYPE;

/* A position in an input file, held as a line number and a column number. */
struct InputLoc
{
	/* Without arguments both fields are zero. */
	InputLoc()
	:
		line(0),
		col(0)
	{
	}

	/* Store the line and column exactly as given. */
	InputLoc( int line, int col )
	:
		line(line),
		col(col)
	{
	}

	/* Construct from a BISON_YYLTYPE. Declared only; the definition lives
	 * outside this header. */
	InputLoc( const BISON_YYLTYPE &loc );

	int line;
	int col;
};

/* Reference to a named state, held as a vector of C strings.
 * NOTE(review): presumably one string per component of a qualified name --
 * confirm against the parser. */
typedef Vector<char*> NameRef;
/* A vector of pointers to name references. */
typedef Vector<NameRef*> NameRefList;
/* A vector of pointers to name tree nodes, used for the targets that name
 * references are associated with. */
typedef Vector<NameInst*> NameTargList;

/* Nodes in the tree that use this action. */
typedef Vector<NameInst*> ActionRefs;

/* Element in list of actions. Holds the name and the inline code list of one
 * action together with counters of how often the action is referenced. The
 * element can sit in a DList and in an AvlTree (keyed by name) at once. */
struct Action 
:
	public DListEl<Action>,
	public AvlTreeEl<Action>
{
public:

	/* Store the data collected during the parse. The action starts with no
	 * id assigned (-1), every reference counter at zero and the isLmAction
	 * flag cleared. The name reference list is copied; name and inlineList
	 * are stored as the pointers given. */
	Action( const InputLoc &loc, char *name, 
			InlineList *inlineList, const NameRefList &nameRefs )
	:
		loc(loc),
		name(name),
		inlineList(inlineList), 
		actionId(-1),
		numTransRefs(0),
		numToStateRefs(0),
		numFromStateRefs(0),
		numEofRefs(0),
		isLmAction(false),
		nameRefs(nameRefs)
	{
	}

	/* Key for action dictionary. */
	char *getKey() const { return name; }

	/* Data collected during parse. */
	InputLoc loc;
	char *name;
	InlineList *inlineList;
	int actionId;

	/* Places in the input text that reference the action. */
	ActionRefs actionRefs;

	/* Number of references in the final machine: the sum of the four
	 * counters below. The return type is int so the total itself reaches the
	 * caller; a bool return would collapse every non-zero total to 1. The
	 * result still converts to true exactly when some reference exists. */
	int numRefs() 
		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
	int numTransRefs;
	int numToStateRefs;
	int numFromStateRefs;
	int numEofRefs;

	bool isLmAction;

	/* List of qualified name references and associated targets. */
	NameRefList nameRefs;
	NameTargList nameTargs;
};

/* A list of actions. */
typedef DList<Action> ActionList;
/* Tree of actions keyed by C string and ordered with CmpStr; Action::getKey
 * supplies the key (the action name). */
typedef AvlTree<Action, char *, CmpStr> ActionDict;

/* Structure for reverse action mapping. Pairs a name with a location in the
 * input. Plain aggregate: it declares no constructor, so both fields are
 * whatever the creator assigns. */
struct RevActionMapEl
{
	/* Name stored as a raw C string pointer. */
	char *name;
	/* Input position associated with the name. */
	InputLoc location;
};

/* One priority augmentation: which kind of augmentation it is, the priority
 * key it applies to and the priority value. */
struct PriorityAug
{
	/* Record the three values exactly as given. */
	PriorityAug( AugType type, int priorKey, int priorValue )
	:
		type(type),
		priorKey(priorKey),
		priorValue(priorValue)
	{
	}

	AugType type;
	int priorKey;
	int priorValue;
};

/* Represents an action assigned to some FactorWithAug node. The factor with
 * aug keeps an array of these. */
struct ParserAction
{
	/* Record the augmentation type, the local error key and the action
	 * pointer exactly as given. */
	ParserAction( AugType type, int localErrKey, Action *action )
	:
		type(type),
		localErrKey(localErrKey),
		action(action)
	{
	}

	AugType type;
	int localErrKey;
	Action *action;
};

/* A context statement. Contexts are kept unique by name. */
struct Context
{
	/* A new context holds the given string pointer and id and is not yet
	 * marked as declared. The link pointers are left unset. */
	Context( char *data, int id )
	:
		data(data),
		id(id),
		declared(false)
	{
	}

	char *data;
	int id;
	bool declared;

	/* Links to the neighbouring contexts. */
	Context *prev;
	Context *next;
};
/* Map from a C string (compared with CmpStr) to a Context pointer, and the
 * element type of that map. */
typedef AvlMap<char *, Context*, CmpStr> ContextMap;
typedef AvlMapEl<char *, Context*> ContextMapEl;
/* A list of contexts. */
typedef DList<Context> ContextList;

/* Pairs an augmentation type with the id of a context. */
struct ContextEmbed
{
	/* Record both values exactly as given. */
	ContextEmbed( AugType type, int contextId )
	:
		type(type),
		contextId(contextId)
	{
	}

	AugType type;
	int contextId;
};

/* Forward declarations. Within this header these types are only named, for
 * instance through pointers or as template arguments. */
struct VarDef;
struct Join;
struct Expression;
struct Term;
struct FactorWithAug;
struct FactorWithLabel;
struct FactorWithRep;
struct FactorWithNeg;
struct Factor;
struct Literal;
struct Range;
struct RegExpr;
struct ReItem;
struct ReOrBlock;
struct ReOrItem;
struct LongestMatch;
/* A list of longest match items. */
typedef DList<LongestMatch> LmList;

/* Entry of the graph dictionary. Carries the name used as key, the VarDef
 * the name maps to and a flag saying whether the entry is an instance. The
 * element can be placed in an AvlTree and in a DList at once. */
struct GraphDictEl 
:
	public AvlTreeEl<GraphDictEl>,
	public DListEl<GraphDictEl>
{
	/* Entry that has a key only: no VarDef attached, not an instance. */
	GraphDictEl( char *k ) 
	:
		key(k),
		value(0),
		isInstance(false)
	{
	}

	/* Entry for a key with its VarDef; not an instance. */
	GraphDictEl( char *k, VarDef *value ) 
	:
		key(k),
		value(value),
		isInstance(false)
	{
	}

	/* The key under which the entry is stored. */
	const char *getKey()
	{
		return key;
	}

	char *key;
	VarDef *value;
	bool isInstance;

	/* Location info of graph definition. Points to variable name of assignment. */
	InputLoc loc;
};

/* Tree of graph dictionary entries keyed by C string and ordered with
 * CmpStr, plus a plain list of the same element type. */
typedef AvlTree<GraphDictEl, char*, CmpStr> GraphDict;
typedef DList<GraphDictEl> GraphList;

/* Priority name dictionary. Maps a C string to an int. */
typedef AvlMapEl<char*, int> PriorDictEl;
typedef AvlMap<char*, int, CmpStr> PriorDict;

/* Local error name dictionary. Maps a C string to an int; the types are the
 * same as those of the priority name dictionary. */
typedef AvlMapEl<char*, int> LocalErrDictEl;
typedef AvlMap<char*, int, CmpStr> LocalErrDict;

/* Types of alphabet supported by Ragel. Listed as signed/unsigned pairs; the
 * enumerators are numbered consecutively from zero. */
enum AlphType
{
	AT_Char = 0, AT_UnsignedChar,
	AT_Short,    AT_UnsignedShort,
	AT_Int,      AT_UnsignedInt
};

/* Tree of instantiated names. NameMap maps a C string (compared with CmpStr)
 * to a name tree node; NameMapEl is the element type of that map. */
typedef BstMapEl<char*, NameInst*> NameMapEl;
typedef BstMap<char*, NameInst*, CmpStr> NameMap;
/* A vector of name tree node pointers. */
typedef Vector<NameInst*> NameVect;
/* A set of name tree node pointers. */
typedef BstSet<NameInst*> NameSet;

/* One node of the tree of instantiated names. */
struct NameInst
{
	/* Build a node from its location, parent, name, id and label flag. The
	 * reference and use counters start at zero and the start and final
	 * targets start out null. */
	NameInst( const InputLoc &loc, NameInst *parent, char *name, int id, bool isLabel )
	:
		loc(loc),
		parent(parent),
		name(name),
		id(id),
		isLabel(isLabel),
		numRefs(0),
		numUses(0),
		start(0),
		final(0)
	{
	}

	InputLoc loc;

	/* The parent pointer makes it possible to walk up the name tree and
	 * retrieve fully qualified names. */
	NameInst *parent;

	char *name;
	int id;
	bool isLabel;

	int numRefs;
	int numUses;

	/* Names underneath us, excludes anonymous names. */
	NameMap children;

	/* All names underneath us in order of appearance. */
	NameVect childVect;

	/* Join scopes need an implicit "final" target. */
	NameInst *start;
	NameInst *final;

	/* Filled during a walk that generates an fsm: the names that epsilon
	 * operations in the current scope refer to. When the epsilon reference
	 * has made its link and the join operation has completed, the refcount
	 * of the label can be decremented. An entry point whose references have
	 * all gone can be removed from the fsm that is returned. */
	NameVect referencedNames;

	/* Pointers for the name search queue. */
	NameInst *prev;
	NameInst *next;

	/* Check if this name inst or any name inst below is referenced. */
	bool anyRefsRec();
};

/* A list of name tree nodes. */
typedef DList<NameInst> NameInstList;

/* Stack frame used in walking the name tree. Plain aggregate of three saved
 * values. NOTE(review): the fields presumably mirror ParseData's
 * curNameInst, curNameChild and localNameScope -- confirm in the
 * implementation of enterNameScope/popNameScope. */
struct NameFrame 
{
	/* Previously current name tree node. */
	NameInst *prevNameInst;
	/* Previously current child index. */
	int prevNameChild;
	/* Previously current local scope. */
	NameInst *prevLocalScope;
};

/* Class to collect information about the machine during the 
 * parse of input. */
struct ParseData
{
	/* Create a new parse data object. This is done at the beginning of every
	 * fsm specification. */
	ParseData( char *fileName, char *fsmName, const InputLoc &sectionLoc );
	~ParseData();

	/*
	 * Setting up the graph dict.
	 */

	/* Initialize a graph dict with the basic fsms. */
	void initGraphDict();
	void createBuiltin( char *name, BuiltinMachine builtin );

	/* Make a name id in the current name instantiation scope if it is not
	 * already there. */
	NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel );
	void makeNameTree( GraphDictEl *gdNode );
	void fillNameIndex( NameInst *from );
	void printNameTree();

	/* Increments the usage count on entry names. Names that are no longer
	 * needed will have their entry points unset. */
	void unsetObsoleteEntries( FsmAp *graph );

	/* Resove name references in action code and epsilon transitions. */
	NameSet resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly );
	void resolveFrom( NameSet &result, NameInst *refFrom, 
			const NameRef &nameRef, int namePos );
	NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
	void resolveNameRefs( InlineList *inlineList, Action *action );
	void resolveActionNameRefs();

	/* Set the alphabet type. If type types are not valid returns false. */
	bool setAlphType( char *s1, char *s2 );
	bool setAlphType( char *s1 );

	/* Unique actions. */
	void removeDups( ActionTable &actionTable );
	void removeActionDups( FsmAp *graph );

	/* Dumping the name instantiation tree. */
	void printNameInst( NameInst *nameInst, int level );

	/* Make the graph from a graph dict node. Does minimization. */
	FsmAp *makeInstance( GraphDictEl *gdNode );
	FsmAp *makeSpecific( GraphDictEl *gdNode );
	FsmAp *makeAll();

	/* Checking the contents of actions. */
	void ParseData::checkInlineList( Action *act, InlineList *inlineList );
	void ParseData::checkActionCodeContents();

	void analyzeGraph( FsmAp *graph );
	void prepareMachineGen( GraphDictEl *graphDictEl );
	void generateXML( ostream &out );
	FsmAp *sectionGraph;
	bool generatingSectionSubset;

	/* Set the lower and upper range from the lower and upper number keys. */
	void setLowerUpperRange( );
	void initKeyOps();

	/*
	 * Querying the parse data
	 */

	/* Is the alphabet a signed type? */
	bool isAlphSigned();
	
	/*
	 * Data collected during the parse.
	 */

	/* Dictionary of graphs. Both instances and non-instances go here. */
	GraphDict graphDict;

	/* The list of instances. */
	GraphList instanceList;

	/* Dictionary of actions. Lets actions be defined and then referenced. */
	ActionDict actionDict;

	/* Dictionary of named priorities. */
	PriorDict priorDict;

	/* Dictionary of named local errors. */
	LocalErrDict localErrDict;

	/* List of actions. Will be pasted into a switch statement. */
	ActionList actionList;

	/* The id of the next priority name and label. */
	int nextPriorKey, nextLocalErrKey, nextNameId;
	
	/* The default priority number key for a machine. This is active during
	 * the parse of the rhs of a machine assignment. */
	int curDefPriorKey;

	int curDefLocalErrKey;

	/* Alphabet type. */
	AlphType alphType;
	bool alphTypeSet;

	/* Element type and get key expression. */
	InlineList *getKeyExpr;
	InlineList *accessExpr;
	InlineList *curStateExpr;

	/* The set of contexts. A context goes in the context list if it is
	 * declared. */
	ContextMap contextMap;
	ContextList contextList;
	int nextContextId;

	/* The alphabet range. */
	char *lowerNum, *upperNum;
	long lowKey, highKey;
	InputLoc rangeLowLoc, rangeHighLoc;

	/* Key operators. */
	KeyOps keyOps;

	/* The name of the file the fsm is from, and the spec name. */
	char *fileName;
	char *fsmName;
	InputLoc sectionLoc;

	/* Number of errors encountered parsing the fsm spec. */
	int errorCount;

	/* Counting the action and priority ordering. */
	int curActionOrd;
	int curPriorOrd;

	/* Root of the name tree. */
	NameInst *rootName;
	NameInst *curNameInst;
	int curNameChild;

	/* The place where resolved epsilon transitions go. These cannot go into
	 * the parse tree because a single epsilon op can resolve more than once
	 * to different nameInsts if the machine it's in is used more than once. */
	NameVect epsilonResolvedLinks;
	int nextEpsilonResolvedLink;

	/* Root of the name tree used for doing local name searches. */
	NameInst *localNameScope;

	bool hasCall( InlineList *inlineList );
	void setLmInRetLoc( InlineList *inlineList );
	void initLongestMatchData();
	void setLongestMatchData( FsmAp *graph );
	void initNameWalk();
	NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
	NameFrame enterNameScope( bool isLocal, int numScopes );
	void popNameScope( const NameFrame &frame );
	void resetNameScope( const NameFrame &frame );

	/* Make name ids to name inst pointers. */
	NameInst **nameIndex;

	/* Counter for assigning ids to longest match items. */
	int nextLongestMatchId;
	bool lmRequiresErrorState;

	/* List of all longest match parse tree items. */
	LmList lmList;

	Action *newAction( char *name, InlineList *inlineList );

	Action *initTokStart;
	int initTokStartOrd;

	Action *setTokStart;
	int setTokStartOrd;

	Action *initActId;
	int initActIdOrd;

	Action *setTokEnd;
	int setTokEndOrd;
};

/* Map from a C string (compared with CmpStr) to a ParseData pointer, and the
 * element type of that map. */
typedef AvlMap<char*, ParseData *, CmpStr> SectionMap;
typedef AvlMapEl<char*, ParseData *> SectionMapEl;
/* The one section map object; declared here, defined in another file. */
extern SectionMap sectionMap;

/* Free functions. Only the declarations are visible in this header, so the
 * notes below go no further than what the names and signatures show. */

/* NOTE(review): presumably the minimization applied to a machine after an
 * operator has been applied -- confirm in the definition. */
void afterOpMinimize( FsmAp *fsm );
/* Produce a key (a long) from the text in str. The location and parse data
 * are passed along. NOTE(review): Hex/Dec/Num presumably select the number
 * format that is accepted -- confirm in the definitions. */
long makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
/* Produce a key from a single character. */
long makeFsmKeyChar( char c, ParseData *pd );
/* Write keys for the len characters at data into result. The caller
 * supplies the result array. */
void makeFsmKeyArray( long *result, char *data, int len, ParseData *pd );
/* As above but returns an int. NOTE(review): presumably the number of
 * distinct keys written -- confirm in the definition. */
int makeFsmUniqueKeyArray( long *result, char *data, int len, ParseData *pd );
/* Build the machine for the given builtin kind. */
FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );

/* NOTE(review): presumably reports the state labels in the given set of
 * names as part of an error message -- confirm in the definition. */
void errorStateLabels( const NameSet &locations );

/* Per-file parser data. Every included file is handled by a newly executed
 * parser, and this structure is what supports that include system. */
struct InputData
{
	/* Begin with no parse data, no section name and no default parse data.
	 * The first position is line 1, column 1 and the last position is line 1,
	 * column 0. The three string pointers are stored as given and the active
	 * flag is set. */
	InputData( char *fileName, char *includeSpec, char *includeTo )
	:
		pd(0),
		sectionName(0),
		defaultParseData(0),
		first_line(1),
		first_column(1),
		last_line(1),
		last_column(0),
		fileName(fileName),
		includeSpec(includeSpec),
		includeTo(includeTo),
		active(true)
	{
	}

	/* For collecting name references. */
	NameRef nameRef;
	NameRefList nameRefList;

	/* The parse data. While parsing an fsm spec, the parser stores what it
	 * collects in the data structures found in here. */
	ParseData *pd;

	char *sectionName;
	ParseData *defaultParseData;

	int first_line;
	int first_column;
	int last_line;
	int last_column;

	char *fileName;

	/* For an included file, includeSpec is the specification to search for
	 * and includeTo is the name of the spec that does the including. */
	char *includeSpec;
	char *includeTo;

	bool active;
	InputLoc sectionLoc;
};

#endif /* _PARSEDATA_H */
