/*
 *  Copyright 2001-2004 Adrian Thurston <adriant@ragel.ca>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#ifndef _PARSEDATA_H
#define _PARSEDATA_H

#include <limits.h>
#include "ptreetypes.h"
#include "astring.h"
#include "avlmap.h"
#include "bstmap.h"
#include "astring.h"
#include "vectsimp.h"
#include "dlist.h"
#include "fsmgraph.h"
#include "compare.h"

/* Forwards. */
struct RedFsmAp;
struct FsmCodeGen;
struct NameInst;

/* The minimum chars are the absolute value of the real minimum because the
 * sign is stored separately in integers read in so they are compared in the
 * positive. Each is casted to an unsigned because the data part of the number
 * is in unsigned int size. 
 */
#define RL_CHAR_MIN    ((long)((char)CHAR_MIN))
#define RL_CHAR_MAX    ((long)((char)CHAR_MAX))
#define RL_UCHAR_MIN   ((unsigned long)((unsigned char)0))
#define RL_UCHAR_MAX   ((unsigned long)((unsigned char)UCHAR_MAX))
#define RL_SHORT_MIN   ((long)((short)SHRT_MIN))
#define RL_SHORT_MAX   ((long)((short)SHRT_MAX))
#define RL_USHORT_MIN  ((unsigned long)((unsigned short)0))
#define RL_USHORT_MAX  ((unsigned long)((unsigned short)USHRT_MAX))
#define RL_INT_MIN     ((long)((int)INT_MIN))
#define RL_INT_MAX     ((long)((int)INT_MAX))
#define RL_UINT_MIN    ((unsigned long)((unsigned int)0))
#define RL_UINT_MAX    ((unsigned long)((unsigned int)UINT_MAX))
#define RL_LONG_MIN    ((long)LONG_MIN)
#define RL_LONG_MAX    ((long)LONG_MAX)
#define RL_ULONG_MIN   ((unsigned long)0)
#define RL_ULONG_MAX   ((unsigned long)LONG_MAX)


/* Types of builtin machines. */
enum BuiltinMachine
{
	BT_Any,
	BT_Ascii,
	BT_Extend,
	BT_Alpha,
	BT_Digit,
	BT_Alnum,
	BT_Lower,
	BT_Upper,
	BT_Cntrl,
	BT_Graph,
	BT_Print,
	BT_Punct,
	BT_Space,
	BT_Xdigit,
	BT_Null,
	BT_Empty
};

/* Location in an input file. */
struct InputLoc
{
	InputLoc( ) 
		: line(0), col(0) { }
	InputLoc( const BISON_YYLTYPE &loc );

	int line;
	int col;
};

/* Reference to a named state. */
typedef Vector<char*> NameRef;
typedef Vector<NameRef*> NameRefList;
typedef Vector<NameInst*> NameTargList;

/* Element in a list of strings used for storing inline code blocks. */
struct InlineBlock
{
	InlineBlock( const InputLoc &loc, InlineList *inlineList, const NameRefList &nameRefs ) 
			: loc(loc), inlineList(inlineList), nameRefs(nameRefs) { }

	InputLoc loc;
	InlineList *inlineList;
	NameRefList nameRefs;
	NameTargList nameTargs;

	InlineBlock *prev, *next;
};

/* List of inline code blocks. */
typedef DList<InlineBlock> InlineBlockList;

/* Nodes in the tree that use this action. */
typedef Vector<NameInst*> ActionRefs;

/* Element in list of actions. Contains the string for the code to exectute. */
struct Action 
:
	public DListEl<Action>,
	public AvlTreeEl<Action>
{
public:
	Action( const InputLoc &loc, char *name, InlineList *inlineList, 
			const NameRefList &nameRefs );
	~Action();

	/* Key for action dictionary. */
	char *getKey() const { return name; }

	/* Data collected during parse. */
	InputLoc loc;
	char *name;
	InlineList *inlineList;
	int actionId;

	/* Places in the input text that reference the action. */
	ActionRefs actionRefs;

	/* Number of references in the final machine. */
	int numTransRefs;
	int numEofRefs;

	/* List of qualified name references and associated targets. */
	NameRefList nameRefs;
	NameTargList nameTargs;
};

/* A list of actions. */
typedef DList<Action> ActionList;
typedef AvlTree<Action, char *, CmpStr> ActionDict;

/* Structure for reverse action mapping. */
struct RevActionMapEl
{
	char *name;
	InputLoc location;
};

/* Store the value and type of a priority augmentation. */
struct PriorityAug
{
	PriorityAug( AugType type, int priorKey, int priorValue ) :
		type(type), priorKey(priorKey), priorValue(priorValue) { }

	AugType type;
	int priorKey;
	int priorValue;
};

/* Structrue represents an action assigned to some FactorWithAug node. The
 * factor with aug will keep an array of these. */
struct ParserAction
{
	ParserAction( AugType type, int localErrKey, Action *action )
		: type(type), localErrKey(localErrKey), action(action) { }

	AugType type;
	int localErrKey;
	Action *action;
};

/* Context statement, kept unique by name. */
struct Context
{
	Context( char *data, int id )
		: data(data), id(id), declared(false) {}

	char *data;
	int id;
	bool declared;

	Context *prev, *next;
};
typedef AvlMap<char *, Context*, CmpStr> ContextMap;
typedef AvlMapEl<char *, Context*> ContextMapEl;
typedef DList<Context> ContextList;

struct ContextEmbed
{
	ContextEmbed( AugType type, int contextId )
		: type(type), contextId(contextId) { }
	
	AugType type;
	int contextId;
};

struct VarDef;
struct Join;
struct Expression;
struct Term;
struct FactorWithAug;
struct FactorWithLabel;
struct FactorWithRep;
struct FactorWithNeg;
struct Factor;
struct Literal;
struct Range;
struct RegExpr;
struct ReItem;
struct ReOrBlock;
struct ReOrItem;

/* Graph dictionary. */
struct GraphDictEl 
:
	public AvlTreeEl<GraphDictEl>,
	public DListEl<GraphDictEl>
{
	GraphDictEl( char *k ) 
		: key(k), value(0), isInstance(false) { }
	GraphDictEl( char *k, VarDef *value ) 
		: key(k), value(value), isInstance(false) { }

	const char *getKey() { return key; }

	char *key;
	VarDef *value;
	bool isInstance;

	/* Location info of graph definition. Points to variable name of assignment. */
	InputLoc loc;
};

typedef AvlTree<GraphDictEl, char*, CmpStr> GraphDict;
typedef DList<GraphDictEl> GraphList;

/* Priority name dictionary. */
typedef AvlMapEl<char*, int> PriorDictEl;
typedef AvlMap<char*, int, CmpStr> PriorDict;

/* Local error name dictionary. */
typedef AvlMapEl<char*, int> LocalErrDictEl;
typedef AvlMap<char*, int, CmpStr> LocalErrDict;

/* Types of alphabet supported by Ragel. */
enum AlphType
{
	AT_Char,
	AT_UnsignedChar,
	AT_Short,
	AT_UnsignedShort,
	AT_Int,
	AT_UnsignedInt
};

/* Tree of instantiated names. */
typedef BstMapEl<char*, NameInst*> NameMapEl;
typedef BstMap<char*, NameInst*, CmpStr> NameMap;
typedef Vector<NameInst*> NameVect;
typedef BstSet<NameInst*> NameSet;

/* Node in the tree of instantiated names. */
struct NameInst
{
	NameInst( const InputLoc &loc, NameInst *parent, char *name, int id, bool isLabel ) 
		: loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), 
		numRefs(0), numUses(0), start(0), final(0) {}

	InputLoc loc;

	/* Keep parent pointers in the name tree to retrieve 
	 * fully qulified names. */
	NameInst *parent;

	char *name;
	int id;
	bool isLabel;

	int numRefs;
	int numUses;

	/* Names underneath us, excludes anonymous names. */
	NameMap children;

	/* All names underneath us in order of appearance. */
	NameVect childVect;

	/* Join scopes need an implicit "final" target. */
	NameInst *start, *final;

	/* During a fsm generation walk, lists the names that are referenced by
	 * epsilon operations in the current scope. After the link is made by the
	 * epsilon reference and the join operation is complete, the label can
	 * have its refcount decremented if there are no more references then the
	 * entry point can be removed from the fsm returned. */
	NameVect referencedNames;

	/* Pointers for the name search queue. */
	NameInst *prev, *next;

	/* Check if this name inst or any name inst below is referenced. */
	bool anyRefsRec();
};

typedef DList<NameInst> NameInstList;

/* Stack frame used in walking the name tree. */
struct NameFrame 
{
	NameInst *prevNameInst;
	int prevNameChild;
	NameInst *prevLocalScope;
};

/* Class to collect information about the machine during the 
 * parse of input. */
struct ParseData
{
	/* Create a new parse data object. This is done at the beginning of every
	 * fsm specification. */
	ParseData( const String &fileName );
	~ParseData();

	/*
	 * Setting up the graph dict.
	 */

	/* Initialize a graph dict with the basic fsms. */
	void initGraphDict();
	void createBuiltin( char *name, BuiltinMachine builtin );

	/* Make a name id in the current name instantiation scope if it is not
	 * already there. */
	NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel );
	void makeNameTree( GraphDictEl *gdNode );
	void fillNameIndex( NameInst *from );
	void printNameTree();

	/* Increments the usage count on entry names. Names that are no longer
	 * needed will have their entry points unset. */
	void unsetObsoleteEntries( FsmAp *graph );

	/* Resove name references in action code and epsilon transitions. */
	NameSet resolvePart( NameInst *refFrom, char *data, bool recLabelsOnly );
	void resolveFrom( NameSet &result, NameInst *refFrom, 
			const NameRef &nameRef, int namePos );
	NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
	void resolveNameRefs( InlineList *inlineList, Action *action );
	void resolveActionNameRefs();
	void resolveInitNameRefs();

	/* Make an index of pointers to the action blocks. */
	void fillActionIndex();

	/* Set the alphabet type. If type types are not valid returns false. */
	bool setAlphType( String s1, String s2 );
	bool setAlphType( String s1 );

	/* Dumping the name instantiation tree. */
	void printNameInst( NameInst *nameInst, int level );

	/* Make the graph from a graph dict node. Does minimization. */
	FsmAp *makeInstance( GraphDictEl *gdNode );
	FsmAp *makeSpecific( GraphDictEl *gdNode );
	FsmAp *makeAll();
	FsmAp *makeGraph( GraphDictEl *gdNode );

	/* Resolving machine paths. */
	bool shouldGenerate( GraphDictEl *&genDictEl );

	/* Dumping and printing. */
	void dumpFsm( GraphDictEl *gdNode );
	void dumpFsm();
	void printFsm();

	/* Generate graphviz code. */
	void generateGraphviz( GraphDictEl *gdNode );
	void generateGraphviz();

	/* Generate and write out the fsm. */
	void generateCode( );
	void doGenerateCode( GraphDictEl *gdNode );
	FsmCodeGen *makeCodeGen( RedFsmAp *machine2 );

	/* Set the lower and upper range from the lower and upper number keys. */
	void setLowerUpperRange( );
	void initKeyOps();

	/*
	 * Querying the parse data
	 */

	/* Is the alphabet a signed type? */
	bool isAlphSigned();
	
	/*
	 * Data collected during the parse.
	 */

	/* Dictionary of graphs. Both instances and non-instances go here. */
	GraphDict graphDict;

	/* The list of instances. */
	GraphList instanceList;

	/* Dictionary of actions. Lets actions be defined and then referenced. */
	ActionDict actionDict;

	/* Dictionary of named priorities. */
	PriorDict priorDict;

	/* Dictionary of named local errors. */
	LocalErrDict localErrDict;

	/* List of actions. Will be pasted into a switch statement. */
	ActionList actionList;
	Action **actionIndex;

	/* List of sections of data to add to the fsm struct. */
	InlineBlockList dataList;
	char *baseClause;

	/* The id of the next priority name and label. */
	int nextPriorKey, nextLocalErrKey, nextNameId;
	
	/* The default priority number key for a machine. This is active during
	 * the parse of the rhs of a machine assignment. */
	int curDefPriorKey;

	int curDefLocalErrKey;

	/* If a machine is given this is set. */
	bool machineGiven;

	/* Code sections put in the fsms init routine. */
	InlineBlockList initCodeList;

	/* Alphabet type. */
	AlphType alphType;

	/* Element type and get key expression. */
	InlineList *elementType;
	InlineList *getKeyExpr;

	/* The set of contexts. A context goes in the context list if it is
	 * declared. */
	ContextMap contextMap;
	ContextList contextList;
	int nextContextId;

	/* The alphabet range. */
	String lowerNum, upperNum;
	long lowKey, highKey;
	InputLoc rangeLowLoc, rangeHighLoc;

	/* Key operators. */
	KeyOps keyOps;

	/* The name and location of the fsm. */
	String fsmName;
	InputLoc fsmStartSecLoc;
	InputLoc fsmEndSecLoc;

	/* The name of the file the fsm is from. */
	String fileName;

	/* Number of errors encountered parsing the fsm spec. */
	int errorCount;

	/* Counting the action and priority ordering. */
	int curActionOrd;
	int curPriorOrd;

	/* Root of the name tree. */
	NameInst *rootName;
	NameInst *curNameInst;
	int curNameChild;

	/* Root of the name tree used for doing local name searches. */
	NameInst *localNameScope;

	void initNameWalk();
	NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
	NameFrame enterNameScope( bool isLocal, int numScopes );
	void popNameScope( const NameFrame &frame );
	void resetNameScope( const NameFrame &frame );

	/* Make name ids to name inst pointers. */
	NameInst **nameIndex;
};

void afterOpMinimize( FsmAp *fsm );
long makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
long makeFsmKeyChar( char c, ParseData *pd );
void makeFsmKeyArray( long *result, char *data, int len, ParseData *pd );
int makeFsmUniqueKeyArray( long *result, char *data, int len, ParseData *pd );
FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );

void errorStateLabels( const NameSet &locations );

#endif /* _PARSEDATA_H */
