/* $Id: FAUhdlScanner.l 4504 2009-04-27 09:08:58Z potyra $ */
/* vim:tabstop=8:shiftwidth=8:textwidth=72:encoding=utf8:
 * VHDL scanner.
 *
 * Copyright (C) 2007-2009 FAUmachine Team <info@faumachine.org>.
 * This program is free software. You can redistribute it and/or modify it
 * under the terms of the GNU General Public License, either version 2 of
 * the License, or (at your option) any later version. See COPYING.
 */

/* Scanner generation options:
 *   c++               generate a C++ scanner class
 *   outfile           name of the generated source file
 *   batch             the scanner is not used interactively
 *   case-insensitive  patterns match regardless of letter case
 *   stack             enable the start condition stack (the rules use
 *                     yy_push_state, yy_pop_state and yy_top_state)
 *   noyywrap          end of file ends the input, yywrap() is not called
 */
%option c++
%option outfile="FAUhdlScanner.cpp"
%option batch
%option case-insensitive
%option stack
%option noyywrap

/* start conditions */

/* the next token starts an association list */
%x ASSOC_LIST
/* the token is part of a formal_part */
%s FORMAL
/* the token is inside parentheses in a formal_part */
%s NESTED_FORMAL
/* the token is part of an actual_part */
%s ACTUAL
/* the token is inside parentheses in an actual_part */
%s ACTUAL_NESTED
/* handle tokens in an identifier list followed by a colon. All of these
   identifiers are declarations (except in specifications, but these can
   and should be looked up afterwards, since more restrictions apply
   there). */
%x ID_LIST
/* check whether the next tokens form an association_list.
   Must be entered with yy_push_state.
   Doesn't consume characters: everything read is collected in
   this->backup and passed to this->putBack() before leaving.
   Sets this->isAssociation as a result and returns to the
   previous state with yy_pop_state. */
%x AL_SCANNER
/* check whether the next tokens form a formal_part.
   Switches to start condition FORMAL or ACTUAL as a result.
   Doesn't consume characters (same backup/putBack scheme as
   AL_SCANNER). */
%x FORMAL_SCANNER 

%top{
#include <cstdio>
#include <string>
#include <stdexcept>
/* ParserDriver.hpp needs to be included before the scanner header */
#include "ParserDriver.hpp"
}

%{
#include "FAUhdlScanner.hpp"
#include "frontend/reporting/ErrorRegistry.hpp"

/* shorthand for the token kinds returned by the rules below */
typedef yy::FAUhdlParser::token token;

/* set to 1 to trace start condition changes and lookahead results
   on std::cerr */
#define DEBUG_SCANNER 0
%}

/* Basic character classes: blanks within a line, ASCII letters, decimal
   and hexadecimal digits, and the newline character.
   NOTE(review): unlike format_effector, ws contains neither \v nor \f;
   confirm that this is intended. */
ws		[\t\r ]
upper 		[A-Z]
lower		[a-z]
digit 		[0-9]
extended_digit	{digit}|[a-fA-F]
nl		[\n]
extupper 	[ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ]
extlower 	[ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ]
other_special	[!$%@?\\^`{}~¡¢£¤¥§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿­×÷¦]
/* Character classes used to build identifiers and literals.
   special_no_qm holds the special characters without the quotation
   mark (special adds it); its ",-/" range covers the four characters
   comma, minus, dot and slash.
   basic_id starts with a letter; an underscore may only appear singly
   and must be followed by a letter or digit. */
space_character [ \t]
format_effector	[\t\v\r\f]
special_no_qm	[#&'()*+,-/:;<=>\[\]_|]
special		{special_no_qm}|\"
lowercase 	{lower}|{extlower}
uppercase 	{upper}|{extupper}
letter 		{uppercase}|{lowercase}
letter_or_digit	{letter}|{digit}
basic_id	{letter}+(_?{letter_or_digit})*

/* Numeric literals. Digits may be separated by single underscores.
   The integer forms only accept an exponent without a minus sign, the
   real forms accept either sign. Based literals are written as
   base#digits# with an optional exponent after the second '#'. */
number		{digit}(_?{digit})*
extended_number	{extended_digit}(_?{extended_digit})*

pos_exponent	[eE]\+?{number}
neg_exponent	[eE]-{number}
exponent	{pos_exponent}|{neg_exponent}

integer10	{number}({pos_exponent})?
real10		{number}\.{number}({exponent})?
based_integer	{number}\#{extended_number}\#({pos_exponent})?
based_real	{number}\#{extended_number}\.{extended_number}\#{exponent}?

/* Printable characters and the literals built from them.
   basic_graphic1/graphic1 leave out the quotation mark: inside a
   string literal a quotation mark can only appear doubled (esc_char).
   A character literal is any single graphic character in apostrophes.
   A bit string is one of the letters b, o, x followed by quoted
   extended digits. */
basic_graphic1  {uppercase}|{digit}|{special_no_qm}|{space_character}
graphic1	{basic_graphic1}|{lowercase}|{other_special}

basic_graphic   {uppercase}|{digit}|{special}|{space_character}
graphic		{basic_graphic}|{lowercase}|{other_special}

esc_char	\"\"
string_seq	{graphic1}|{esc_char}

char_lit	'{graphic}'
string_lit	\"{string_seq}*\"
bit_string	[bBoOxX]\"{extended_number}\"

%{
	/* Lookahead expressions for identifier lists followed by a
	   colon. These are always declarations and mustn't get
	   looked up in the symbol table.

	   comment: runs from "--" to the end of the line and includes
	   the terminating newline. The optional \r in front of the
	   newline lets comments in files with CRLF line endings match
	   as well; without it the "--" of such a comment would be
	   scanned as two minus signs.
	   separator: anything that may stand between two tokens.
	   id_list: identifiers separated by commas.
	   ids_with_col: an id_list followed by a colon that is not the
	   start of ":=" (one character after the colon is part of the
	   match).
	*/
%}
comment		\-\-[^\v\r\n\f]*\r?[\n]
separator	{ws}|{format_effector}|{nl}|{comment}

id_list		({basic_id}{separator}*\,{separator}*)*{basic_id}
ids_with_col	{id_list}{separator}*\:[^=]

%{
	/* Lookahead expressions for association_list. Since there
	   cannot exist a regular expression to correctly match an
	   association_list, {association} is a heuristic which
	   must detect all *possible* following association lists,
	   and may detect several non-association-lists as well.
	   It serves the purpose of starting the association list
	   scanner (modelled as start condition AL_SCANNER), which
	   verifies the result.

	   association: an opening parenthesis, any text without a
	   semicolon outside of comments and literals that contains
	   "=>", and the terminating semicolon.
	 */
%}
assoc_part	{comment}|{separator}|{string_lit}|{char_lit}|[^;"']
association	\({assoc_part}*=>{assoc_part}*;

%%

<AL_SCANNER>{
{ws}+		{ /* AL_SCANNER is a lookahead pass that decides whether the
		     text from an opening parenthesis on is an association
		     list. Each rule records what it read in this->backup;
		     a run of blanks is recorded as a single space. */
		  this->backup += ' ';
		}
{nl} 		|
{comment}	{ /* ignore comment, only register newline */
		  this->backup += '\n';
		}
{char_lit}	|
{string_lit}	|
{bit_string}	{
		  /* literals are taken over as a whole, so the characters
		     inside them never reach the rules below */
		  this->backup += yytext;
		}
\(		{
		  this->bracesCtr++;
		  this->backup += '(';
		}
\)		{
		  this->bracesCtr--;
		  this->backup += ')';
		  if (this->bracesCtr <= 0) {
		  	/* the outermost parenthesis is closed: a positive
		  	   verdict only stands if the counter is exactly 0 */
		  	this->isAssociation &= (this->bracesCtr == 0);
		  	/* presumably returns the recorded text to the input,
		  	   TODO confirm in putBack() */
		  	this->putBack();

#if DEBUG_SCANNER
			if (this->isAssociation) {
				std::cerr << "association_list <"
					<< this->backup << '>' << std::endl;
			}
#endif
			yy_pop_state();
		  } 
		  /* otherwise a nested parenthesis was closed: go on */
		}
" when "	|
" is "		|
\;		{
		  /* any of these ends the lookahead with a negative
		     verdict */
		  this->backup += yytext;
		  this->putBack();
		  yy_pop_state();
		  this->isAssociation = false;
		}
"=>"		{
		  this->backup += "=>";
		  /* only an arrow directly inside the outermost parenthesis
		     marks an association */
		  if (this->bracesCtr == 1) {
		  	this->isAssociation = true;
		  }
		}
.		{ /* anything else is only recorded */
		  this->backup += yytext;
		}
}

<FORMAL_SCANNER>{
{ws}+		{ /* FORMAL_SCANNER is a lookahead pass over one element of
		     an association list. It decides whether the element
		     starts with a formal_part (an "=>" is found outside of
		     nested parentheses) and switches to FORMAL or ACTUAL
		     accordingly. Each rule records what it read in
		     this->backup; a run of blanks is recorded as a single
		     space. */
		  this->backup += ' ';
		}
{nl} 		|
{comment}	{ /* ignore comment, only register newline */
		  this->backup += '\n';
		}
{char_lit}	|
{string_lit}	|
\|		|
{bit_string}	{
		  /* taken over as a whole, never seen by the rules below */
		  this->backup += yytext;
		}
\(		{
		  this->bracesCtr++;
		  this->backup += '(';
		}
\)		{
		  this->bracesCtr--;
		  this->backup += ')';
		  if (this->bracesCtr < 0) {
		  	/* this parenthesis closes one that was opened before
		  	   the lookahead started, and no "=>" was seen: the
		  	   element is an actual_part only */
		  	this->putBack();
			driver.lookupIdentifiers = true;
#if DEBUG_SCANNER
			std::cerr << "not a formal: )" << std::endl;
#endif
			BEGIN(ACTUAL);
		  }
		  /* otherwise a nested parenthesis was closed: go on */
		}
"=>"		{
		  this->backup += "=>";
		  if (this->bracesCtr == 0) {
		  	/* arrow outside of nested parentheses: the text read
		  	   so far is a formal_part, whose identifiers are
		  	   not looked up */
		  	this->putBack();
			driver.lookupIdentifiers = false;
#if DEBUG_SCANNER
			std::cerr << "formal_part <" << this->backup <<
				'>' << std::endl;
#endif
			BEGIN(FORMAL);
		  }
		}
\,		{
		  this->backup += ',';
		  if (this->bracesCtr == 0) {
		  	/* the element ended without an arrow: it is an
		  	   actual_part only */
		  	this->putBack();
			driver.lookupIdentifiers = true;
#if DEBUG_SCANNER
			std::cerr << "not a formal: ," << std::endl;
#endif
			BEGIN(ACTUAL);
		  }
		} 
.		{ /* anything else is only recorded */
		  this->backup += yytext;
		}

}

{ws}		{ /* skip blanks */ }
{association} { 
		  /* Text that may be an association list is handled in two
		     passes. On the first match the whole text is given
		     back with yyless(0) and AL_SCANNER is pushed to
		     examine it. Once AL_SCANNER has popped itself, this
		     rule is expected to match the same text again
		     (presumably because putBack() restored it, TODO
		     confirm); scannedForAssociation tells the two passes
		     apart. */
		  if (this->scannedForAssociation) {
		  	this->scannedForAssociation = false;
		  	if (this->isAssociation) {
				/* confirmed: rescan the text as an
				   association list */
				yy_push_state(ASSOC_LIST);
				yyless(0);
			} else {
				/* false alarm: let the next best rule
				   handle the text */
				REJECT;
			}
		  } else {
#if DEBUG_SCANNER
			  std::cerr << '<' << yytext << '>' << std::endl;
			  std::cerr << "scanning for association_list..." 
			  	<< std::endl;
#endif
			  this->reset();
			  this->scannedForAssociation = true;
			  yy_push_state(AL_SCANNER);
			  yyless(0);
		  }
		}
<ASSOC_LIST>\(	{
		  /* Opening parenthesis of a confirmed association list.
		     ASSOC_LIST is replaced on the state stack by
		     FORMAL_SCANNER, which examines the first element;
		     the state that was active before the list stays
		     underneath. */
#if DEBUG_SCANNER
		  std::cerr << "probably a formal_part, scanning..." 
		  	<< std::endl;
#endif
		  this->reset();
		  yy_pop_state();
		  yy_push_state(FORMAL_SCANNER);
		  return token::t_LeftParen;
		}
<FORMAL>\(	{
		  /* Parenthesis inside a formal_part: identifiers within
		     it are looked up again until the matching closing
		     parenthesis is reached (see the NESTED_FORMAL rules). */
#if DEBUG_SCANNER
		  std::cerr << "formal: ( switching to nested formal" 
		  	<< std::endl;
#endif
		  yy_push_state(NESTED_FORMAL);
#if DEBUG_SCANNER
		  std::cerr << "FORMAl( push" << std::endl;
#endif
		  driver.lookupIdentifiers = true;
		  return token::t_LeftParen;
		}
<NESTED_FORMAL>\( {
		  /* one more nesting level */
#if DEBUG_SCANNER
		  std::cerr << "NESTED_fORMAl( push" << std::endl;
#endif
		  yy_push_state(NESTED_FORMAL);
		  return token::t_LeftParen;
		}
<NESTED_FORMAL>\) {
#if DEBUG_SCANNER
		  std::cerr << "NESTED_fORMAL) pop" << std::endl;
#endif
		  /* yy_top_state() is the state the pop below returns to:
		     when that is FORMAL, the outermost nested parenthesis
		     is being closed and lookups are turned off again */
		  if (yy_top_state() == FORMAL) {
		  	driver.lookupIdentifiers = false;
		  }
		  yy_pop_state();
		  return token::t_RightParen;
		}
<FORMAL>"=>"	{
		  /* the arrow ends the formal_part; what follows is the
		     actual_part, whose identifiers are looked up */
#if DEBUG_SCANNER
		  std::cerr << "formal t_Arrow, entering actual" 
		  	<< std::endl;
#endif
		  BEGIN(ACTUAL);
		  driver.lookupIdentifiers = true;
		  return token::t_Arrow;
		}
<ACTUAL>\(	{
		  /* parenthesis inside an actual_part: track the nesting
		     so that commas and closing parentheses inside it are
		     not taken for the ones of the association list */
#if DEBUG_SCANNER
		  std::cerr << "ACTUAl( push" << std::endl;
#endif
		  yy_push_state(ACTUAL_NESTED);
		  return token::t_LeftParen;
		} 
<ACTUAL>\)	{ 
		  /* closing parenthesis of the association list: return to
		     the state that was left on the stack when the list
		     was entered */
#if DEBUG_SCANNER
		  std::cerr << "actual ). pop." << std::endl;
#endif
		  yy_pop_state();
		  return token::t_RightParen;
		}
<ACTUAL>\,	{
		  /* next element of the association list: examine it with
		     FORMAL_SCANNER, which replaces ACTUAL as the current
		     state */
#if DEBUG_SCANNER
		  std::cerr << "actual ',' scanning for formal_part."
		  	<< std::endl;
#endif
		  this->reset();
		  BEGIN(FORMAL_SCANNER);
		  return token::t_Comma;
		}
<ACTUAL_NESTED>\( {
		  /* one more nesting level */
#if DEBUG_SCANNER
		  std::cerr << "ACTUAL_nESTED( push" << std::endl;
#endif
		  yy_push_state(ACTUAL_NESTED);
		  return token::t_LeftParen;
		}
<ACTUAL_NESTED>\) {
		  /* leave one nesting level */
		  yy_pop_state();
#if DEBUG_SCANNER
		  std::cerr << "ACTUAL_nESTED) pop" << std::endl;
#endif
		  return token::t_RightParen;
		}
\&		{ /* Delimiters. These rules carry no start condition, so
		     they apply in INITIAL and in the inclusive (%s) start
		     conditions, where the state specific rules for
		     parentheses, commas and arrows above take precedence
		     because they come first. */
		  return token::t_Ampersand;
		}
\'		{ return token::t_Apostrophe; }
\(		{
#if DEBUG_SCANNER
		  std::cerr << "<INITIAL>(" << std::endl;
#endif
		  return token::t_LeftParen; 
		}
\)		{ return token::t_RightParen; }
"**"		{ return token::t_DoubleStar; }
\*		{ return token::t_Star; }
\+		{ return token::t_Plus; }
\,		{ return token::t_Comma; }
\-		{ return token::t_Minus; }
":="		{ return token::t_VarAsgn; }
\:		{ return token::t_Colon; }
\;		{ return token::t_Semicolon; }
"<="		{ return token::t_LESym; }
">="		{ return token::t_GESym; }
\<		{ return token::t_LTSym; }
\>		{ return token::t_GTSym; }
=		{ return token::t_EQSym; }
\/=		{ return token::t_NESym; }
"=>"		{ return token::t_Arrow; }
"<>"		{ return token::t_Box; }
\|		{ return token::t_Bar; }
!		{ /* yields the same token as '|' (VHDL permits '!' as a
		     replacement character for the vertical line) */
		  return token::t_Bar;
		}
\.		{ return token::t_Dot; }
\/		{ return token::t_Slash; }

abs		{ /* Reserved words, one token each. Because of
		     %option case-insensitive they match in any letter
		     case. A longer identifier that merely starts with a
		     reserved word is still scanned by {basic_id}, since
		     the longest match wins; on equal length the reserved
		     word wins because these rules precede {basic_id}. */
		  return token::t_ABS;
		}
access		{ return token::t_ACCESS; }
after		{ return token::t_AFTER; }
alias		{ return token::t_ALIAS;}
all		{ return token::t_ALL; }
and		{ return token::t_AND; }
architecture	{ return token::t_ARCHITECTURE; }
array		{ return token::t_ARRAY; }
assert		{ return token::t_ASSERT; }
attribute	{ return token::t_ATTRIBUTE; }
begin		{ return token::t_BEGIN; }
block		{ return token::t_BLOCK; }
body		{ return token::t_BODY; }
buffer		{ return token::t_BUFFER; }
bus		{ return token::t_BUS; }
case		{ return token::t_CASE; }
component	{ return token::t_COMPONENT; }
configuration	{ return token::t_CONFIGURATION; }
constant	{ return token::t_CONSTANT; }
disconnect	{ return token::t_DISCONNECT; }
downto		{ return token::t_DOWNTO; }
else		{ return token::t_ELSE; }
elsif		{ return token::t_ELSIF; }
end		{ return token::t_END; }
entity		{ return token::t_ENTITY; }
exit		{ return token::t_EXIT; }
file		{ return token::t_FILE; }
for		{ return token::t_FOR; }
function	{ return token::t_FUNCTION; }
generate	{ return token::t_GENERATE; }
generic		{ return token::t_GENERIC; }
group		{ return token::t_GROUP; }
guarded		{ return token::t_GUARDED; }
if		{ return token::t_IF; }
impure		{ return token::t_IMPURE; }
in		{ return token::t_IN; }
inertial	{ return token::t_INERTIAL; }
inout		{ return token::t_INOUT; }
is		{ return token::t_IS; }
label		{ return token::t_LABEL; }
library		{ return token::t_LIBRARY; }
linkage		{ return token::t_LINKAGE; }
literal		{ return token::t_LITERAL; }
loop		{ return token::t_LOOP; }
map		{ return token::t_MAP; }
mod		{ return token::t_MOD; }
nand		{ return token::t_NAND; }
new		{ return token::t_NEW; }
next		{ return token::t_NEXT; }
nor		{ return token::t_NOR; }
not		{ return token::t_NOT; }
null		{ return token::t_NULL; }
of		{ return token::t_OF; }
on		{ return token::t_ON; }
open		{ return token::t_OPEN; }
or		{ return token::t_OR; }
others		{ return token::t_OTHERS; }
out		{ return token::t_OUT; }
package		{ return token::t_PACKAGE; }
port		{ return token::t_PORT; }
postponed	{ return token::t_POSTPONED; }
procedure	{ return token::t_PROCEDURE; }
process		{ return token::t_PROCESS; }
pure		{ return token::t_PURE; }
range		{ return token::t_RANGE; }
record		{ return token::t_RECORD; }
register	{ return token::t_REGISTER; }
reject		{ return token::t_REJECT; }
rem		{ return token::t_REM; }
report		{ return token::t_REPORT; }
return		{ return token::t_RETURN; }
rol		{ return token::t_ROL; }
ror		{ return token::t_ROR; }
select		{ return token::t_SELECT; }
severity	{ return token::t_SEVERITY; }
signal		{ return token::t_SIGNAL; }
sla		{ return token::t_SLA; }
sll		{ return token::t_SLL; }
sra		{ return token::t_SRA; }
srl		{ return token::t_SRL; }
shared		{ return token::t_SHARED; }
subtype		{ return token::t_SUBTYPE; }
then		{ return token::t_THEN; }
to		{ return token::t_TO; }
transport	{ return token::t_TRANSPORT; }
type		{ return token::t_TYPE; }
unaffected	{ return token::t_UNAFFECTED; }
units		{ return token::t_UNITS; }
until		{ return token::t_UNTIL; }
use		{ return token::t_USE; }
variable	{ return token::t_VARIABLE; }
wait		{ return token::t_WAIT; }
when		{ return token::t_WHEN; }
while		{ return token::t_WHILE; }
with		{ return token::t_WITH; }
xnor		{ return token::t_XNOR; }
xor		{ return token::t_XOR; }
{based_real}	{ /* Literals. The numeric rules pass the matched text to
		     ParserDriver helpers and store the result in
		     yylval->r (t_REAL) or yylval->i (t_INTEGER). */
		  yylval->r = ParserDriver::makeBased<double>(
						std::string(yytext));
		  return token::t_REAL;
		}
{based_integer} { yylval->i = 
			ParserDriver::makeBased<long>(std::string(yytext));
		  return token::t_INTEGER;
		}

{real10}	{ yylval->r = 
			ParserDriver::makeBase10<double>(std::string(yytext));
		  return token::t_REAL;
		}
{integer10}	{ yylval->i = 
			ParserDriver::makeBase10<long>(std::string(yytext));
		  return token::t_INTEGER; 
		}

{char_lit}	{ /* the string keeps the enclosing apostrophes.
		     NOTE(review): allocated with new; presumably the
		     parser takes ownership, confirm. */
		  yylval->s = new std::string(yytext);
		  return token::t_CHAR;
		}

{string_lit}	{ yylval->s = 
			ParserDriver::removeQuotes(std::string(yytext));
		  return token::t_STRING;
		}

{bit_string} 	{ /* bit strings are handed to the parser as t_STRING, in
		     whatever form makeBitString() produces */
		  yylval->s = 
			ParserDriver::makeBitString(std::string(yytext));
		  return token::t_STRING;
		}
{ids_with_col}	{
			/* An identifier list followed by a colon starts
			   here. Push ID_LIST and turn symbol table
			   lookups off, then REJECT so that the next best
			   rule scans the text itself; this rule never
			   returns a token of its own. */
#if DEBUG_SCANNER
			std::cerr << "ids_with_col, push" << std::endl;
#endif
			yy_push_state(ID_LIST);
			driver.lookupIdentifiers = false;
			/* FIXME use yyless(0); instead of reject! */
			REJECT;
		}
{basic_id}	{ 
		  /* the driver decides which token an identifier yields
		     and fills in yylval->identifier */
		  return driver.getTokenForId(
		  			yytext,
					yylval->identifier, 
					*yylloc);
		}
{nl} 		{
		  /* advance location */
		  yylloc->lines(); 
		  yylloc->step();
		}

{comment}	{ /* skip comment; the pattern includes the newline that
		     ends it, so the location advances by one line */
			yylloc->lines();
			yylloc->step();
		}

.		{ /* final catch all rule, report error; no token is
		     returned, scanning goes on with the next character */
		  driver.error(*yylloc, "unrecognized token \"" 
				+ std::string(yytext) + "\"");
		}

<ID_LIST>{ws}		{ /* ID_LIST scans an identifier list up to its
			     colon. It is pushed by the {ids_with_col}
			     rule, which also turns symbol table lookups
			     off; the colon rule below pops the state and
			     turns lookups on again. Blanks are skipped. */
			}
<ID_LIST>[\v\f]		{ /* {separator} in the {ids_with_col} lookahead
			     also admits these format effectors, so they
			     have to be skipped here as well instead of
			     ending up in the catch-all rule below */
			}
<ID_LIST>\,		{ 	return token::t_Comma; }
<ID_LIST>{basic_id}	{
				return driver.getTokenForId(
						yytext,
						yylval->identifier,
						*yylloc);
			}
<ID_LIST>\:		{
				/* return to normal state */
#if DEBUG_SCANNER
				std::cerr << "id_list: pop" <<
				std::endl;
#endif
				yy_pop_state();
				driver.lookupIdentifiers = true;
				return token::t_Colon;
			}
<ID_LIST>{nl}		{
				/* advance location */
				yylloc->lines();
				yylloc->step();
			}
<ID_LIST>{comment}	{ /* skip comment */
				yylloc->lines();
				yylloc->step();
			}
<ID_LIST>.		{
				/* start condition id_list wrong! The
				   lookahead only admits text the rules
				   above can handle, so getting here means
				   the two are out of sync. Report it, so
				   that the character is not dropped
				   silently when assertions are disabled. */
				driver.error(*yylloc,
					"unexpected character in identifier list \""
					+ std::string(yytext) + "\"");
				assert(false);
			}
%%

/*
 * Definition of the yylex() member of the flex base class. It is not
 * meant to be used: calling it always raises std::logic_error.
 */
int yyFlexLexer::yylex()
{
	const std::logic_error misuse(
		"this method should never have been called.");
	throw misuse;
}
