// Lua 5.3 RE/flex scanner by Robert van Engelen

%top{
  #include <cstring>
  #include <cctype>
  #include "LuaParser.hpp" // generated by bison from lua.y
  #include "location.hpp"  // generated by bison %locations
}

// lexer syntax: enable free-space mode regular expressions for clarity
%option freespace

// lexer optimization: generate fast scanner in direct code
%option fast

// lexer class: yy::LuaScanner
%option namespace=yy
%option lexer=LuaScanner

// lexer output files
%option outfile=LuaScanner.cpp
%option header-file=LuaScanner.hpp

// lexer errors: throw an exception in the scanner's default rule
%option exception="yy::LuaParser::syntax_error(location(), \"Unknown token.\")"

// parser integration: output code for the bison complete with locations parser
%option bison-complete
%option bison-locations
%option bison-cc-namespace=yy
%option bison-cc-parser=LuaParser

// LuaScanner class
%class{

 public:

  // intern str in the "symbol table" and return the address of the stored entry;
  // a string that is already present yields the existing entry
  Name symbol(const char *str)
  {
    const std::set<std::string>::iterator entry = symbols.insert(str).first;
    return &*entry;
  }

 private:

  // one entry of the reserved-word table used by keyword_token()
  struct Keyword
  {
    const char *name;   // keyword spelling, compared with strcmp()
    int         token;  // LuaParser::token value returned for this keyword
  };

  // translate keyword to the corresponding token or return 0 if not a keyword
  int keyword_token(const char *str)
  {
    static const Keyword keywords[] =
    {
      { "and",      LuaParser::token::TOKEN_AND },
      { "break",    LuaParser::token::TOKEN_BREAK },
      { "do",       LuaParser::token::TOKEN_DO },
      { "else",     LuaParser::token::TOKEN_ELSE },
      { "elseif",   LuaParser::token::TOKEN_ELSEIF },
      { "end",      LuaParser::token::TOKEN_END },
      { "false",    LuaParser::token::TOKEN_FALSE },
      { "for",      LuaParser::token::TOKEN_FOR },
      { "function", LuaParser::token::TOKEN_FUNCTION },
      { "goto",     LuaParser::token::TOKEN_GOTO },
      { "if",       LuaParser::token::TOKEN_IF },
      { "in",       LuaParser::token::TOKEN_IN },
      { "local",    LuaParser::token::TOKEN_LOCAL },
      { "nil",      LuaParser::token::TOKEN_NIL },
      { "not",      LuaParser::token::TOKEN_NOT },
      { "or",       LuaParser::token::TOKEN_OR },
      { "repeat",   LuaParser::token::TOKEN_REPEAT },
      { "return",   LuaParser::token::TOKEN_RETURN },
      { "then",     LuaParser::token::TOKEN_THEN },
      { "true",     LuaParser::token::TOKEN_TRUE },
      { "until",    LuaParser::token::TOKEN_UNTIL },
      { "while",    LuaParser::token::TOKEN_WHILE },
      { NULL,       0 }
    };

    for (const Keyword *keyword = keywords; keyword->name != NULL; ++keyword)
      if (strcmp(keyword->name, str) == 0)
        return keyword->token;

    return 0;
  }

  // intern the currently matched text() (a {name}) in the "symbol table"
  Name symbol()
  {
    const char *matched = text();
    return symbol(matched);
  }

  // decode the escapes of the matched quoted {string}, intern the result in the
  // "symbol table" and return the C string of the stored entry
  StringLiteral string()
  {
    const std::string decoded = translate_escapes();
    return symbols.insert(decoded).first->c_str();
  }

  // numeric value (0..15) of hexadecimal digit c; c must satisfy isxdigit()
  static int xdigit_value(char c)
  {
    if (c >= 'a')
      return c - 'a' + 10;
    if (c >= 'A')
      return c - 'A' + 10;
    return c - '0';
  }

  // translate \a, \b, \t, \n, \v, \f, \r, \\, \', \", \ddd, \xXX, and \u{XXX} in matched text() matching string
  //
  // The opening and closing quotes of the match are dropped.  A backslash
  // followed by any other character yields that character unchanged, which
  // also covers \\, \' and \".  A \x that is not followed by two hexadecimal
  // digits is not decoded: the 'x' is kept as an ordinary character.
  std::string translate_escapes()
  {
    std::string t;
    const char *s = matcher().begin() + 1; // first character after the opening quote
    const char *e = matcher().end() - 1;   // the closing quote
    while (s < e)
    {
      if (*s == '\\' && s + 1 < e)
      {
        // "abtnvfr" is ordered so that index + '\a' gives the control codes 7..13
        static const char *escapes = "abtnvfr";
        ++s;
        const unsigned char c = static_cast<unsigned char>(*s);
        // guard c == 0: strchr() would otherwise match the string terminator
        const char *esc = c != '\0' ? strchr(escapes, c) : NULL;
        if (esc != NULL)
        {
          t.push_back(static_cast<char>(esc - escapes + '\a'));
          ++s;
        }
        else if (isdigit(c))
        {
          // \ddd: up to three decimal digits
          int n = 0, k = 3;
          do
            n = 10*n + *s++ - '0';
          while (s < e && --k && isdigit(static_cast<unsigned char>(*s)));
          t.push_back(static_cast<char>(n));
        }
        else if (c == 'x' && s + 2 < e
            && isxdigit(static_cast<unsigned char>(s[1]))
            && isxdigit(static_cast<unsigned char>(s[2])))
        {
          // \xXX: exactly two hexadecimal digits
          t.push_back(static_cast<char>(xdigit_value(s[1]) << 4 | xdigit_value(s[2])));
          s += 3;
        }
        else if (c == 'u' && s + 1 < e && s[1] == '{')
        {
          // \u{XXX}: hexadecimal code point, appended as UTF-8
          char buf[8], *u = buf;
          int n = 0;
          s += 2; // skip "u{"
          while (s < e && isxdigit(static_cast<unsigned char>(*s)))
          {
            const int d = xdigit_value(*s++);
            // saturate instead of overflowing int on overlong digit runs
            n = n > (0x7FFFFFFF - d) / 16 ? 0x7FFFFFFF : 16*n + d;
          }
          if (s < e && *s == '}')
            ++s;
          size_t k = reflex::utf8(n, u);
          while (k--)
            t.push_back(*u++);
        }
        else
        {
          t.push_back(*s++);
        }
      }
      else
      {
        t.push_back(*s++);
      }
    }
    return t;
  }

  // build the token for matched text() matching {name}: the keyword token when
  // the text is a reserved word, otherwise a NAME token carrying the interned symbol
  LuaParser::symbol_type Name()
  {
    const int token = keyword_token(text());
    if (token != 0)
      return LuaParser::symbol_type(token, location());
    return LuaParser::make_NAME(symbol(), location());
  }

  // return token of matched text() matching {integer}
  //
  // The base is chosen explicitly: 16 for a 0x/0X prefix, otherwise 10.  Base 0
  // must not be used because strtol() would then read a leading 0 as octal
  // ("010" -> 8), and Lua has no octal literals.
  // NOTE(review): strtol() yields long; if the INTEGER semantic type is wider
  // than long on the target platform, consider strtoll() - confirm against lua.y.
  LuaParser::symbol_type IntegerLiteral()
  {
    const char *digits = text();
    const bool hex = digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X');
    return LuaParser::make_INTEGER(strtol(digits, NULL, hex ? 16 : 10), location());
  }

  // build a FLOAT token from matched text() matching {float}, converted with strtod()
  LuaParser::symbol_type FloatLiteral()
  {
    const double value = strtod(text(), NULL);
    return LuaParser::make_FLOAT(value, location());
  }

  // return token of matched text() matching {string}: a STRING token whose
  // value is the escape-translated, interned text produced by string()
  LuaParser::symbol_type StringLiteral()
  {
    return LuaParser::make_STRING(string(), location());
  }

  // a symbol table: set of interned strings; symbol() and string() return pointers into its entries
  std::set<std::string> symbols;

  // bracket matching: length that a closing long bracket must have to end the
  // current long comment or long string (set when the opening bracket is matched)
  size_t bracketlen;

  // long string: text collected so far while scanning in the LONGSTRING state
  std::string longstring;
}

// LuaScanner class constructor
%init{
}

%x LONGCOMMENT LONGSTRING

// named sub-patterns used by the rules below
digit                           [0-9]
alpha                           [a-zA-Z_]
name                            {alpha} ( {alpha} | {digit} )*
integer                         {digit}+ | 0 [xX] [0-9a-fA-F]+
exp                             [eE] [-+]? {digit}+
// decimal float: needs a dot or an exponent, e.g. 3. 3.14 .5 314.16e-2 34e1
float                           ( {digit}+ \. {digit}* | \. {digit}+ ) {exp}? | {digit}+ {exp}
string                          \" ( \\. | [^\\"\n] )* \" | \' ( \\. | [^\\'\n] )* \'
longbracket                     \[=*\[

%%

[[:space:]]+                    // skip white space
"--" {longbracket}              { bracketlen = size()-2; start(LONGCOMMENT); }
"--" ( [^\[\n] .* | \[ =* ( [^\[=\n] .* )? )?   // ignore inline comment; the pattern cannot extend over a long bracket opener, so the rule above wins for --[[ and --[=[ even when text follows on the same line
{name}                          { return Name(); }
{integer}                       { return IntegerLiteral(); }
{float}                         { return FloatLiteral(); }
{string}                        { return StringLiteral(); }
{longbracket}                   { bracketlen = size(); longstring.clear(); start(LONGSTRING); }
"=="                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_EQU, location()); }
"~="                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_NEQ, location()); }
"<="                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_LTE, location()); }
">="                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_GTE, location()); }
"<<"                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_SHL, location()); }
">>"                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_SHR, location()); }
"//"                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_DIV, location()); }
".."                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_CAT, location()); }
"..."                           { return LuaParser::symbol_type(LuaParser::token::TOKEN_DOTS, location()); }
"::"                            { return LuaParser::symbol_type(LuaParser::token::TOKEN_COLS, location()); }
[#%&()*+,\-./:;<=>\[\]^{|}~]    { return LuaParser::symbol_type(chr(), location()); }
<<EOF>>                         { return LuaParser::make_EOF(location()); }

<LONGCOMMENT>{
\]=*\]                          { if (size() == bracketlen)
                                    start(INITIAL);
                                  else
                                    matcher().less(1); // wrong level: consume only the first ']' so an overlapping closing bracket (as in "]=]]") is still found
                                }
. | \n                          // ignore long comment
<<EOF>>                         { throw yy::LuaParser::syntax_error(location(), "long comment not closed"); }
}

<LONGSTRING>{
\]=*\]                          { if (size() == bracketlen)
                                  {
                                    start(INITIAL);
                                    // pass an interned copy rather than longstring.c_str(), whose buffer is cleared and reused by the next long string
                                    return LuaParser::make_STRING(symbols.insert(longstring).first->c_str(), location());
                                  }
                                  // wrong level: keep only the first ']' as string text and rescan the rest
                                  matcher().less(1);
                                  longstring.push_back(chr());
                                }
. | \n                          { longstring.push_back(chr()); }
<<EOF>>                         { throw yy::LuaParser::syntax_error(location(), "long string not closed"); }
}

%%

