#include "LuaParser/Lexer/LuaLexer.h"
#include "LuaParser/File/LuaSource.h"
#include "LuaParser/Lexer/LuaDefine.h"
#include "LuaParser/Lexer/LuaIdentify.h"
#include "LuaParser/Lexer/LuaTokenTypeDetail.h"
#include "Util/Utf8.h"
#include "Util/format.h"
#include <limits>
// Lookup table from source spelling to token kind for Lua's reserved words
// and multi-character operators. The transparent comparator (std::less<>)
// lets callers look entries up with a std::string_view without building a
// temporary std::string.
std::map<std::string, LuaTokenKind, std::less<>> LuaLexer::LuaReserved = {
// Reserved words.
{"and", TK_AND },
{"break", TK_BREAK },
{"do", TK_DO },
{"else", TK_ELSE },
{"elseif", TK_ELSEIF },
{"end", TK_END },
{"false", TK_FALSE },
{"for", TK_FOR },
{"function", TK_FUNCTION},
{"goto", TK_GOTO },
{"if", TK_IF },
{"in", TK_IN },
{"local", TK_LOCAL },
{"nil", TK_NIL },
{"not", TK_NOT },
{"or", TK_OR },
{"repeat", TK_REPEAT },
{"return", TK_RETURN },
{"then", TK_THEN },
{"true", TK_TRUE },
{"until", TK_UNTIL },
{"while", TK_WHILE },
// Multi-character operators.
{"//", TK_IDIV },
{"..", TK_CONCAT },
{"...", TK_DOTS },
{"==", TK_EQ },
{">=", TK_GE },
{"<=", TK_LE },
{"~=", TK_NE },
{"<<", TK_SHL },
{">>", TK_SHR },
{"::", TK_DBCOLON }
};
// Creates a lexer over `source`. The reader is built from
// source->GetSource(), the line counter starts at 0, and both optional
// dialect extensions (non-standard symbols, C-like comments) start disabled.
// `source` must not be null: it is dereferenced here.
LuaLexer::LuaLexer(std::shared_ptr<LuaSource> source)
: _linenumber(0),
_supportNonStandardSymbol(false),
_supportCLikeComments(false),
_reader(source->GetSource()),
_file(source) {
}
bool LuaLexer::Parse() {
_file->Reset();
while (true) {
auto type = Lex();
if (type == TK_EOF) {
break;
}
_tokens.emplace_back(type, _reader.GetTokenRange());
if (!_errors.empty()) {
_file->SetTotalLine(_linenumber);
_file->UpdateLineInfo(_linenumber);
return false;
}
}
_file->SetTotalLine(_linenumber);
return true;
}
// Returns a mutable reference to the errors recorded so far by TokenError().
std::vector<LuaTokenError> &LuaLexer::GetErrors() {
return _errors;
}
// Returns true when at least one lexing error has been recorded.
bool LuaLexer::HasError() const {
return !_errors.empty();
}
// Returns the source file this lexer was constructed with (shared ownership).
std::shared_ptr<LuaSource> LuaLexer::GetFile() {
return _file;
}
// Returns a mutable reference to the tokens collected by Parse().
std::vector<LuaToken> &LuaLexer::GetTokens() {
return _tokens;
}
// Enables the non-standard operator extensions that Lex() recognises when
// this flag is set (e.g. compound assignments such as `+=`, `!=`, `&&`,
// `||`, `?.`). There is no way to switch the flag back off.
void LuaLexer::SupportNonStandardSymbol() {
_supportNonStandardSymbol = true;
}
// Enables `// ...` line comments. Lex() only consults this flag inside its
// non-standard-symbol branch, so it has an effect only when
// SupportNonStandardSymbol() has also been called.
void LuaLexer::SupportCLikeComments(){
_supportCLikeComments = true;
}
// Scans the next token from the reader and returns its kind.
//
// Whitespace is skipped; line breaks are consumed through IncLinenumber() so
// that line bookkeeping stays current. Single-character tokens are returned
// as their character code, multi-character tokens as TK_* kinds, and TK_EOF
// is returned once the reader reports EOZ.
//
// When _supportNonStandardSymbol is set, several extensions are accepted:
//   - compound assignments (`+=`, `-=`, `*=`, `/=`, `//=`, `^=`, `%=`, `|=`,
//     `&=`, `<<=`, `>>=`) are all reported as '=';
//   - `&&` -> TK_AND, `||` -> TK_OR, `!` -> TK_NOT, `!=` -> TK_NE;
//   - `?.`, `?:`, `?[` are reported as '.', ':', '[';
//   - `{@name}` is reported as TK_NAME (TK_ERR when the closing '}' is missing);
//   - `/* ... */` is a long comment, and `// ...` is a short comment when
//     _supportCLikeComments is also set.
//
// Errors (unfinished strings, bad long-bracket delimiters) are recorded via
// TokenError(); a token kind is still returned in those cases.
LuaTokenKind LuaLexer::Lex() {
    _reader.ResetBuffer();

    for (;;) {
        int ch = _reader.GetCurrentChar();
        switch (ch) {
            case '\n':
            case '\r': {
                IncLinenumber();
                break;
            }
            case ' ':
            case '\f':
            case '\t':
            case '\v': {
                _reader.NextChar();
                break;
            }
            case '-': {
                _reader.SaveAndNext();
                if (_reader.GetCurrentChar() != '-') {
                    if (_supportNonStandardSymbol && _reader.CheckNext1('=')) {
                        return '=';
                    }
                    return '-';
                }

                // Second '-': this is a comment.
                _reader.SaveAndNext();

                LuaTokenKind type = TK_SHORT_COMMENT;
                if (_reader.GetCurrentChar() == '[') {
                    std::size_t sep = SkipSep();
                    if (sep >= 2) {
                        ReadLongString(sep);
                        return TK_LONG_COMMENT;
                    }
                    // Not a valid long bracket: treat the rest of the line as
                    // a short comment.
                } else if (_reader.GetCurrentChar() == '-') {
                    _reader.SaveAndNext();
                }

                while (!CurrentIsNewLine() && _reader.GetCurrentChar() != EOZ) {
                    _reader.SaveAndNext();
                }
                return type;
            }
            case '+': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol && _reader.CheckNext1('=')) {
                    return '=';
                }
                return '+';
            }
            case '*': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol && _reader.CheckNext1('=')) {
                    return '=';
                }
                return '*';
            }
            case '[': {
                std::size_t sep = SkipSep();
                if (sep >= 2) {
                    ReadLongString(sep);
                    return TK_LONG_STRING;
                } else if (sep == 0) {
                    // '[=' ... without a second '[': malformed long bracket.
                    TokenError("invalid long string delimiter",
                               TextRange(_reader.GetPos(), 0));
                    return TK_LONG_STRING;
                }
                return '[';
            }
            case '=': {
                _reader.SaveAndNext();
                if (_reader.CheckNext1('=')) {
                    return TK_EQ;
                } else {
                    return '=';
                }
            }
            case '<': {
                _reader.SaveAndNext();
                if (_reader.CheckNext1('=')) {
                    return TK_LE;
                } else if (_reader.CheckNext1('<')) {
                    if (_supportNonStandardSymbol && _reader.CheckNext1('=')) {
                        return '=';
                    }
                    return TK_SHL;
                } else {
                    return '<';
                }
            }
            case '>': {
                _reader.SaveAndNext();
                if (_reader.CheckNext1('=')) {
                    return TK_GE;
                } else if (_reader.CheckNext1('>')) {
                    if (_supportNonStandardSymbol && _reader.CheckNext1('=')) {
                        return '=';
                    }
                    return TK_SHR;
                } else {
                    return '>';
                }
            }
            case '/': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol) {
                    switch (_reader.GetCurrentChar()) {
                        case '=': {
                            _reader.SaveAndNext();
                            return '=';
                        }
                        case '/': {
                            _reader.SaveAndNext();
                            if (_supportCLikeComments) {
                                // `// ...` line comment.
                                while (!CurrentIsNewLine() && _reader.GetCurrentChar() != EOZ) {
                                    _reader.SaveAndNext();
                                }
                                return TK_SHORT_COMMENT;
                            }
                            if (_reader.CheckNext1('=')) {
                                return '=';
                            }
                            return TK_IDIV;
                        }
                        case '*': {
                            _reader.SaveAndNext();
                            ReadLongCLikeComment();
                            return TK_LONG_COMMENT;
                        }
                        default: {
                            return '/';
                        }
                    }
                } else {
                    if (_reader.CheckNext1('/')) {
                        return TK_IDIV;
                    }
                    return '/';
                }
            }
            case '~': {
                _reader.SaveAndNext();
                if (_reader.CheckNext1('=')) {
                    return TK_NE;
                } else {
                    return '~';
                }
            }
            case '^': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol && _reader.CheckNext1('=')) {
                    return '=';
                }
                return '^';
            }
            case '|': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol) {
                    if (_reader.CheckNext1('|')) {
                        return TK_OR;
                    }
                    if (_reader.CheckNext1('=')) {
                        return '=';
                    }
                }
                return '|';
            }
            case '&': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol) {
                    if (_reader.CheckNext1('&')) {
                        // `&&` is the logical AND counterpart of `||` above.
                        return TK_AND;
                    }
                    if (_reader.CheckNext1('=')) {
                        return '=';
                    }
                }
                return '&';
            }
            case '%': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol && _reader.CheckNext1('=')) {
                    return '=';
                }
                return '%';
            }
            case '!': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol) {
                    if (_reader.CheckNext1('=')) {
                        return TK_NE;
                    }
                    return TK_NOT;
                }
                return '!';
            }
            case ':': {
                _reader.SaveAndNext();
                if (_reader.CheckNext1(':')) {
                    return TK_DBCOLON;
                } else {
                    return ':';
                }
            }
            case '"':
            case '\'':
            case '`': {
                ReadString(ch);
                return TK_STRING;
            }
            case '.': {
                _reader.SaveAndNext();
                if (_reader.CheckNext1('.')) {
                    if (_reader.CheckNext1('.')) {
                        return TK_DOTS;
                    } else {
                        return TK_CONCAT;
                    }
                } else if (!lisdigit(_reader.GetCurrentChar())) {
                    return '.';
                } else {
                    // A '.' directly followed by a digit starts a numeral.
                    return ReadNumeral();
                }
            }
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9': {
                return ReadNumeral();
            }
            case EOZ: {
                return TK_EOF;
            }
            case '#': {
                _reader.SaveAndNext();
                // A '#' seen before any line break or token is a shebang line.
                if (_linenumber == 0 && _tokens.empty()) {
                    while (!CurrentIsNewLine() && _reader.GetCurrentChar() != EOZ) {
                        _reader.SaveAndNext();
                    }
                    return TK_SHEBANG;
                }
                return '#';
            }
            case '{': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol && _reader.CheckNext1('@')) {
                    _reader.EatWhile([](char c) { return lislalnum(c); });
                    if (_reader.CheckNext1('}')) {
                        return TK_NAME;
                    }
                    return TK_ERR;
                }
                return '{';
            }
            case '?': {
                _reader.SaveAndNext();
                if (_supportNonStandardSymbol) {
                    if (_reader.CheckNext1('.')) {
                        return '.';
                    } else if (_reader.CheckNext1(':')) {
                        return ':';
                    } else if (_reader.CheckNext1('[')) {
                        return '[';
                    }
                }
                return '?';
            }
            default: {
                if (lislalpha(_reader.GetCurrentChar())) {
                    // Identifier or reserved word.
                    do {
                        _reader.SaveAndNext();
                    } while (lislalnum(_reader.GetCurrentChar()));

                    auto text = _reader.GetSaveText();
                    // One lookup decides both "is it reserved?" and "which kind?".
                    const auto reserved = LuaReserved.find(text);
                    if (reserved != LuaReserved.end()) {
                        return reserved->second;
                    }
                    return TK_NAME;
                } else {
                    // Any other character is a single-character token.
                    int c = _reader.GetCurrentChar();
                    _reader.SaveAndNext();
                    return c;
                }
            }
        }
    }
}
// Consumes a numeric literal starting at the current character and returns
// TK_NUMBER.
//
// A leading "0x"/"0X" switches the exponent markers from "Ee" to "Pp".
// After an exponent marker an optional sign is accepted. Hex digits and '.'
// are consumed greedily; no validation of the resulting literal is done
// here. Any run of letters glued to the numeral (std::isalpha, so '_' is not
// included) is swallowed into the same token.
LuaTokenKind LuaLexer::ReadNumeral() {
    const int leading = _reader.GetCurrentChar();
    _reader.SaveAndNext();

    const char *exponentMarks = "Ee";
    if (leading == '0' && _reader.CheckNext2("xX")) {
        exponentMarks = "Pp";
    }

    while (true) {
        if (_reader.CheckNext2(exponentMarks)) {
            _reader.CheckNext2("-+");
            continue;
        }
        const bool partOfNumber =
                lisxdigit(_reader.GetCurrentChar()) || _reader.GetCurrentChar() == '.';
        if (!partOfNumber) {
            break;
        }
        _reader.SaveAndNext();
    }

    // Trailing letters touching the numeral become part of the token.
    while (std::isalpha(_reader.GetCurrentChar())) {
        _reader.SaveAndNext();
    }

    return TK_NUMBER;
}
// Reads a long-bracket separator: the current bracket character ('[' or ']')
// followed by zero or more '='.
//
// Returns:
//   - (number of '=') + 2 when the sequence is followed by the same bracket
//     character (a well-formed long bracket; the second bracket is NOT
//     consumed here);
//   - 1 when there was no '=' and no second bracket (a lone bracket);
//   - 0 when at least one '=' was read but no second bracket followed.
std::size_t LuaLexer::SkipSep() {
    const int bracket = _reader.GetCurrentChar();
    _reader.SaveAndNext();

    std::size_t level = 0;
    for (; _reader.GetCurrentChar() == '='; ++level) {
        _reader.SaveAndNext();
    }

    if (_reader.GetCurrentChar() == bracket) {
        return level + 2;
    }
    if (level == 0) {
        return 1;
    }
    return 0;
}
// Consumes the body of a long bracket (long string or long comment) up to
// and including the closing bracket whose SkipSep() value equals `sep`.
//
// Expects the reader to be on the second opening '['. A line break directly
// after the opening bracket, and every line break inside the body, goes
// through IncLinenumber(). Reaching EOZ first records an error and returns.
void LuaLexer::ReadLongString(std::size_t sep) {
    _reader.SaveAndNext();

    if (CurrentIsNewLine()) {
        IncLinenumber();
    }

    while (true) {
        const int current = _reader.GetCurrentChar();

        if (current == EOZ) {
            TokenError("unfinished long string starting", TextRange(_reader.GetPos(), 0));
            return;
        }

        if (current == ']') {
            if (SkipSep() == sep) {
                _reader.SaveAndNext();
                return;
            }
            continue;
        }

        if (current == '\n' || current == '\r') {
            _reader.Save();
            IncLinenumber();
            continue;
        }

        _reader.SaveAndNext();
    }
}
// Consumes the body of a `/* ... */` comment up to and including the
// closing `*/`. The caller has already consumed the opening `/*`.
//
// Line breaks inside the comment go through IncLinenumber(). Reaching EOZ
// before `*/` records an error and returns.
void LuaLexer::ReadLongCLikeComment() {
    if (CurrentIsNewLine()) {
        IncLinenumber();
    }

    while (true) {
        const int current = _reader.GetCurrentChar();

        if (current == EOZ) {
            TokenError("unfinished long string starting", TextRange(_reader.GetPos(), 0));
            return;
        }

        if (current == '*') {
            _reader.SaveAndNext();
            if (_reader.CheckNext1('/')) {
                return;
            }
            continue;
        }

        if (current == '\n' || current == '\r') {
            _reader.Save();
            IncLinenumber();
            continue;
        }

        _reader.SaveAndNext();
    }
}
// Consumes a quoted string whose opening delimiter `del` is the current
// character, up to and including the matching closing delimiter.
//
// An unescaped line break or EOZ inside the string records
// "unfinished string" and returns early. Escape handling is limited to what
// affects scanning:
//   - `\z` skips the following whitespace (line breaks are counted);
//   - a backslash followed by a line break continues the string on the next
//     line;
//   - any other escaped character is consumed verbatim, so an escaped
//     delimiter does not end the string.
void LuaLexer::ReadString(int del) {
    _reader.SaveAndNext();  // opening delimiter

    while (_reader.GetCurrentChar() != del) {
        const int current = _reader.GetCurrentChar();

        if (current == EOZ || current == '\n' || current == '\r') {
            TokenError("unfinished string", TextRange(_reader.GetPos(), 0));
            return;
        }

        if (current == '\\') {
            _reader.SaveAndNext();
            const int escaped = _reader.GetCurrentChar();

            if (escaped == EOZ) {
                TokenError("unfinished string", TextRange(_reader.GetPos(), 0));
                return;
            }

            if (escaped == 'z') {
                _reader.SaveAndNext();
                while (lisspace(_reader.GetCurrentChar())) {
                    if (CurrentIsNewLine()) {
                        IncLinenumber();
                    } else {
                        _reader.SaveAndNext();
                    }
                }
                continue;
            }

            if (escaped == '\r' || escaped == '\n') {
                if (CurrentIsNewLine()) {
                    IncLinenumber();
                }
                continue;
            }
        }

        _reader.SaveAndNext();
    }

    _reader.SaveAndNext();  // closing delimiter
}
// Consumes one line break at the current position and updates line
// bookkeeping.
//
// A two-character break made of two *different* newline characters is
// consumed as a single break and reported to the file as CRLF; otherwise a
// lone '\n' is reported as LF and a lone '\r' as CR. The line counter is
// then incremented and, unless it has reached the int maximum, the reader
// position (start of the next line) is pushed to the file.
void LuaLexer::IncLinenumber() {
    const int first = _reader.GetCurrentChar();
    _reader.NextChar();

    const bool pairedBreak = CurrentIsNewLine() && _reader.GetCurrentChar() != first;
    if (pairedBreak) {
        _reader.NextChar();
        _file->SetEndOfLineState(EndOfLine::CRLF);
    } else if (first == '\n') {
        _file->SetEndOfLineState(EndOfLine::LF);
    } else {
        _file->SetEndOfLineState(EndOfLine::CR);
    }

    const bool counterSaturated = ++_linenumber >= std::numeric_limits<int>::max();
    if (!counterSaturated) {
        _file->PushLine(_reader.GetPos());
    }
}
// Returns true when the reader's current character is '\n' or '\r'.
bool LuaLexer::CurrentIsNewLine() {
    switch (_reader.GetCurrentChar()) {
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
// Returns true when `text` is a key of LuaReserved (a reserved word or one
// of the multi-character operator spellings stored there).
bool LuaLexer::IsReserved(std::string_view text) {
    const auto entry = LuaReserved.find(text);
    const bool known = entry != LuaReserved.end();
    return known;
}
// Appends a lexing error with the given message and source range to _errors.
// NOTE(review): the trailing 0 is forwarded to LuaTokenError's constructor;
// its meaning is not visible in this file — confirm against LuaTokenError.
void LuaLexer::TokenError(std::string_view message, TextRange range) {
_errors.emplace_back(message, range, 0);
}