#include "IntStream.h"
#include "atn/OrderedATNConfigSet.h"
#include "Token.h"
#include "LexerNoViableAltException.h"
#include "atn/RuleStopState.h"
#include "atn/RuleTransition.h"
#include "atn/SingletonPredictionContext.h"
#include "atn/PredicateTransition.h"
#include "atn/ActionTransition.h"
#include "atn/TokensStartState.h"
#include "misc/Interval.h"
#include "dfa/DFA.h"
#include "Lexer.h"
#include "internal/Synchronization.h"
#include "dfa/DFAState.h"
#include "atn/LexerATNConfig.h"
#include "atn/LexerActionExecutor.h"
#include "atn/LexerATNSimulator.h"
#ifndef LEXER_DEBUG_ATN
#define LEXER_DEBUG_ATN 0
#endif
#ifndef LEXER_DEBUG_DFA
#define LEXER_DEBUG_DFA 0
#endif
using namespace antlr4;
using namespace antlr4::atn;
using namespace antlr4::internal;
using namespace antlrcpp;
void LexerATNSimulator::SimState::reset() {
*this = SimState();
}
/// Constructs a simulator that has no Lexer attached.
///
/// Delegates to the four-argument constructor, passing nullptr as the recognizer.
LexerATNSimulator::LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA,
PredictionContextCache &sharedContextCache)
: LexerATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) {
}
/// Constructs a simulator for the given recognizer.
///
/// `atn` and `sharedContextCache` are forwarded to the ATNSimulator base; `_recog` and
/// `_decisionToDFA` are initialized from `recog` and `decisionToDFA`.
/// @param recog the owning lexer; may be nullptr (the three-argument constructor passes nullptr).
LexerATNSimulator::LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA,
PredictionContextCache &sharedContextCache)
: ATNSimulator(atn, sharedContextCache), _recog(recog), _decisionToDFA(decisionToDFA) {
// Sets _startIndex, _line, _charPositionInLine and _mode to their starting values.
InitializeInstanceFields();
}
/// Copies the lexing position and mode from another simulator into this one.
///
/// Only _startIndex, _line, _charPositionInLine and _mode are transferred.
/// @param simulator source simulator; it is dereferenced unconditionally, so it must not be null.
void LexerATNSimulator::copyState(LexerATNSimulator *simulator) {
  const LexerATNSimulator &source = *simulator;
  _startIndex = source._startIndex;
  _mode = source._mode;
  _line = source._line;
  _charPositionInLine = source._charPositionInLine;
}
/// Matches one token starting at the current position of `input` in the given lexer mode.
///
/// Records `mode` and the start index, clears the previously captured accept state, then
/// continues from the cached start state of the mode's DFA if one exists, or builds it via
/// matchATN() otherwise.
/// @param input character stream to read from
/// @param mode  index into the decision-to-DFA table
/// @return whatever matchATN() / execATN() return for this input
size_t LexerATNSimulator::match(CharStream *input, size_t mode) {
  _mode = mode;

  // Hold a mark on the stream for the whole match; the guard releases it on every exit path.
  const ssize_t marker = input->mark();
  auto releaseMark = finally([input, marker] {
    input->release(marker);
  });

  _startIndex = input->index();
  _prevAccept.reset();

  const dfa::DFA &modeDfa = _decisionToDFA[mode];
  dfa::DFAState *initialState = nullptr;
  {
    // s0 is written under the unique lock in addDFAState(); read it under the shared lock.
    SharedLock<SharedMutex> stateLock(atn._stateMutex);
    initialState = modeDfa.s0;
  }

  return initialState == nullptr ? matchATN(input) : execATN(input, initialState);
}
/// Returns the simulator to its starting position: default mode, line 1, column 0,
/// start index 0, and no captured accept state.
void LexerATNSimulator::reset() {
  _mode = Lexer::DEFAULT_MODE;
  _charPositionInLine = 0;
  _line = 1;
  _startIndex = 0;
  _prevAccept.reset();
}
/// Discards every DFA in the decision table and replaces each with a freshly constructed
/// DFA for the same decision number, keeping the table the same length.
void LexerATNSimulator::clearDFA() {
  const size_t decisionCount = _decisionToDFA.size();
  _decisionToDFA.clear();

  size_t decision = 0;
  while (decision < decisionCount) {
    _decisionToDFA.emplace_back(atn.getDecisionState(decision), decision);
    ++decision;
  }
}
/// Builds the start DFA state for the current mode from the ATN and then runs execATN() from it.
///
/// The start configuration set's hasSemanticContext flag is read and then cleared before the
/// set is handed to addDFAState(); the flag's original value is passed as `suppressEdge`, so the
/// resulting state is only stored as the DFA's s0 when the flag was not set.
size_t LexerATNSimulator::matchATN(CharStream *input) {
  ATNState *modeStart = atn.modeToStartState[_mode];
  std::unique_ptr<ATNConfigSet> startConfigs = computeStartState(input, modeStart);

  const bool sawPredicate = startConfigs->hasSemanticContext;
  startConfigs->hasSemanticContext = false;

  // Ownership of the configuration set passes to the DFA state created by addDFAState().
  dfa::DFAState *startDfaState = addDFAState(startConfigs.release(), sawPredicate);
  return execATN(input, startDfaState);
}
/// Runs the DFA simulation loop starting from `ds0`.
///
/// For each lookahead symbol the next state is taken from an existing edge, or computed from
/// the ATN when no edge exists. The loop ends when the ERROR state is reached, or when an
/// accept state is reached on EOF. Every accept state encountered is captured so that
/// failOrAccept() can fall back to the most recent one.
/// @return the result of failOrAccept() for the last non-error state and symbol
size_t LexerATNSimulator::execATN(CharStream *input, dfa::DFAState *ds0) {
  if (ds0->isAcceptState) {
    // The start state itself may accept, i.e. a match that consumes nothing.
    captureSimState(input, ds0);
  }

  dfa::DFAState *current = ds0;
  size_t symbol = input->LA(1);

  for (;;) {
    dfa::DFAState *next = getExistingTargetState(current, symbol);
    if (next == nullptr) {
      next = computeTargetState(input, current, symbol);
    }
    if (next == ERROR.get()) {
      break;
    }

    // Consume before capturing so the captured index/line/column describe the position
    // after this symbol. EOF is never consumed.
    const bool atEof = symbol == Token::EOF;
    if (!atEof) {
      consume(input);
    }

    if (next->isAcceptState) {
      captureSimState(input, next);
      if (atEof) {
        break;
      }
    }

    symbol = input->LA(1);
    current = next;
  }

  return failOrAccept(input, current->configs.get(), symbol);
}
/// Looks up an already-recorded DFA edge from `s` on symbol `t`.
///
/// The lookup happens under a shared lock on atn._edgeMutex. Symbols greater than MAX_DFA_EDGE
/// are never looked up.
/// @return the target state, or nullptr when no edge is recorded for `t`
dfa::DFAState *LexerATNSimulator::getExistingTargetState(dfa::DFAState *s, size_t t) {
  SharedLock<SharedMutex> edgeLock(atn._edgeMutex);
  if (t > MAX_DFA_EDGE) {
    return nullptr;
  }

  auto edge = s->edges.find(t - MIN_DFA_EDGE);
  if (edge == s->edges.end()) {
    return nullptr;
  }
#if LEXER_DEBUG_ATN == 1
  std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << edge->second->stateNumber << std::endl;
#endif
  return edge->second;
}
/// Computes the DFA state reached from `s` on symbol `t` by simulating the ATN, and records
/// the result in the DFA where allowed.
///
/// @param input character stream being lexed
/// @param s     current DFA state
/// @param t     lookahead symbol
/// @return the target DFA state, or the ERROR state when no configuration is reachable on `t`
dfa::DFAState *LexerATNSimulator::computeTargetState(CharStream *input, dfa::DFAState *s, size_t t) {
  // Held by a unique_ptr so the set is freed on every path that does not hand it off,
  // including when getReachableConfigSet() or addDFAEdge() throws.
  auto reach = std::make_unique<OrderedATNConfigSet>();
  getReachableConfigSet(input, s->configs.get(), reach.get(), t);

  if (reach->isEmpty()) {
    // Nothing reachable on t. Remember that as an ERROR edge, unless a predicate was
    // evaluated along the way (hasSemanticContext), in which case no edge is recorded.
    if (!reach->hasSemanticContext) {
      addDFAEdge(s, t, ERROR.get());
    }
    return ERROR.get();
  }

  // addDFAEdge() passes the set on to addDFAState(), which takes ownership of it.
  return addDFAEdge(s, t, reach.release());
}
/// Finishes a match attempt after the simulation loop has stopped.
///
/// If an accept state was captured, rewinds to it via accept() and returns its prediction.
/// Otherwise returns Token::EOF when the symbol is EOF and nothing was consumed since
/// _startIndex.
/// @throws LexerNoViableAltException when no accept state was captured and the EOF case does not apply
size_t LexerATNSimulator::failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t) {
  if (_prevAccept.dfaState == nullptr) {
    const bool eofAtStart = t == Token::EOF && input->index() == _startIndex;
    if (eofAtStart) {
      return Token::EOF;
    }
    throw LexerNoViableAltException(_recog, input, _startIndex, reach);
  }

  accept(input, _prevAccept.dfaState->lexerActionExecutor, _startIndex, _prevAccept.index, _prevAccept.line, _prevAccept.charPos);
  return _prevAccept.dfaState->prediction;
}
/// Fills `reach` with the configurations reachable from `closure_` by matching symbol `t`.
///
/// For every configuration, each transition that matches `t` produces a new configuration at
/// the transition target, which is then expanded by closure(). Once closure() reports that an
/// alternative reached an accept state, the remaining transitions of that configuration are
/// skipped, and later configurations of the same alternative that passed through a
/// non-greedy decision are skipped entirely.
void LexerATNSimulator::getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, ATNConfigSet *reach, size_t t) {
  const bool treatEofAsEpsilon = t == Token::EOF;
  size_t skipAlt = ATN::INVALID_ALT_NUMBER;

  for (const auto &c : closure_->configs) {
    const bool currentAltReachedAcceptState = c->alt == skipAlt;
    if (currentAltReachedAcceptState && (std::static_pointer_cast<LexerATNConfig>(c))->hasPassedThroughNonGreedyDecision()) {
      continue;
    }

#if LEXER_DEBUG_ATN == 1
    std::cout << "testing " << getTokenName((int)t) << " at " << c->toString(true) << std::endl;
#endif

    for (const auto &transition : c->state->transitions) {
      ATNState *target = getReachableTarget(transition.get(), static_cast<int>(t));
      if (target == nullptr) {
        continue;
      }

      const LexerATNConfig &lexerConfig = downCast<const LexerATNConfig&>(*c);
      auto executor = lexerConfig.getLexerActionExecutor();
      if (executor != nullptr) {
        // Offset is the number of symbols consumed since the start of the token.
        executor = executor->fixOffsetBeforeMatch(static_cast<int>(input->index()) - static_cast<int>(_startIndex));
      }

      Ref<LexerATNConfig> config = std::make_shared<LexerATNConfig>(lexerConfig, target, std::move(executor));
      if (closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)) {
        // This alternative reached an accept state; stop exploring this configuration.
        skipAlt = c->alt;
        break;
      }
    }
  }
}
/// Commits an accepted match: seeks the stream to `index`, restores the given line and column,
/// and runs the lexer actions when both an executor and a recognizer are present.
///
/// The third parameter is unused; the actions are executed with the member _startIndex.
void LexerATNSimulator::accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t /*startIndex*/,
                               size_t index, size_t line, size_t charPos) {
#if LEXER_DEBUG_ATN == 1
  std::cout << "ACTION ";
  std::cout << toString(lexerActionExecutor) << std::endl;
#endif

  input->seek(index);
  _charPositionInLine = static_cast<int>(charPos);
  _line = line;

  if (_recog == nullptr || lexerActionExecutor == nullptr) {
    return;
  }
  lexerActionExecutor->execute(_recog, input, _startIndex);
}
/// Returns the target state of `trans` when the transition matches symbol `t` within the
/// lexer's character range, or nullptr when it does not match.
atn::ATNState *LexerATNSimulator::getReachableTarget(const Transition *trans, size_t t) {
  const bool matched = trans->matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
  return matched ? trans->target : nullptr;
}
/// Builds the initial configuration set for start state `p`.
///
/// Each outgoing transition of `p` seeds one configuration whose alternative number is the
/// transition's 1-based position; every seed is expanded with closure() (non-speculative,
/// EOF not treated as epsilon) into an ordered configuration set.
std::unique_ptr<ATNConfigSet> LexerATNSimulator::computeStartState(CharStream *input, ATNState *p) {
  Ref<const PredictionContext> emptyContext = PredictionContext::EMPTY;
  std::unique_ptr<ATNConfigSet> startConfigs(new OrderedATNConfigSet());

  size_t alt = 0;
  for (const auto &transition : p->transitions) {
    ++alt;
    ATNState *target = transition->target;
    Ref<LexerATNConfig> seed = std::make_shared<LexerATNConfig>(target, static_cast<int>(alt), emptyContext);
    closure(input, seed, startConfigs.get(), false, false, false);
  }

  return startConfigs;
}
/// Recursively expands `config` through non-consuming transitions, adding the resulting
/// configurations to `configs`.
///
/// At a rule stop state the prediction context is popped: an empty (or missing) context means
/// the configuration is added and the alternative has reached an accept state; otherwise
/// closure continues from every non-empty return state of the context.
/// At any other state the configuration is added unless the state has only epsilon
/// transitions, or the alternative has already accepted and this configuration passed through
/// a non-greedy decision; then each transition's epsilon target (if any) is expanded.
/// @return whether the current alternative has reached an accept state
bool LexerATNSimulator::closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
                                bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon) {
#if LEXER_DEBUG_ATN == 1
  std::cout << "closure(" << config->toString(true) << ")" << std::endl;
#endif

  if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) {
#if LEXER_DEBUG_ATN == 1
    if (_recog != nullptr) {
      std::cout << "closure at " << _recog->getRuleNames()[config->state->ruleIndex] << " rule stop " << config << std::endl;
    } else {
      std::cout << "closure at rule stop " << config << std::endl;
    }
#endif

    const auto &context = config->context;
    const bool noContext = context == nullptr;

    if (noContext || context->hasEmptyPath()) {
      if (noContext || context->isEmpty()) {
        configs->add(config);
        return true;
      }
      // The context has an empty path among others: record the accept with the empty
      // context and keep following the remaining return states below.
      configs->add(std::make_shared<LexerATNConfig>(*config, config->state, PredictionContext::EMPTY));
      currentAltReachedAcceptState = true;
    }

    if (!noContext && !context->isEmpty()) {
      for (size_t slot = 0; slot < context->size(); ++slot) {
        if (context->getReturnState(slot) == PredictionContext::EMPTY_RETURN_STATE) {
          continue;
        }
        // Pop one return state: continue in the invoking rule with the parent context.
        Ref<const PredictionContext> newContext = context->getParent(slot);
        ATNState *returnState = atn.states[context->getReturnState(slot)];
        Ref<LexerATNConfig> popped = std::make_shared<LexerATNConfig>(*config, returnState, newContext);
        currentAltReachedAcceptState = closure(input, popped, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon);
      }
    }

    return currentAltReachedAcceptState;
  }

  if (!config->state->epsilonOnlyTransitions &&
      (!currentAltReachedAcceptState || !config->hasPassedThroughNonGreedyDecision())) {
    configs->add(config);
  }

  ATNState *p = config->state;
  for (const auto &transition : p->transitions) {
    Ref<LexerATNConfig> next = getEpsilonTarget(input, config, transition.get(), configs, speculative, treatEofAsEpsilon);
    if (next != nullptr) {
      currentAltReachedAcceptState = closure(input, next, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon);
    }
  }

  return currentAltReachedAcceptState;
}
/// Produces the configuration reached by following transition `t` from `config` without
/// consuming input, or nullptr when `t` cannot be followed that way.
///
/// - RULE: pushes the follow state onto the prediction context and enters the rule.
/// - PREDICATE: marks `configs` as having semantic context, then follows the transition only
///   if the predicate evaluates to true.
/// - ACTION: appends the lexer action to the configuration's executor when the context is
///   null or has an empty path; otherwise follows the transition without recording the action.
/// - EPSILON: follows the transition.
/// - ATOM / RANGE / SET: followed only when `treatEofAsEpsilon` is set and the transition
///   matches EOF.
/// @throws UnsupportedOperationException for PRECEDENCE transitions
Ref<LexerATNConfig> LexerATNSimulator::getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t,
                                                        ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon) {
  Ref<LexerATNConfig> next;

  switch (t->getTransitionType()) {
    case TransitionType::RULE: {
      const RuleTransition *ruleTransition = static_cast<const RuleTransition*>(t);
      Ref<const PredictionContext> pushed = SingletonPredictionContext::create(config->context, ruleTransition->followState->stateNumber);
      next = std::make_shared<LexerATNConfig>(*config, t->target, pushed);
      break;
    }

    case TransitionType::PRECEDENCE:
      throw UnsupportedOperationException("Precedence predicates are not supported in lexers.");

    case TransitionType::PREDICATE: {
      const PredicateTransition *pt = static_cast<const PredicateTransition*>(t);
#if LEXER_DEBUG_ATN == 1
      std::cout << "EVAL rule " << pt->getRuleIndex() << ":" << pt->getPredIndex() << std::endl;
#endif
      // Set before evaluating: callers read this flag to decide whether to record a DFA edge.
      configs->hasSemanticContext = true;
      if (evaluatePredicate(input, pt->getRuleIndex(), pt->getPredIndex(), speculative)) {
        next = std::make_shared<LexerATNConfig>(*config, t->target);
      }
      break;
    }

    case TransitionType::ACTION: {
      if (config->context != nullptr && !config->context->hasEmptyPath()) {
        next = std::make_shared<LexerATNConfig>(*config, t->target);
      } else {
        auto lexerActionExecutor = LexerActionExecutor::append(config->getLexerActionExecutor(),
          atn.lexerActions[static_cast<const ActionTransition *>(t)->actionIndex]);
        next = std::make_shared<LexerATNConfig>(*config, t->target, std::move(lexerActionExecutor));
      }
      break;
    }

    case TransitionType::EPSILON:
      next = std::make_shared<LexerATNConfig>(*config, t->target);
      break;

    case TransitionType::ATOM:
    case TransitionType::RANGE:
    case TransitionType::SET:
      if (treatEofAsEpsilon && t->matches(Token::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) {
        next = std::make_shared<LexerATNConfig>(*config, t->target);
      }
      break;

    default:
      break;
  }

  return next;
}
/// Evaluates a lexer semantic predicate through the recognizer.
///
/// Without a recognizer the predicate is assumed to hold. In speculative mode one symbol is
/// consumed before the predicate is evaluated, and the stream position, line and column are
/// restored afterwards (also when the predicate throws).
bool LexerATNSimulator::evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative) {
  if (_recog == nullptr) {
    return true;
  }

  if (speculative) {
    const size_t charPositionBefore = _charPositionInLine;
    const size_t lineBefore = _line;
    const size_t indexBefore = input->index();
    const ssize_t marker = input->mark();

    auto restorePosition = finally([this, input, charPositionBefore, lineBefore, indexBefore, marker] {
      _charPositionInLine = charPositionBefore;
      _line = lineBefore;
      input->seek(indexBefore);
      input->release(marker);
    });

    consume(input);
    return _recog->sempred(nullptr, ruleIndex, predIndex);
  }

  return _recog->sempred(nullptr, ruleIndex, predIndex);
}
/// Records `dfaState` together with the current stream index, line and column as the most
/// recent accept position in _prevAccept.
void LexerATNSimulator::captureSimState(CharStream *input, dfa::DFAState *dfaState) {
  SimState &snapshot = _prevAccept;
  snapshot.index = input->index();
  snapshot.dfaState = dfaState;
  snapshot.charPos = _charPositionInLine;
  snapshot.line = _line;
}
/// Turns configuration set `q` into a DFA state and, unless suppressed, records the edge
/// `from` --t--> that state.
///
/// The set's hasSemanticContext flag is read and then cleared before the state is created;
/// when the flag was set, the state is still created but no edge is recorded.
/// @return the DFA state for `q`
dfa::DFAState *LexerATNSimulator::addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q) {
  const bool predicateSeen = q->hasSemanticContext;
  q->hasSemanticContext = false;

  dfa::DFAState *target = addDFAState(q);
  if (!predicateSeen) {
    addDFAEdge(from, t, target);
  }
  return target;
}
/// Records the edge p --t--> q under a unique lock on atn._edgeMutex.
///
/// Symbols greater than MAX_DFA_EDGE are not recorded.
void LexerATNSimulator::addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q) {
  if (t <= MAX_DFA_EDGE) {
    UniqueLock<SharedMutex> edgeLock(atn._edgeMutex);
    p->edges[t - MIN_DFA_EDGE] = q;
  }
}
/// Adds a DFA state for `configs` without making it the DFA's start state.
dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs) {
  constexpr bool suppressEdge = true;
  return addDFAState(configs, suppressEdge);
}
/// Creates (or finds) the DFA state for `configs` in the current mode's DFA.
///
/// Takes ownership of `configs`. The state is an accept state when any configuration sits at
/// a rule stop state; the first such configuration supplies the lexer action executor and the
/// predicted token type. If an equal state is already in the DFA, the new one is deleted and
/// the existing one is returned; otherwise the new state gets a state number and its
/// configuration set is made read-only.
/// @param configs      configuration set; must not have hasSemanticContext set
/// @param suppressEdge when false, the resulting state is also stored as the DFA's s0
/// @return the DFA state held by the DFA
dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs, bool suppressEdge) {
  assert(!configs->hasSemanticContext);

  // From here on `configs` is owned by the candidate state.
  dfa::DFAState *candidate = new dfa::DFAState(std::unique_ptr<ATNConfigSet>(configs));

  for (const auto &c : configs->configs) {
    if (!RuleStopState::is(c->state)) {
      continue;
    }
    candidate->isAcceptState = true;
    candidate->lexerActionExecutor = downCast<const LexerATNConfig&>(*c).getLexerActionExecutor();
    candidate->prediction = atn.ruleToTokenType[c->state->ruleIndex];
    break;
  }

  dfa::DFA &modeDfa = _decisionToDFA[_mode];
  {
    UniqueLock<SharedMutex> stateLock(atn._stateMutex);
    auto [slot, isNew] = modeDfa.states.insert(candidate);
    if (isNew) {
      // The state is already inserted, so its number is the new size minus one.
      candidate->stateNumber = static_cast<int>(modeDfa.states.size() - 1);
      candidate->configs->setReadonly(true);
    } else {
      delete candidate;
      candidate = *slot;
    }
    if (!suppressEdge) {
      modeDfa.s0 = candidate;
    }
  }

  return candidate;
}
/// Returns the DFA stored for lexer mode `mode`. The index is not range-checked.
dfa::DFA& LexerATNSimulator::getDFA(size_t mode) {
  dfa::DFA &modeDfa = _decisionToDFA[mode];
  return modeDfa;
}
/// Returns the text of `input` from _startIndex up to, but not including, the stream's
/// current index.
std::string LexerATNSimulator::getText(CharStream *input) {
  const misc::Interval matched(_startIndex, input->index() - 1);
  return input->getText(matched);
}
size_t LexerATNSimulator::getLine() const {
return _line;
}
void LexerATNSimulator::setLine(size_t line) {
_line = line;
}
size_t LexerATNSimulator::getCharPositionInLine() {
return _charPositionInLine;
}
void LexerATNSimulator::setCharPositionInLine(size_t charPositionInLine) {
_charPositionInLine = charPositionInLine;
}
/// Consumes one symbol from `input` and updates the tracked line and column.
///
/// A '\n' advances the line and resets the column to 0; any other symbol advances the column.
void LexerATNSimulator::consume(CharStream *input) {
  const bool atNewline = input->LA(1) == '\n';
  if (!atNewline) {
    ++_charPositionInLine;
  } else {
    ++_line;
    _charPositionInLine = 0;
  }
  input->consume();
}
/// Returns a printable name for symbol `t`: "EOF" for Token::EOF, otherwise the symbol
/// narrowed to a char and wrapped in single quotes.
std::string LexerATNSimulator::getTokenName(size_t t) {
  if (t == Token::EOF) {
    return std::string("EOF");
  }

  std::string quoted("'");
  quoted += static_cast<char>(t);
  quoted += "'";
  return quoted;
}
/// Sets the position-tracking fields to their starting values: default mode, line 1,
/// column 0, start index 0.
void LexerATNSimulator::InitializeInstanceFields() {
  _mode = antlr4::Lexer::DEFAULT_MODE;
  _charPositionInLine = 0;
  _line = 1;
  _startIndex = 0;
}