lbug 0.15.4

An in-process property graph database management system built for query speed and scalability
Documentation
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include "atn/LexerATNSimulator.h"
#include "Exceptions.h"
#include "misc/Interval.h"
#include "CommonTokenFactory.h"
#include "LexerNoViableAltException.h"
#include "ANTLRErrorListener.h"
#include "support/CPPUtils.h"
#include "CommonToken.h"

#include "Lexer.h"

#define DEBUG_LEXER 0

using namespace antlrcpp;
using namespace antlr4;

// Builds a lexer that has no character stream attached yet: _input is null
// and every other field receives its default from InitializeInstanceFields().
Lexer::Lexer() : Recognizer(), _input(nullptr) {
  InitializeInstanceFields();
}

Lexer::Lexer(CharStream *input) : Recognizer(), _input(input) {
  InitializeInstanceFields();
}

// Returns the lexer to its initial scanning state: rewinds the attached input
// (if any), clears the error counter, the token in progress, the token start
// bookkeeping, the text override, the EOF flag and the mode stack, and finally
// resets the ATN simulator.
void Lexer::reset() {
  // Rewind the input only when a stream is attached. A default-constructed
  // lexer holds a null _input, and setInputStream() calls reset() before it
  // installs the new stream, so an unconditional seek would dereference null.
  if (_input != nullptr) {
    _input->seek(0);
  }

  // Forget everything about the token that was being assembled.
  _syntaxErrors = 0;
  token.reset();
  type = Token::INVALID_TYPE;
  channel = Token::DEFAULT_CHANNEL;
  tokenStartCharIndex = INVALID_INDEX;
  tokenStartCharPositionInLine = 0;
  tokenStartLine = 0;
  _text = "";

  // Back to the default mode with nothing pushed.
  hitEOF = false;
  mode = Lexer::DEFAULT_MODE;
  modeStack.clear();

  getInterpreter<atn::LexerATNSimulator>()->reset();
}

std::unique_ptr<Token> Lexer::nextToken() {
  // Mark start location in char stream so unbuffered streams are
  // guaranteed at least have text of current token
  ssize_t tokenStartMarker = _input->mark();

  auto onExit = finally([this, tokenStartMarker]{
    // make sure we release marker after match or
    // unbuffered char stream will keep buffering
    _input->release(tokenStartMarker);
  });

  while (true) {
  outerContinue:
    if (hitEOF) {
      emitEOF();
      return std::move(token);
    }

    token.reset();
    channel = Token::DEFAULT_CHANNEL;
    tokenStartCharIndex = _input->index();
    tokenStartCharPositionInLine = getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine();
    tokenStartLine = getInterpreter<atn::LexerATNSimulator>()->getLine();
    _text = "";
    do {
      type = Token::INVALID_TYPE;
      size_t ttype;
      try {
        ttype = getInterpreter<atn::LexerATNSimulator>()->match(_input, mode);
      } catch (LexerNoViableAltException &e) {
        notifyListeners(e); // report error
        recover(e);
        ttype = SKIP;
      }
      if (_input->LA(1) == EOF) {
        hitEOF = true;
      }
      if (type == Token::INVALID_TYPE) {
        type = ttype;
      }
      if (type == SKIP) {
        goto outerContinue;
      }
    } while (type == MORE);
    if (token == nullptr) {
      emit();
    }
    return std::move(token);
  }
}

void Lexer::skip() {
  type = SKIP;
}

void Lexer::more() {
  type = MORE;
}

void Lexer::setMode(size_t m) {
  mode = m;
}

// Saves the current mode on the mode stack and then switches to mode `m`.
void Lexer::pushMode(size_t m) {
#if DEBUG_LEXER == 1
    std::cout << "pushMode " << m << std::endl;
#endif

  const size_t suspendedMode = mode;
  modeStack.push_back(suspendedMode);
  setMode(m);
}

// Switches back to the most recently pushed mode, removes it from the mode
// stack and returns the mode that is now current.
// Throws EmptyStackException when the mode stack is empty.
size_t Lexer::popMode() {
  const bool nothingPushed = modeStack.empty();
  if (nothingPushed) {
    throw EmptyStackException();
  }
#if DEBUG_LEXER == 1
    std::cout << std::string("popMode back to ") << modeStack.back() << std::endl;
#endif

  // Switch first, then drop the stack entry that was just restored.
  setMode(modeStack.back());
  modeStack.pop_back();
  return mode;
}


// Returns the factory this lexer uses to create tokens in emit() and
// emitEOF().
TokenFactory<CommonToken>* Lexer::getTokenFactory() {
  return this->_factory;
}

// Resets the lexer and then installs `input` as the new character source.
// Note that reset() runs while the previous stream is still attached.
// `input` is converted with dynamic_cast, so a stream that is not a CharStream
// leaves _input null.
void Lexer::setInputStream(IntStream *input) {
  reset();
  CharStream *charStream = dynamic_cast<CharStream*>(input);
  _input = charStream;
}

// Returns the source name reported by the attached input stream.
std::string Lexer::getSourceName() {
  std::string sourceName = _input->getSourceName();
  return sourceName;
}

// Returns the character stream this lexer currently reads from; may be null
// when the lexer was default-constructed.
CharStream* Lexer::getInputStream() {
  return this->_input;
}

void Lexer::emit(std::unique_ptr<Token> newToken) {
  token = std::move(newToken);
}

// Creates a token from the current lexer state (type, text override, channel
// and recorded start position), makes it the current token and returns a
// non-owning pointer to it.
Token* Lexer::emit() {
  // The token stops at the character just before the current input position.
  const size_t stopIndex = getCharIndex() - 1;
  std::unique_ptr<Token> created = _factory->create({ this, _input }, type, _text, channel,
    tokenStartCharIndex, stopIndex, tokenStartLine, tokenStartCharPositionInLine);
  emit(std::move(created));
  return token.get();
}

// Creates an EOF token with empty text on the default channel, positioned at
// the current line / column and input index, makes it the current token and
// returns a non-owning pointer to it.
Token* Lexer::emitEOF() {
  const size_t cpos = getCharPositionInLine();
  const size_t line = getLine();
  // Start is the current index and stop is one before it.
  const size_t eofIndex = _input->index();
  std::unique_ptr<Token> eofToken =
    _factory->create({ this, _input }, EOF, "", Token::DEFAULT_CHANNEL, eofIndex, eofIndex - 1, line, cpos);
  emit(std::move(eofToken));
  return token.get();
}

// Returns the line number tracked by the lexer ATN simulator.
size_t Lexer::getLine() const {
  auto *simulator = getInterpreter<atn::LexerATNSimulator>();
  return simulator->getLine();
}

// Returns the character position within the line tracked by the lexer ATN
// simulator.
size_t Lexer::getCharPositionInLine() {
  auto *simulator = getInterpreter<atn::LexerATNSimulator>();
  return simulator->getCharPositionInLine();
}

// Forwards `line` to the lexer ATN simulator as its new line number.
void Lexer::setLine(size_t line) {
  auto *simulator = getInterpreter<atn::LexerATNSimulator>();
  simulator->setLine(line);
}

// Forwards `charPositionInLine` to the lexer ATN simulator as its new
// position within the line.
void Lexer::setCharPositionInLine(size_t charPositionInLine) {
  auto *simulator = getInterpreter<atn::LexerATNSimulator>();
  simulator->setCharPositionInLine(charPositionInLine);
}

// Returns the current index of the attached input stream.
size_t Lexer::getCharIndex() {
  const size_t position = _input->index();
  return position;
}

// Returns the text of the current token: the override stored in _text when it
// is non-empty, otherwise the text the ATN simulator reports for the input.
std::string Lexer::getText() {
  if (_text.empty()) {
    return getInterpreter<atn::LexerATNSimulator>()->getText(_input);
  }
  return _text;
}

void Lexer::setText(const std::string &text) {
  _text = text;
}

std::unique_ptr<Token> Lexer::getToken() {
  return std::move(token);
}

void Lexer::setToken(std::unique_ptr<Token> newToken) {
  token = std::move(newToken);
}

void Lexer::setType(size_t ttype) {
  type = ttype;
}

size_t Lexer::getType() {
  return type;
}

void Lexer::setChannel(size_t newChannel) {
  channel = newChannel;
}

size_t Lexer::getChannel() {
  return channel;
}

std::vector<std::unique_ptr<Token>> Lexer::getAllTokens() {
  std::vector<std::unique_ptr<Token>> tokens;
  std::unique_ptr<Token> t = nextToken();
  while (t->getType() != EOF) {
    tokens.push_back(std::move(t));
    t = nextToken();
  }
  return tokens;
}

// Recovers from a failed match by consuming one character through the ATN
// simulator so that matching can be retried. Does nothing when the input is
// already at EOF.
void Lexer::recover(const LexerNoViableAltException &/*e*/) {
  if (_input->LA(1) == EOF) {
    return; // nothing left to skip
  }
  getInterpreter<atn::LexerATNSimulator>()->consume(_input);
}

void Lexer::notifyListeners(const LexerNoViableAltException & /*e*/) {
  ++_syntaxErrors;
  std::string text = _input->getText(misc::Interval(tokenStartCharIndex, _input->index()));
  std::string msg = std::string("token recognition error at: '") + getErrorDisplay(text) + std::string("'");

  ProxyErrorListener &listener = getErrorListenerDispatch();
  listener.syntaxError(this, nullptr, tokenStartLine, tokenStartCharPositionInLine, msg, std::current_exception());
}

// Returns a copy of `s` in which newline, tab and carriage return characters
// are replaced by the two-character sequences \n, \t and \r. All other
// characters are copied unchanged.
std::string Lexer::getErrorDisplay(const std::string &s) {
  std::string display;
  for (const char ch : s) {
    if (ch == '\n') {
      display += "\\n";
    } else if (ch == '\t') {
      display += "\\t";
    } else if (ch == '\r') {
      display += "\\r";
    } else {
      display += ch;
    }
  }
  return display;
}

// Recovers from a recognition error by consuming one symbol directly from the
// input stream. The exception argument is not used.
void Lexer::recover(RecognitionException * /*re*/) {
  // TODO: Do we lose character or line position information?
  this->_input->consume();
}

size_t Lexer::getNumberOfSyntaxErrors() {
  return _syntaxErrors;
}

// Gives every scanning-related field its initial value. Called from both
// constructors; _input is not touched here.
void Lexer::InitializeInstanceFields() {
  // Token creation and error bookkeeping.
  _factory = CommonTokenFactory::DEFAULT.get();
  _syntaxErrors = 0;

  // No token in progress.
  token.reset();
  type = 0;
  channel = 0;

  // No token start recorded yet.
  tokenStartCharIndex = INVALID_INDEX;
  tokenStartCharPositionInLine = 0;
  tokenStartLine = 0;

  // Scanning starts in the default mode with EOF not yet seen.
  mode = Lexer::DEFAULT_MODE;
  hitEOF = false;
}