#include <tree_sitter/parser.h>
#include <cstring>
#include <cwctype>
#include <string>
#include <vector>
namespace {
using std::string;
using std::vector;
// Token kinds used by this scanner. Each enumerator serves both as an index
// into the `valid_symbols` array handed to `Scanner::scan` and as the value
// written to `lexer->result_symbol`.
// NOTE(review): the order presumably has to match the grammar's list of
// external tokens — confirm before inserting or reordering entries.
enum TokenType {
LINE_BREAK,
NO_LINE_BREAK,
SIMPLE_SYMBOL,
STRING_START,
SYMBOL_START,
SUBSHELL_START,
REGEX_START,
STRING_ARRAY_START,
SYMBOL_ARRAY_START,
HEREDOC_BODY_START,
STRING_CONTENT,
HEREDOC_CONTENT,
STRING_END,
HEREDOC_BODY_END,
HEREDOC_START,
FORWARD_SLASH,
BLOCK_AMPERSAND,
SPLAT_STAR,
UNARY_MINUS,
UNARY_MINUS_NUM,
BINARY_MINUS,
BINARY_STAR,
SINGLETON_CLASS_LEFT_ANGLE_LEFT_ANGLE,
HASH_KEY_SYMBOL,
IDENTIFIER_SUFFIX,
CONSTANT_SUFFIX,
HASH_SPLAT_STAR_STAR,
BINARY_STAR_STAR,
ELEMENT_REFERENCE_BRACKET,
SHORT_INTERPOLATION,
// Sentinel: `scan` stores it in `result_symbol` before calling
// `scan_whitespace` so it can tell whether that call selected a token.
NONE
};
// One open quoted literal (string, symbol, regex, subshell or %-array) on the
// scanner's literal stack. There is no constructor: every field is
// uninitialized until the scanner assigns it.
struct Literal {
// Start-token kind that opened the literal (STRING_START, SYMBOL_START, ...).
TokenType type;
// Character that opened the literal; seeing it again inside the content
// increases `nesting_depth`.
int32_t open_delimiter;
// Character that closes the literal (equal to `open_delimiter` for
// non-bracket delimiters).
int32_t close_delimiter;
// Set to 1 when the literal is opened; the literal ends when the close
// delimiter is seen at depth 1.
uint32_t nesting_depth;
// When true, `#` interpolation and backslash escapes split the content.
bool allows_interpolation;
};
// A heredoc whose start marker has been scanned and whose body has not been
// fully consumed yet.
struct Heredoc {
// All flags start out false; `word` starts out empty.
Heredoc() : end_word_indentation_allowed(false), allows_interpolation(false), started(false) {}
// Terminator word that ends the heredoc body.
string word;
// Set when the marker was written as `<<-` or `<<~`; the terminator may then
// be preceded by spaces or tabs.
bool end_word_indentation_allowed;
// False only when the terminator word was single-quoted.
bool allows_interpolation;
// Set once HEREDOC_BODY_START has been emitted for this heredoc.
bool started;
};
// Characters that `Scanner::is_iden_char` rejects. Any character that is not
// listed here is treated as an identifier character.
const char NON_IDENTIFIER_CHARS[] = {
'\0',
'\n',
'\r',
'\t',
' ',
':',
';',
'`',
'"',
'\'',
'@',
'$',
'#',
'.',
',',
'|',
'^',
'&',
'<',
'=',
'>',
'+',
'-',
'*',
'/',
'\\',
'%',
'?',
'!',
'~',
'(',
')',
'[',
']',
'{',
'}',
};
struct Scanner {
Scanner() : has_leading_whitespace(false) {}
// Forgets every open literal and every pending heredoc. The
// `has_leading_whitespace` flag is left untouched.
void reset() {
  open_heredocs.clear();
  literal_stack.clear();
}
// Writes the scanner state into `buffer` and returns the number of bytes
// written, or 0 when the state cannot be represented in the buffer.
//
// Layout, one byte per field:
//   literal count, then for each literal:
//     type, open delimiter, close delimiter, nesting depth, allows_interpolation
//   heredoc count, then for each heredoc:
//     end_word_indentation_allowed, allows_interpolation, started,
//     word length, word bytes
//
// `buffer` is assumed to hold TREE_SITTER_SERIALIZATION_BUFFER_SIZE bytes.
unsigned serialize(char *buffer) {
  unsigned i = 0;

  // Reserve room for both count bytes plus five bytes per literal.
  if (literal_stack.size() * 5 + 2 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
    return 0;

  buffer[i++] = static_cast<char>(literal_stack.size());
  for (
    vector<Literal>::iterator iter = literal_stack.begin(),
    end = literal_stack.end();
    iter != end;
    ++iter) {
    buffer[i++] = static_cast<char>(iter->type);
    buffer[i++] = static_cast<char>(iter->open_delimiter);
    buffer[i++] = static_cast<char>(iter->close_delimiter);
    buffer[i++] = static_cast<char>(iter->nesting_depth);
    buffer[i++] = iter->allows_interpolation;
  }

  buffer[i++] = static_cast<char>(open_heredocs.size());
  for (
    vector<Heredoc>::iterator iter = open_heredocs.begin(),
    end = open_heredocs.end();
    iter != end;
    ++iter) {
    const size_t word_size = iter->word.size();

    // The word length is stored in a single byte; a longer word could not be
    // read back correctly, so give up instead of writing a truncated length.
    if (word_size > 255)
      return 0;

    // Three flag bytes, one length byte and the word itself must all fit.
    if (i + 4 + word_size > TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
      return 0;

    buffer[i++] = iter->end_word_indentation_allowed;
    buffer[i++] = iter->allows_interpolation;
    buffer[i++] = iter->started;
    buffer[i++] = static_cast<char>(word_size);
    iter->word.copy(&buffer[i], word_size);
    i += word_size;
  }

  return i;
}
void deserialize(const char *buffer, unsigned length) {
unsigned i = 0;
has_leading_whitespace = false;
literal_stack.clear();
open_heredocs.clear();
if (length == 0)
return;
uint8_t literal_depth = buffer[i++];
for (unsigned j = 0; j < literal_depth; j++) {
Literal literal;
literal.type = static_cast<TokenType>(buffer[i++]);
literal.open_delimiter = buffer[i++];
literal.close_delimiter = buffer[i++];
literal.nesting_depth = buffer[i++];
literal.allows_interpolation = buffer[i++];
literal_stack.push_back(literal);
}
uint8_t open_heredoc_count = buffer[i++];
for (unsigned j = 0; j < open_heredoc_count; j++) {
Heredoc heredoc;
heredoc.end_word_indentation_allowed = buffer[i++];
heredoc.allows_interpolation = buffer[i++];
heredoc.started = buffer[i++];
uint8_t word_length = buffer[i++];
heredoc.word.assign(buffer + i, buffer + i + word_length);
i += word_length;
open_heredocs.push_back(heredoc);
}
}
// Advances past the lookahead character with the lexer's `skip` argument set
// to true, and records that the token being scanned was preceded by
// whitespace.
void skip(TSLexer *lexer) {
  const bool skip_character = true;
  lexer->advance(lexer, skip_character);
  has_leading_whitespace = true;
}
// Advances past the lookahead character with the lexer's `skip` argument set
// to false.
void advance(TSLexer *lexer) {
  const bool skip_character = false;
  lexer->advance(lexer, skip_character);
}
// Consumes whitespace in front of the next token. While doing so it may select
// LINE_BREAK or HEREDOC_BODY_START by writing `lexer->result_symbol`.
//
// Returns false only when a backslash is not followed by whitespace; in every
// other case it returns true, and the caller has to inspect `result_symbol` to
// find out whether a token was selected.
bool scan_whitespace(TSLexer *lexer, const bool *valid_symbols) {
// A heredoc body can start at the next line end only if the oldest open
// heredoc has not started yet and the parser accepts the token here.
bool heredoc_body_start_is_valid =
!open_heredocs.empty() &&
!open_heredocs[0].started &&
valid_symbols[HEREDOC_BODY_START];
bool crossed_newline = false;
for (;;) {
// At the start of an included range, emit an empty LINE_BREAK when a line
// break is acceptable and NO_LINE_BREAK is not.
if (!valid_symbols[NO_LINE_BREAK] && valid_symbols[LINE_BREAK] && lexer->is_at_included_range_start(lexer)) {
lexer->mark_end(lexer);
lexer->result_symbol = LINE_BREAK;
return true;
}
switch (lexer->lookahead) {
case ' ':
case '\t':
skip(lexer);
break;
case '\r':
// Both branches leave the case (return or break); there is no fallthrough
// into '\n'.
if (heredoc_body_start_is_valid) {
lexer->result_symbol = HEREDOC_BODY_START;
open_heredocs[0].started = true;
return true;
} else {
skip(lexer);
break;
}
case '\n':
if (heredoc_body_start_is_valid) {
lexer->result_symbol = HEREDOC_BODY_START;
open_heredocs[0].started = true;
return true;
} else if (!valid_symbols[NO_LINE_BREAK] && valid_symbols[LINE_BREAK] && !crossed_newline) {
// First newline of a possible LINE_BREAK: the token end is marked before
// the newline, then the newline is consumed as part of the token.
lexer->mark_end(lexer);
advance(lexer);
crossed_newline = true;
} else {
skip(lexer);
}
break;
case '\\':
// A backslash must be followed by a whitespace character (optionally
// preceded by '\r'); otherwise scanning fails.
advance(lexer);
if ('\r' == lexer->lookahead) {
skip(lexer);
}
if (iswspace(lexer->lookahead)) {
skip(lexer);
} else {
return false;
}
break;
default:
// First non-whitespace character. A crossed newline becomes a LINE_BREAK
// unless the next line starts with '.', '&' or '#'.
// NOTE(review): presumably these cover leading-dot method chains, `&.`
// and comments — confirm against the grammar.
if (crossed_newline && lexer->lookahead != '.' && lexer->lookahead != '&' && lexer->lookahead != '#') {
lexer->result_symbol = LINE_BREAK;
}
return true;
}
}
}
// Consumes one operator spelling starting at the lookahead character and
// returns true if a complete operator was consumed. Characters may already
// have been consumed when false is returned (e.g. a lone `=`, `.` or `[`).
//
// Accepted spellings: < << <= <=>, > >> >=, =~ == ===, + - ~ (each optionally
// followed by @), .., & ^ | / % `, ! != !~, * **, [] []=.
bool scan_operator(TSLexer *lexer) {
  const int32_t first = lexer->lookahead;

  if (first == '<') {
    advance(lexer);
    if (lexer->lookahead == '<') {
      advance(lexer);
    } else if (lexer->lookahead == '=') {
      advance(lexer);
      if (lexer->lookahead == '>') {
        advance(lexer);
      }
    }
    return true;
  }

  if (first == '>') {
    advance(lexer);
    if (lexer->lookahead == '>' || lexer->lookahead == '=') {
      advance(lexer);
    }
    return true;
  }

  if (first == '=') {
    advance(lexer);
    const int32_t second = lexer->lookahead;
    if (second != '~' && second != '=') {
      return false;
    }
    advance(lexer);
    if (second == '=' && lexer->lookahead == '=') {
      advance(lexer);
    }
    return true;
  }

  if (first == '+' || first == '-' || first == '~') {
    advance(lexer);
    if (lexer->lookahead == '@') {
      advance(lexer);
    }
    return true;
  }

  if (first == '.') {
    advance(lexer);
    if (lexer->lookahead != '.') {
      return false;
    }
    advance(lexer);
    return true;
  }

  if (first == '&' || first == '^' || first == '|' ||
      first == '/' || first == '%' || first == '`') {
    advance(lexer);
    return true;
  }

  if (first == '!') {
    advance(lexer);
    if (lexer->lookahead == '=' || lexer->lookahead == '~') {
      advance(lexer);
    }
    return true;
  }

  if (first == '*') {
    advance(lexer);
    if (lexer->lookahead == '*') {
      advance(lexer);
    }
    return true;
  }

  if (first == '[') {
    advance(lexer);
    if (lexer->lookahead != ']') {
      return false;
    }
    advance(lexer);
    if (lexer->lookahead == '=') {
      advance(lexer);
    }
    return true;
  }

  return false;
}
// Returns true when `c` may appear in an identifier, i.e. when it is not one
// of NON_IDENTIFIER_CHARS.
//
// The parameter is a full code point rather than a `char`: callers pass
// `lexer->lookahead`, and narrowing it to `char` made code points such as
// U+0120 compare equal to the listed ASCII characters (here ' '). Every value
// outside the ASCII range is treated as an identifier character.
bool is_iden_char(int32_t c) {
  if (c < 0 || c > 0x7F)
    return true;
  return memchr(NON_IDENTIFIER_CHARS, c, sizeof(NON_IDENTIFIER_CHARS)) == NULL;
}
// Consumes the name part of a simple symbol (the caller has already consumed
// the leading ':'). Accepts an optional `@`, `@@` or `$` sigil, followed by
// either an identifier or an operator spelling, followed by an optional `?` or
// `!`. Returns false when neither an identifier character nor an operator
// follows the sigil.
//
// A trailing `=` is included in the token unless it is followed by `>`; in
// that case the token end stays in front of the `=`.
bool scan_symbol_identifier(TSLexer *lexer) {
  switch (lexer->lookahead) {
    case '@':
      advance(lexer);
      if (lexer->lookahead == '@') {
        advance(lexer);
      }
      break;
    case '$':
      advance(lexer);
      break;
    default:
      break;
  }

  if (!is_iden_char(lexer->lookahead)) {
    if (!scan_operator(lexer)) {
      return false;
    }
  } else {
    advance(lexer);
  }

  for (; is_iden_char(lexer->lookahead);) {
    advance(lexer);
  }

  const bool has_suffix = lexer->lookahead == '?' || lexer->lookahead == '!';
  if (has_suffix) {
    advance(lexer);
  }

  if (lexer->lookahead != '=') {
    return true;
  }

  // Tentatively end the token before the '='; move the end past it unless the
  // '=' turns out to be the start of '=>'.
  lexer->mark_end(lexer);
  advance(lexer);
  if (lexer->lookahead != '>') {
    lexer->mark_end(lexer);
  }
  return true;
}
// Tries to consume the opening delimiter of a literal at the lookahead
// character. On success it fills `literal.type`, `literal.open_delimiter`,
// `literal.close_delimiter` and `literal.allows_interpolation` and returns
// true; `literal.nesting_depth` is not touched. On failure it returns false,
// possibly after having consumed characters.
bool scan_open_delimiter(TSLexer *lexer, Literal &literal, const bool *valid_symbols) {
switch (lexer->lookahead) {
case '"':
literal.type = STRING_START;
literal.open_delimiter = literal.close_delimiter = lexer->lookahead;
literal.allows_interpolation = true;
advance(lexer);
return true;
case '\'':
literal.type = STRING_START;
literal.open_delimiter = literal.close_delimiter = lexer->lookahead;
literal.allows_interpolation = false;
advance(lexer);
return true;
case '`':
if (!valid_symbols[SUBSHELL_START])
return false;
literal.type = SUBSHELL_START;
literal.open_delimiter = literal.close_delimiter = lexer->lookahead;
literal.allows_interpolation = true;
advance(lexer);
return true;
case '/':
if (!valid_symbols[REGEX_START])
return false;
literal.type = REGEX_START;
literal.open_delimiter = literal.close_delimiter = lexer->lookahead;
literal.allows_interpolation = true;
advance(lexer);
// When a FORWARD_SLASH token is also acceptable, the '/' only opens a regex
// if whitespace preceded it and the next character is not a space, a tab
// or '='.
if (valid_symbols[FORWARD_SLASH]) {
if (!has_leading_whitespace)
return false;
if (lexer->lookahead == ' ' || lexer->lookahead == '\t')
return false;
if (lexer->lookahead == '=')
return false;
}
return true;
case '%':
// Percent literal: an optional letter selects the literal kind, then a
// delimiter character follows.
advance(lexer);
switch (lexer->lookahead) {
case 's':
// NOTE(review): the check uses SIMPLE_SYMBOL although the resulting type is
// SYMBOL_START — confirm this is intended.
if (!valid_symbols[SIMPLE_SYMBOL])
return false;
literal.type = SYMBOL_START;
literal.allows_interpolation = false;
advance(lexer);
break;
case 'r':
if (!valid_symbols[REGEX_START])
return false;
literal.type = REGEX_START;
literal.allows_interpolation = true;
advance(lexer);
break;
case 'x':
if (!valid_symbols[SUBSHELL_START])
return false;
literal.type = SUBSHELL_START;
literal.allows_interpolation = true;
advance(lexer);
break;
case 'q':
if (!valid_symbols[STRING_START])
return false;
literal.type = STRING_START;
literal.allows_interpolation = false;
advance(lexer);
break;
case 'Q':
if (!valid_symbols[STRING_START])
return false;
literal.type = STRING_START;
literal.allows_interpolation = true;
advance(lexer);
break;
case 'w':
if (!valid_symbols[STRING_ARRAY_START])
return false;
literal.type = STRING_ARRAY_START;
literal.allows_interpolation = false;
advance(lexer);
break;
case 'i':
if (!valid_symbols[SYMBOL_ARRAY_START])
return false;
literal.type = SYMBOL_ARRAY_START;
literal.allows_interpolation = false;
advance(lexer);
break;
case 'W':
if (!valid_symbols[STRING_ARRAY_START])
return false;
literal.type = STRING_ARRAY_START;
literal.allows_interpolation = true;
advance(lexer);
break;
case 'I':
if (!valid_symbols[SYMBOL_ARRAY_START])
return false;
literal.type = SYMBOL_ARRAY_START;
literal.allows_interpolation = true;
advance(lexer);
break;
default:
// No kind letter: an interpolating string. The lookahead is not consumed
// here because it is the delimiter itself.
if (!valid_symbols[STRING_START])
return false;
literal.type = STRING_START;
literal.allows_interpolation = true;
break;
}
// Delimiter: bracket characters close with their counterpart, every other
// accepted character closes with itself.
switch (lexer->lookahead) {
case '(':
literal.open_delimiter = '(';
literal.close_delimiter = ')';
break;
case '[':
literal.open_delimiter = '[';
literal.close_delimiter = ']';
break;
case '{':
literal.open_delimiter = '{';
literal.close_delimiter = '}';
break;
case '<':
literal.open_delimiter = '<';
literal.close_delimiter = '>';
break;
case '\r':
case '\n':
case ' ':
case '\t':
// Whitespace is rejected as a delimiter when FORWARD_SLASH is acceptable;
// otherwise control falls through to the same-character delimiters below.
if (valid_symbols[FORWARD_SLASH])
return false;
case '|':
case '!':
case '#':
case '/':
case '\\':
case '@':
case '$':
case '%':
case '^':
case '&':
case '*':
case ')':
case ']':
case '}':
case '>':
case '+':
case '-':
case '~':
case '`':
case ',':
case '.':
case '?':
case ':':
case ';':
case '_':
case '"':
case '\'':
literal.open_delimiter = lexer->lookahead;
literal.close_delimiter = lexer->lookahead;
break;
default:
return false;
}
// Consume the delimiter character.
advance(lexer);
return true;
default:
return false;
}
}
// Reads the terminator word that follows a heredoc marker and stores it in
// `heredoc->word`, together with `heredoc->allows_interpolation`.
//
// The word is either quoted with ', " or ` (everything up to the matching
// quote or end of input is taken, and the closing quote is consumed) or a bare
// run of alphanumeric characters and underscores. Interpolation is disabled
// only for single-quoted words. The word is left empty when no word character
// is found; the other fields of `heredoc` are not modified.
void scan_heredoc_word(TSLexer *lexer, Heredoc *heredoc) {
  string terminator;
  int32_t quote_char = 0;
  const int32_t first = lexer->lookahead;

  if (first == '\'' || first == '"' || first == '`') {
    quote_char = first;
    advance(lexer);
    for (;;) {
      if (lexer->lookahead == quote_char || lexer->eof(lexer)) {
        break;
      }
      terminator.push_back(static_cast<char>(lexer->lookahead));
      advance(lexer);
    }
    // Step over the closing quote.
    advance(lexer);
  } else {
    while (iswalnum(lexer->lookahead) || lexer->lookahead == '_') {
      terminator.push_back(static_cast<char>(lexer->lookahead));
      advance(lexer);
    }
  }

  heredoc->word = terminator;
  heredoc->allows_interpolation = !(quote_char == '\'');
}
// Scans inside the body of the oldest open heredoc. Selects HEREDOC_CONTENT,
// HEREDOC_BODY_END (which also removes the heredoc from `open_heredocs`) or,
// through `scan_short_interpolation`, SHORT_INTERPOLATION. Returns false when
// a backslash or `#{` is reached with no content in front of it, so that
// another rule can handle the escape or interpolation.
bool scan_heredoc_content(TSLexer *lexer) {
// Works on a copy of the heredoc record.
Heredoc heredoc = open_heredocs.front();
// Number of terminator characters matched so far on the current line.
size_t position_in_word = 0;
// Whether the terminator may still be matched at the current position.
bool look_for_heredoc_end = true;
bool has_content = false;
for (;;) {
// The whole terminator word has been matched.
if (position_in_word == heredoc.word.size()) {
// With no content pending, the token ends right after the word.
if (!has_content)
lexer->mark_end(lexer);
while (lexer->lookahead == ' ' ||
lexer->lookahead == '\t')
advance(lexer);
if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
// The terminator stands alone on its line. Pending content is emitted
// first (its end was marked earlier); otherwise the heredoc is closed.
if (has_content) {
lexer->result_symbol = HEREDOC_CONTENT;
} else {
open_heredocs.erase(open_heredocs.begin());
lexer->result_symbol = HEREDOC_BODY_END;
}
return true;
} else {
// The word is followed by other text: treat it as content and start
// matching again.
has_content = true;
position_in_word = 0;
}
}
// End of input: emit pending content, or close the heredoc.
if (lexer->eof(lexer)) {
lexer->mark_end(lexer);
if (has_content) {
lexer->result_symbol = HEREDOC_CONTENT;
} else {
open_heredocs.erase(open_heredocs.begin());
lexer->result_symbol = HEREDOC_BODY_END;
}
return true;
}
if (lexer->lookahead == heredoc.word[position_in_word] && look_for_heredoc_end) {
// Next terminator character matched.
advance(lexer);
position_in_word++;
} else {
// Not the terminator: stop looking for it until the next line.
position_in_word = 0;
look_for_heredoc_end = false;
// A backslash in an interpolating heredoc ends the content token in front
// of it.
if (heredoc.allows_interpolation && lexer->lookahead == '\\') {
if (has_content) {
lexer->result_symbol = HEREDOC_CONTENT;
return true;
} else {
return false;
}
}
if (heredoc.allows_interpolation && lexer->lookahead == '#') {
// Mark the end before '#', so content emitted below stops in front of it.
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '{') {
if (has_content) {
lexer->result_symbol = HEREDOC_CONTENT;
return true;
} else {
return false;
}
}
if (scan_short_interpolation(lexer, has_content, HEREDOC_CONTENT)) {
return true;
}
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
// Consume one line ending: "\r\n", "\r" or "\n".
if (lexer->lookahead == '\r') {
advance(lexer);
if (lexer->lookahead == '\n') {
advance(lexer);
}
} else {
advance(lexer);
}
has_content = true;
look_for_heredoc_end = true;
// Consume indentation. If the heredoc does not allow an indented
// terminator, any indentation rules out a match on this line.
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
advance(lexer);
if (!heredoc.end_word_indentation_allowed)
look_for_heredoc_end = false;
}
// Content so far ends after the indentation, in front of a possible
// terminator.
lexer->mark_end(lexer);
} else {
// Ordinary character: consume it and extend the content token.
has_content = true;
advance(lexer);
lexer->mark_end(lexer);
}
}
}
}
// Scans inside the literal on top of `literal_stack`. Selects STRING_CONTENT,
// STRING_END (which also pops the literal) or, through
// `scan_short_interpolation`, SHORT_INTERPOLATION. Returns false when there is
// nothing to emit at the current position (whitespace in an array literal, an
// escape or `#{` with no content in front of it) or at end of input.
bool scan_literal_content(TSLexer *lexer) {
Literal &literal = literal_stack.back();
bool has_content = false;
// In %w/%i style array literals whitespace ends the current element.
bool stop_on_space = literal.type == SYMBOL_ARRAY_START || literal.type == STRING_ARRAY_START;
for (;;) {
if (stop_on_space && iswspace(lexer->lookahead)) {
if (has_content) {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return true;
} else {
return false;
}
} else if (lexer->lookahead == literal.close_delimiter) {
// Checked before the open delimiter, so when both delimiters are the same
// character the nesting branch below is never taken.
lexer->mark_end(lexer);
if (literal.nesting_depth == 1) {
if (has_content) {
// Emit the content first; it ends in front of the delimiter.
lexer->result_symbol = STRING_CONTENT;
} else {
advance(lexer);
// After a regex, lowercase letters following the delimiter are included
// in the end token (presumably the regex flags).
if (literal.type == REGEX_START) {
while (iswlower(lexer->lookahead))
advance(lexer);
}
literal_stack.pop_back();
lexer->result_symbol = STRING_END;
lexer->mark_end(lexer);
}
return true;
} else {
// Closing a nested bracket pair: part of the content.
literal.nesting_depth--;
advance(lexer);
}
} else if (lexer->lookahead == literal.open_delimiter) {
literal.nesting_depth++;
advance(lexer);
} else if (literal.allows_interpolation && lexer->lookahead == '#') {
// Mark the end before '#', so content emitted below stops in front of it.
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == '{') {
if (has_content) {
lexer->result_symbol = STRING_CONTENT;
return true;
} else {
return false;
}
}
if (scan_short_interpolation(lexer, has_content, STRING_CONTENT)) {
return true;
}
} else if (lexer->lookahead == '\\') {
if (literal.allows_interpolation) {
// The escape is left for another rule; content ends in front of it.
if (has_content) {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return true;
} else {
return false;
}
} else {
// Non-interpolating literal: the backslash and the character after it are
// both plain content.
advance(lexer);
advance(lexer);
}
} else if (lexer->eof(lexer)) {
// Unterminated literal.
advance(lexer);
lexer->mark_end(lexer);
return false;
} else {
advance(lexer);
}
// Reached by every branch that consumed content without returning.
has_content = true;
}
}
// Handles a `#` that the caller has already consumed inside an interpolating
// literal or heredoc and that is followed by `@` or `$` (`#@ivar`, `#@@cvar`,
// `#$gvar`). The caller is expected to have marked the token end in front of
// the `#`.
//
// - If content precedes the `#` (`has_content`), selects `content_symbol` so
//   the content is emitted first, and returns true.
// - Otherwise marks the token end after the `#`, looks at what follows the
//   sigil and selects SHORT_INTERPOLATION when it can start a variable name.
// - Returns false in every other case; characters may have been consumed.
bool scan_short_interpolation(TSLexer *lexer, const bool has_content, const TSSymbol content_symbol) {
  // Keep the full code point: narrowing to `char` would make code points such
  // as U+0140 compare equal to '@'.
  const int32_t start = lexer->lookahead;
  if (start != '@' && start != '$')
    return false;

  if (has_content) {
    lexer->result_symbol = content_symbol;
    return true;
  }

  lexer->mark_end(lexer);
  advance(lexer);

  bool is_short_interpolation = false;
  if (start == '$') {
    const int32_t next = lexer->lookahead;
    // strchr() also matches the string terminator and takes its argument as a
    // char, so only non-zero ASCII values are looked up.
    const bool is_special_global =
      next > 0 && next < 0x80 &&
      strchr("!@&`'+~=/\\,;.<>*$?:\"", next) != NULL;
    if (is_special_global) {
      is_short_interpolation = true;
    } else if (next == '-') {
      advance(lexer);
      is_short_interpolation = iswalpha(lexer->lookahead) || lexer->lookahead == '_';
    } else {
      is_short_interpolation = iswalnum(next) || next == '_';
    }
  } else {
    // '@' or '@@' followed by an identifier character that is not a digit.
    if (lexer->lookahead == '@') {
      advance(lexer);
    }
    is_short_interpolation = is_iden_char(lexer->lookahead) && !iswdigit(lexer->lookahead);
  }

  if (is_short_interpolation) {
    lexer->result_symbol = SHORT_INTERPOLATION;
    return true;
  }
  return false;
}
// Entry point of the scanner: tries to recognize one token at the current
// position. `valid_symbols` is indexed by TokenType and tells which tokens are
// acceptable here. On success the token kind is stored in
// `lexer->result_symbol` and true is returned; false means no token was
// recognized.
//
// Order of attempts:
//   1. content/end of the innermost open literal or the oldest open heredoc
//      (only when a new string cannot start here);
//   2. leading whitespace, which may itself produce LINE_BREAK or
//      HEREDOC_BODY_START;
//   3. context-sensitive punctuation (& < * - : [);
//   4. hash-key symbols and identifier/constant suffixes;
//   5. heredoc start markers and literal opening delimiters.
bool scan(TSLexer *lexer, const bool *valid_symbols) {
  has_leading_whitespace = false;

  if (!valid_symbols[STRING_START]) {
    if (
      (valid_symbols[STRING_CONTENT] || valid_symbols[STRING_END]) &&
      !literal_stack.empty())
      return scan_literal_content(lexer);
    if (
      (valid_symbols[HEREDOC_CONTENT] || valid_symbols[HEREDOC_BODY_END]) &&
      !open_heredocs.empty())
      return scan_heredoc_content(lexer);
  }

  // NONE lets us detect whether scan_whitespace selected a token.
  lexer->result_symbol = NONE;
  if (!scan_whitespace(lexer, valid_symbols))
    return false;
  if (lexer->result_symbol != NONE)
    return true;

  switch (lexer->lookahead) {
    case '&':
      if (valid_symbols[BLOCK_AMPERSAND]) {
        advance(lexer);
        // Not `&&`, `&.`, `&=` and not followed by whitespace.
        if (lexer->lookahead != '&' &&
            lexer->lookahead != '.' &&
            lexer->lookahead != '=' &&
            !iswspace(lexer->lookahead)) {
          lexer->result_symbol = BLOCK_AMPERSAND;
          return true;
        } else {
          return false;
        }
      }
      break;

    case '<':
      if (valid_symbols[SINGLETON_CLASS_LEFT_ANGLE_LEFT_ANGLE]) {
        advance(lexer);
        if (lexer->lookahead == '<') {
          advance(lexer);
          lexer->result_symbol = SINGLETON_CLASS_LEFT_ANGLE_LEFT_ANGLE;
          return true;
        } else {
          return false;
        }
      }
      break;

    case '*':
      if (valid_symbols[SPLAT_STAR] || valid_symbols[BINARY_STAR] ||
          valid_symbols[HASH_SPLAT_STAR_STAR] || valid_symbols[BINARY_STAR_STAR]) {
        advance(lexer);
        if (lexer->lookahead == '=')
          return false;
        if (lexer->lookahead == '*') {
          if (valid_symbols[HASH_SPLAT_STAR_STAR] || valid_symbols[BINARY_STAR_STAR]) {
            advance(lexer);
            if (lexer->lookahead == '=')
              return false;
            // Prefer the binary operator when no whitespace preceded `**`,
            // the splat when the operand follows directly; otherwise take
            // whichever is valid, binary first.
            if (valid_symbols[BINARY_STAR_STAR] && !has_leading_whitespace) {
              lexer->result_symbol = BINARY_STAR_STAR;
              return true;
            } else if (valid_symbols[HASH_SPLAT_STAR_STAR] && !iswspace(lexer->lookahead)) {
              lexer->result_symbol = HASH_SPLAT_STAR_STAR;
              return true;
            } else if (valid_symbols[BINARY_STAR_STAR]) {
              lexer->result_symbol = BINARY_STAR_STAR;
              return true;
            } else if (valid_symbols[HASH_SPLAT_STAR_STAR]) {
              lexer->result_symbol = HASH_SPLAT_STAR_STAR;
              return true;
            } else {
              return false;
            }
          } else {
            return false;
          }
        } else {
          // Same preference order as above, for the single `*`.
          if (valid_symbols[BINARY_STAR] && !has_leading_whitespace) {
            lexer->result_symbol = BINARY_STAR;
            return true;
          } else if (valid_symbols[SPLAT_STAR] && !iswspace(lexer->lookahead)) {
            lexer->result_symbol = SPLAT_STAR;
            return true;
          } else if (valid_symbols[BINARY_STAR]) {
            lexer->result_symbol = BINARY_STAR;
            return true;
          } else if (valid_symbols[SPLAT_STAR]) {
            lexer->result_symbol = SPLAT_STAR;
            return true;
          } else {
            return false;
          }
        }
      }
      break;

    case '-':
      if (valid_symbols[UNARY_MINUS] || valid_symbols[UNARY_MINUS_NUM] || valid_symbols[BINARY_MINUS]) {
        advance(lexer);
        // `-=` and `->` are not handled here.
        if (lexer->lookahead != '=' && lexer->lookahead != '>') {
          if (valid_symbols[UNARY_MINUS_NUM] && (!valid_symbols[BINARY_STAR] || has_leading_whitespace) && iswdigit(lexer->lookahead)) {
            lexer->result_symbol = UNARY_MINUS_NUM;
            return true;
          } else if (valid_symbols[UNARY_MINUS] && has_leading_whitespace && !iswspace(lexer->lookahead)) {
            lexer->result_symbol = UNARY_MINUS;
          } else if (valid_symbols[BINARY_MINUS]) {
            lexer->result_symbol = BINARY_MINUS;
          } else {
            lexer->result_symbol = UNARY_MINUS;
          }
          return true;
        } else {
          return false;
        }
      }
      break;

    case ':':
      if (valid_symbols[SYMBOL_START]) {
        Literal literal;
        literal.type = SYMBOL_START;
        literal.nesting_depth = 1;
        advance(lexer);

        switch (lexer->lookahead) {
          case '"':
            advance(lexer);
            literal.open_delimiter = '"';
            literal.close_delimiter = '"';
            literal.allows_interpolation = true;
            literal_stack.push_back(literal);
            lexer->result_symbol = SYMBOL_START;
            return true;

          case '\'':
            advance(lexer);
            literal.open_delimiter = '\'';
            literal.close_delimiter = '\'';
            literal.allows_interpolation = false;
            literal_stack.push_back(literal);
            lexer->result_symbol = SYMBOL_START;
            return true;

          default:
            if (scan_symbol_identifier(lexer)) {
              lexer->result_symbol = SIMPLE_SYMBOL;
              return true;
            }
        }

        return false;
      }
      break;

    case '[':
      // An element reference unless whitespace preceded the bracket and a
      // string (here: an array-like literal context) could start instead.
      if (valid_symbols[ELEMENT_REFERENCE_BRACKET] && (!has_leading_whitespace ||
          !valid_symbols[STRING_START])) {
        advance(lexer);
        lexer->result_symbol = ELEMENT_REFERENCE_BRACKET;
        return true;
      }
      break;

    default:
      break;
  }

  // Hash keys (`name:`) and identifiers/constants with a `!` suffix.
  if (((valid_symbols[HASH_KEY_SYMBOL] || valid_symbols[IDENTIFIER_SUFFIX]) &&
       (iswalpha(lexer->lookahead) || lexer->lookahead == '_')) ||
      (valid_symbols[CONSTANT_SUFFIX] && iswupper(lexer->lookahead))) {
    TokenType validIdentifierSymbol = iswupper(lexer->lookahead) ? CONSTANT_SUFFIX : IDENTIFIER_SUFFIX;

    while (iswalnum(lexer->lookahead) || lexer->lookahead == '_') {
      advance(lexer);
    }

    if (valid_symbols[HASH_KEY_SYMBOL] && lexer->lookahead == ':') {
      // The token ends in front of the ':'; a following second ':' (`::`)
      // means this is not a hash key.
      lexer->mark_end(lexer);
      advance(lexer);
      if (lexer->lookahead != ':') {
        lexer->result_symbol = HASH_KEY_SYMBOL;
        return true;
      }
    } else if (valid_symbols[validIdentifierSymbol] && lexer->lookahead == '!') {
      // `name!` but not `name !=`.
      advance(lexer);
      if (lexer->lookahead != '=') {
        lexer->result_symbol = validIdentifierSymbol;
        return true;
      }
    }

    return false;
  }

  // Open delimiters for literals, and heredoc start markers.
  if (valid_symbols[STRING_START]) {
    Literal literal;
    literal.nesting_depth = 1;

    if (lexer->lookahead == '<') {
      advance(lexer);
      if (lexer->lookahead != '<')
        return false;
      advance(lexer);

      Heredoc heredoc;
      if (lexer->lookahead == '-' || lexer->lookahead == '~') {
        advance(lexer);
        heredoc.end_word_indentation_allowed = true;
      }

      scan_heredoc_word(lexer, &heredoc);
      if (heredoc.word.empty())
        return false;
      open_heredocs.push_back(heredoc);
      lexer->result_symbol = HEREDOC_START;
      return true;
    }

    if (scan_open_delimiter(lexer, literal, valid_symbols)) {
      literal_stack.push_back(literal);
      lexer->result_symbol = literal.type;
      return true;
    } else {
      return false;
    }
  }

  return false;
}
// Set by `skip`; cleared at the start of every `scan` call and by
// `deserialize`. Not part of the serialized state.
bool has_leading_whitespace;
// Literals that have been opened and not yet closed, innermost last.
vector<Literal> literal_stack;
// Heredocs whose start marker has been seen and whose body has not been fully
// consumed, oldest first.
vector<Heredoc> open_heredocs;
};
}
extern "C" {
void *tree_sitter_ruby_external_scanner_create() {
return new Scanner();
}
bool tree_sitter_ruby_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
}
unsigned tree_sitter_ruby_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->serialize(buffer);
}
void tree_sitter_ruby_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->deserialize(buffer, length);
}
// Deletes the Scanner stored in `payload`.
void tree_sitter_ruby_external_scanner_destroy(void *payload) {
  delete static_cast<Scanner *>(payload);
}
}