#include "tree_sitter/parser.h"
#include <wctype.h>
enum TokenType {
AUTOMATIC_SEMICOLON,
TEMPLATE_CHARS,
TERNARY_QMARK,
HTML_COMMENT,
LOGICAL_OR,
ESCAPE_SEQUENCE,
REGEX_PATTERN,
};
void *tree_sitter_javascript_external_scanner_create() { return NULL; }
void tree_sitter_javascript_external_scanner_destroy(void *p) {}
unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) { return 0; }
void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {}
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
static bool scan_template_chars(TSLexer *lexer) {
lexer->result_symbol = TEMPLATE_CHARS;
for (bool has_content = false;; has_content = true) {
lexer->mark_end(lexer);
switch (lexer->lookahead) {
case '`':
return has_content;
case '\0':
return false;
case '$':
advance(lexer);
if (lexer->lookahead == '{') {
return has_content;
}
break;
case '\\':
return has_content;
default:
advance(lexer);
}
}
}
typedef enum {
REJECT, NO_NEWLINE, ACCEPT, } WhitespaceResult;
static WhitespaceResult scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool consume) {
bool saw_block_newline = false;
for (;;) {
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
if (lexer->lookahead == '/') {
skip(lexer);
if (lexer->lookahead == '/') {
skip(lexer);
while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
lexer->lookahead != 0x2029) {
skip(lexer);
}
*scanned_comment = true;
} else if (lexer->lookahead == '*') {
skip(lexer);
while (lexer->lookahead != 0) {
if (lexer->lookahead == '*') {
skip(lexer);
if (lexer->lookahead == '/') {
skip(lexer);
*scanned_comment = true;
if (lexer->lookahead != '/' && !consume) {
return saw_block_newline ? ACCEPT : NO_NEWLINE;
}
break;
}
} else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
saw_block_newline = true;
skip(lexer);
} else {
skip(lexer);
}
}
} else {
return REJECT;
}
} else {
return ACCEPT;
}
}
}
static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) {
lexer->result_symbol = AUTOMATIC_SEMICOLON;
lexer->mark_end(lexer);
for (;;) {
if (lexer->lookahead == 0) {
return true;
}
if (lexer->lookahead == '/') {
WhitespaceResult result = scan_whitespace_and_comments(lexer, scanned_comment, false);
if (result == REJECT) {
return false;
}
if (result == ACCEPT && comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') {
return true;
}
}
if (lexer->lookahead == '}') {
return true;
}
if (lexer->is_at_included_range_start(lexer)) {
return true;
}
if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
break;
}
if (!iswspace(lexer->lookahead)) {
return false;
}
skip(lexer);
}
skip(lexer);
if (scan_whitespace_and_comments(lexer, scanned_comment, true) == REJECT) {
return false;
}
switch (lexer->lookahead) {
case ',':
case ':':
case ';':
case '*':
case '%':
case '>':
case '<':
case '=':
case '[':
case '(':
case '?':
case '^':
case '|':
case '&':
case '/':
return false;
case '.':
skip(lexer);
return iswdigit(lexer->lookahead);
case '+':
skip(lexer);
return lexer->lookahead == '+';
case '-':
skip(lexer);
return lexer->lookahead == '-';
case '!':
skip(lexer);
return lexer->lookahead != '=';
case 'i':
skip(lexer);
if (lexer->lookahead != 'n') {
return true;
}
skip(lexer);
if (!iswalpha(lexer->lookahead)) {
return false;
}
for (unsigned i = 0; i < 8; i++) {
if (lexer->lookahead != "stanceof"[i]) {
return true;
}
skip(lexer);
}
if (!iswalpha(lexer->lookahead)) {
return false;
}
break;
default:
break;
}
return true;
}
static bool scan_ternary_qmark(TSLexer *lexer) {
for (;;) {
if (!iswspace(lexer->lookahead)) {
break;
}
skip(lexer);
}
if (lexer->lookahead == '?') {
advance(lexer);
if (lexer->lookahead == '?') {
return false;
}
lexer->mark_end(lexer);
lexer->result_symbol = TERNARY_QMARK;
if (lexer->lookahead == '.') {
advance(lexer);
if (iswdigit(lexer->lookahead)) {
return true;
}
return false;
}
return true;
}
return false;
}
static bool scan_html_comment(TSLexer *lexer) {
while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
skip(lexer);
}
const char *comment_start = "<!--";
const char *comment_end = "-->";
if (lexer->lookahead == '<') {
for (unsigned i = 0; i < 4; i++) {
if (lexer->lookahead != comment_start[i]) {
return false;
}
advance(lexer);
}
} else if (lexer->lookahead == '-') {
for (unsigned i = 0; i < 3; i++) {
if (lexer->lookahead != comment_end[i]) {
return false;
}
advance(lexer);
}
} else {
return false;
}
while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
lexer->lookahead != 0x2029) {
advance(lexer);
}
lexer->result_symbol = HTML_COMMENT;
lexer->mark_end(lexer);
return true;
}
bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[TEMPLATE_CHARS]) {
if (valid_symbols[AUTOMATIC_SEMICOLON]) {
return false;
}
return scan_template_chars(lexer);
}
if (valid_symbols[AUTOMATIC_SEMICOLON]) {
bool scanned_comment = false;
bool ret = scan_automatic_semicolon(lexer, !valid_symbols[LOGICAL_OR], &scanned_comment);
if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') {
return scan_ternary_qmark(lexer);
}
return ret;
}
if (valid_symbols[TERNARY_QMARK]) {
return scan_ternary_qmark(lexer);
}
if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] &&
!valid_symbols[REGEX_PATTERN]) {
return scan_html_comment(lexer);
}
return false;
}