#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"
#include <wctype.h>
enum TokenType {
STRING_CONTENT,
RAW_STRING_LITERAL_START,
RAW_STRING_LITERAL_CONTENT,
RAW_STRING_LITERAL_END,
FLOAT_LITERAL,
BLOCK_OUTER_DOC_MARKER,
BLOCK_INNER_DOC_MARKER,
BLOCK_COMMENT_CONTENT,
LINE_DOC_CONTENT,
ERROR_SENTINEL
};
typedef struct {
uint8_t opening_hash_count;
} Scanner;
void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); }
void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner *)payload); }
unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = (Scanner *)payload;
buffer[0] = (char)scanner->opening_hash_count;
return 1;
}
void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload;
scanner->opening_hash_count = 0;
if (length == 1) {
Scanner *scanner = (Scanner *)payload;
scanner->opening_hash_count = buffer[0];
}
}
static inline bool is_num_char(int32_t c) { return c == '_' || iswdigit(c); }
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
static inline bool process_string(TSLexer *lexer) {
bool has_content = false;
for (;;) {
if (lexer->lookahead == '\"' || lexer->lookahead == '\\') {
break;
}
if (lexer->eof(lexer)) {
return false;
}
has_content = true;
advance(lexer);
}
lexer->result_symbol = STRING_CONTENT;
lexer->mark_end(lexer);
return has_content;
}
static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) {
if (lexer->lookahead == 'b' || lexer->lookahead == 'c') {
advance(lexer);
}
if (lexer->lookahead != 'r') {
return false;
}
advance(lexer);
uint8_t opening_hash_count = 0;
while (lexer->lookahead == '#') {
advance(lexer);
opening_hash_count++;
}
if (lexer->lookahead != '"') {
return false;
}
advance(lexer);
scanner->opening_hash_count = opening_hash_count;
lexer->result_symbol = RAW_STRING_LITERAL_START;
return true;
}
static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
for (;;) {
if (lexer->eof(lexer)) {
return false;
}
if (lexer->lookahead == '"') {
lexer->mark_end(lexer);
advance(lexer);
unsigned hash_count = 0;
while (lexer->lookahead == '#' && hash_count < scanner->opening_hash_count) {
advance(lexer);
hash_count++;
}
if (hash_count == scanner->opening_hash_count) {
lexer->result_symbol = RAW_STRING_LITERAL_CONTENT;
return true;
}
} else {
advance(lexer);
}
}
}
static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) {
advance(lexer);
for (unsigned i = 0; i < scanner->opening_hash_count; i++) {
advance(lexer);
}
lexer->result_symbol = RAW_STRING_LITERAL_END;
return true;
}
static inline bool process_float_literal(TSLexer *lexer) {
lexer->result_symbol = FLOAT_LITERAL;
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
bool has_fraction = false, has_exponent = false;
if (lexer->lookahead == '.') {
has_fraction = true;
advance(lexer);
if (iswalpha(lexer->lookahead)) {
return false;
}
if (lexer->lookahead == '.') {
return false;
}
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
}
lexer->mark_end(lexer);
if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
has_exponent = true;
advance(lexer);
if (lexer->lookahead == '+' || lexer->lookahead == '-') {
advance(lexer);
}
if (!is_num_char(lexer->lookahead)) {
return true;
}
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
}
if (!has_exponent && !has_fraction) {
return false;
}
if (lexer->lookahead != 'u' && lexer->lookahead != 'i' && lexer->lookahead != 'f') {
return true;
}
advance(lexer);
if (!iswdigit(lexer->lookahead)) {
return true;
}
while (iswdigit(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
return true;
}
static inline bool process_line_doc_content(TSLexer *lexer) {
lexer->result_symbol = LINE_DOC_CONTENT;
for (;;) {
if (lexer->eof(lexer)) {
return true;
}
if (lexer->lookahead == '\n') {
advance(lexer);
return true;
}
advance(lexer);
}
}
typedef enum {
LeftForwardSlash,
LeftAsterisk,
Continuing,
} BlockCommentState;
typedef struct {
BlockCommentState state;
unsigned nestingDepth;
} BlockCommentProcessing;
static inline void process_left_forward_slash(BlockCommentProcessing *processing, char current) {
if (current == '*') {
processing->nestingDepth += 1;
}
processing->state = Continuing;
};
static inline void process_left_asterisk(BlockCommentProcessing *processing, char current, TSLexer *lexer) {
if (current == '*') {
lexer->mark_end(lexer);
processing->state = LeftAsterisk;
return;
}
if (current == '/') {
processing->nestingDepth -= 1;
}
processing->state = Continuing;
}
static inline void process_continuing(BlockCommentProcessing *processing, char current) {
switch (current) {
case '/':
processing->state = LeftForwardSlash;
break;
case '*':
processing->state = LeftAsterisk;
break;
}
}
static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) {
char first = (char)lexer->lookahead;
if (valid_symbols[BLOCK_INNER_DOC_MARKER] && first == '!') {
lexer->result_symbol = BLOCK_INNER_DOC_MARKER;
advance(lexer);
return true;
}
if (valid_symbols[BLOCK_OUTER_DOC_MARKER] && first == '*') {
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '/') {
return false;
}
if (lexer->lookahead != '*') {
lexer->result_symbol = BLOCK_OUTER_DOC_MARKER;
return true;
}
} else {
advance(lexer);
}
if (valid_symbols[BLOCK_COMMENT_CONTENT]) {
BlockCommentProcessing processing = {Continuing, 1};
switch (first) {
case '*':
processing.state = LeftAsterisk;
if (lexer->lookahead == '/') {
return false;
}
break;
case '/':
processing.state = LeftForwardSlash;
break;
default:
processing.state = Continuing;
break;
}
while (!lexer->eof(lexer) && processing.nestingDepth != 0) {
first = (char)lexer->lookahead;
switch (processing.state) {
case LeftForwardSlash:
process_left_forward_slash(&processing, first);
break;
case LeftAsterisk:
process_left_asterisk(&processing, first, lexer);
break;
case Continuing:
lexer->mark_end(lexer);
process_continuing(&processing, first);
break;
default:
break;
}
advance(lexer);
if (first == '/' && processing.nestingDepth != 0) {
lexer->mark_end(lexer);
}
}
lexer->result_symbol = BLOCK_COMMENT_CONTENT;
return true;
}
return false;
}
bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[ERROR_SENTINEL]) {
return false;
}
Scanner *scanner = (Scanner *)payload;
if (valid_symbols[BLOCK_COMMENT_CONTENT] || valid_symbols[BLOCK_INNER_DOC_MARKER] ||
valid_symbols[BLOCK_OUTER_DOC_MARKER]) {
return process_block_comment(lexer, valid_symbols);
}
if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
return process_string(lexer);
}
if (valid_symbols[LINE_DOC_CONTENT]) {
return process_line_doc_content(lexer);
}
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_START] &&
(lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) {
return scan_raw_string_start(scanner, lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) {
return scan_raw_string_content(scanner, lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') {
return scan_raw_string_end(scanner, lexer);
}
if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
return process_float_literal(lexer);
}
return false;
}