#include <string.h>
#include <wctype.h>
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"
// Debug logging helper.
//
// When `TREE_SITTER_R_DEBUG` is defined, `debug_print()` forwards its
// printf-style arguments to standard output. Otherwise every call expands to
// nothing, so the arguments are never evaluated.
#ifdef TREE_SITTER_R_DEBUG
#include <stdarg.h>
#include <stdio.h>
static inline void debug_print(const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stdout, fmt, ap);
  va_end(ap);
}
#else
#define debug_print(...)
#endif
// Tokens this scanner can produce. The enumerators are used both as indices
// into `valid_symbols` and as values for `lexer->result_symbol`.
// NOTE(review): presumably the order must match the `externals` list of the
// grammar - confirm before reordering.
enum TokenType {
// Zero-width token emitted whenever it is valid, before anything is consumed.
START,
// A `\n` that is not swallowed as whitespace.
NEWLINE,
// `;`
SEMICOLON,
// Raw string such as r"(...)" or R'--[...]--'.
RAW_STRING_LITERAL,
// The `else` keyword.
ELSE,
// `(` and `)`
OPEN_PAREN,
CLOSE_PAREN,
// `{` and `}`
OPEN_BRACE,
CLOSE_BRACE,
// `[` and `]`
OPEN_BRACKET,
CLOSE_BRACKET,
// `[[` and `]]`
OPEN_BRACKET2,
CLOSE_BRACKET2,
// Never produced; when it is marked valid the scanner declines to scan.
ERROR_SENTINEL
};
// A scope is stored as a single byte so that the scope stack can be copied
// to and from the serialization buffer with a plain `memcpy()`.
typedef char Scope;

// Scope identifiers. They are `static` so that these generic names stay
// private to this translation unit; a plain file-scope `const` object has
// external linkage in C and could clash with another scanner linked into the
// same binary.
static const Scope SCOPE_TOP_LEVEL = 0;  // Not inside any bracket pair
static const Scope SCOPE_BRACE = 1;      // Inside `{ }`
static const Scope SCOPE_PAREN = 2;      // Inside `( )`
static const Scope SCOPE_BRACKET = 3;    // Inside `[ ]`
static const Scope SCOPE_BRACKET2 = 4;   // Inside `[[ ]]`
// Stack of the scopes that are currently open, innermost last.
typedef struct {
// Backing storage; `stack_new()` allocates it with
// `TREE_SITTER_SERIALIZATION_BUFFER_SIZE` bytes.
Scope* arr;
// Number of entries of `arr` currently in use.
unsigned len;
} Stack;
// Allocates an empty scope stack.
//
// The scope array gets `TREE_SITTER_SERIALIZATION_BUFFER_SIZE` bytes, which is
// the same bound `stack_push()` enforces.
//
// Returns NULL when either allocation fails; nothing is leaked in that case.
// The caller owns the result and releases it with `stack_free()`.
static Stack* stack_new(void) {
  Scope* arr = ts_malloc(TREE_SITTER_SERIALIZATION_BUFFER_SIZE);
  if (arr == NULL) {
    debug_print("`stack_new()` failed. Can't allocate scope array.\n");
    return NULL;
  }

  Stack* stack = ts_malloc(sizeof *stack);
  if (stack == NULL) {
    debug_print("`stack_new()` failed. Can't allocate stack.\n");
    // The array was already allocated; release it so it does not leak.
    ts_free(arr);
    return NULL;
  }

  stack->arr = arr;
  stack->len = 0;
  return stack;
}
// Releases a stack created by `stack_new()`: first the scope array, then the
// stack object itself. `stack` must not be NULL.
static void stack_free(Stack* stack) {
  Scope* scopes = stack->arr;
  ts_free(scopes);
  ts_free(stack);
}
// Pushes `scope` onto the stack.
//
// Returns false, leaving the stack untouched, when it already holds
// `TREE_SITTER_SERIALIZATION_BUFFER_SIZE` entries; returns true otherwise.
static bool stack_push(Stack* stack, Scope scope) {
  const bool has_room = stack->len < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
  if (!has_room) {
    debug_print("`stack_push()` failed. Stack is at maximum capacity.\n");
    return false;
  }

  stack->arr[stack->len++] = scope;
  return true;
}
// Returns the innermost open scope without removing it, or `SCOPE_TOP_LEVEL`
// when the stack is empty.
static Scope stack_peek(Stack* stack) {
  const unsigned depth = stack->len;
  return depth > 0 ? stack->arr[depth - 1] : SCOPE_TOP_LEVEL;
}
// Pops the innermost scope and checks that it is the one the caller expects.
//
// Returns true when the popped scope equals `scope`. Returns false when the
// stack is empty (nothing is changed) or when the popped scope differs from
// `scope`; in the latter case the entry has still been removed.
static bool stack_pop(Stack* stack, Scope scope) {
  if (stack->len == 0) {
    debug_print("`stack_pop()` failed. Stack is empty, nothing to pop.\n");
    return false;
  }

  // Remove the top entry before comparing it against the expectation.
  stack->len--;
  const Scope actual = stack->arr[stack->len];
  if (actual == scope) {
    return true;
  }

  debug_print(
    "`stack_pop()` failed. Actual scope '%c' does not match expected scope '%c'.\n",
    actual,
    scope
  );
  return false;
}
// Copies the in-use part of the scope array into `buffer` and returns the
// number of bytes written (one byte per scope). Nothing is copied for an
// empty stack.
static unsigned stack_serialize(Stack* stack, char* buffer) {
  const unsigned count = stack->len;
  if (count == 0) {
    return 0;
  }

  memcpy(buffer, stack->arr, count);
  return count;
}
// Replaces the stack contents with the `len` scopes stored in `buffer`.
// A `len` of zero simply empties the stack; `buffer` is not read then.
static void stack_deserialize(Stack* stack, const char* buffer, unsigned len) {
  stack->len = len;
  if (len == 0) {
    return;
  }

  memcpy(stack->arr, buffer, len);
}
// Reports whether `stack` points at something, i.e. is not NULL.
static inline bool stack_exists(void* stack) {
  return stack == NULL ? false : true;
}
// Skips leading whitespace.
//
// A `\n` is skipped only while the innermost scope is `(`, `[` or `[[`. In any
// other scope the function stops with the lexer positioned on that newline.
// All other whitespace is always skipped.
static inline void consume_whitespace_and_ignored_newlines(TSLexer* lexer, Stack* stack) {
  for (;;) {
    if (!iswspace(lexer->lookahead)) {
      return;
    }

    if (lexer->lookahead == '\n') {
      const Scope innermost = stack_peek(stack);
      const bool newline_is_ignored =
        innermost == SCOPE_PAREN || innermost == SCOPE_BRACKET || innermost == SCOPE_BRACKET2;
      if (!newline_is_ignored) {
        return;
      }
    }

    lexer->advance(lexer, true);
  }
}
// Tries to scan the keyword `else` starting at the current lookahead.
//
// On success the token end is marked right after the keyword,
// `result_symbol` is set to `ELSE` and true is returned.
//
// On failure false is returned. Characters may already have been advanced
// over, but the token end and `result_symbol` are left exactly as the caller
// set them, so a caller that marked an earlier token can still emit it.
static inline bool scan_else(TSLexer* lexer) {
  for (const char* expected = "else"; *expected != '\0'; expected++) {
    if (lexer->lookahead != *expected) {
      return false;
    }
    lexer->advance(lexer, false);
  }

  // `else` must be a whole word. If an identifier character follows (letter,
  // digit, `.` or `_`), this is a longer name such as `elsewhere` or
  // `else.x` and must not be split into a keyword plus a remainder.
  const int32_t next = lexer->lookahead;
  if (next == '.' || next == '_' || iswalnum((wint_t) next)) {
    return false;
  }

  lexer->mark_end(lexer);
  lexer->result_symbol = ELSE;
  return true;
}
// Handles a run of newlines/whitespace that may be followed by `else`.
//
// All whitespace is skipped. After each newline the token end is marked and
// the result is provisionally set to `NEWLINE`. Then:
//   - if a `#` follows, false is returned and no token is produced
//     (NOTE(review): presumably so the comment gets lexed before deciding
//     about `else` - confirm);
//   - otherwise `scan_else()` is attempted. If it matches, the token becomes
//     `ELSE`; if not, the provisional `NEWLINE` ending after the last newline
//     stands. True is returned either way.
static inline bool scan_else_with_leading_newlines(TSLexer* lexer) {
  while (iswspace(lexer->lookahead)) {
    const bool is_newline = lexer->lookahead == '\n';
    lexer->advance(lexer, true);

    if (is_newline) {
      lexer->mark_end(lexer);
      lexer->result_symbol = NEWLINE;
    }
  }

  if (lexer->lookahead == '#') {
    return false;
  }

  // The outcome is deliberately ignored: on failure the `NEWLINE` marked
  // above is the token that gets emitted.
  (void) scan_else(lexer);
  return true;
}
// Scans a raw string literal of the form
//
//   r"(...)"   R'[...]'   r"--{...}--"
//
// i.e. `r` or `R`, a quote (`"` or `'`), zero or more `-`, an opening bracket
// (`(`, `[` or `{`), the body, the matching closing bracket, the same number
// of `-`, and the same quote.
//
// Returns true with `result_symbol` set to `RAW_STRING_LITERAL` and the token
// end marked after the closing quote. Returns false when the input does not
// start a raw string or the end of input is reached before it is closed.
//
// Lookahead values are kept as `int32_t`. Storing them in a `char` would
// truncate them, so that for example U+2022 (low byte 0x22) would be taken
// for a `"`.
static inline bool scan_raw_string_literal(TSLexer* lexer) {
  // Pin the token end here; it only moves once a complete literal was seen.
  lexer->mark_end(lexer);

  const int32_t prefix = lexer->lookahead;
  if (prefix != 'r' && prefix != 'R') {
    return false;
  }
  lexer->advance(lexer, false);

  // The opening quote determines the closing quote.
  const int32_t closing_quote = lexer->lookahead;
  if (closing_quote != '"' && closing_quote != '\'') {
    return false;
  }
  lexer->advance(lexer, false);

  // Count the hyphens between quote and bracket; the same number has to
  // appear between the closing bracket and the closing quote.
  int hyphen_count = 0;
  while (lexer->lookahead == '-') {
    lexer->advance(lexer, false);
    hyphen_count += 1;
  }

  int32_t closing_bracket = 0;
  switch (lexer->lookahead) {
    case '(':
      closing_bracket = ')';
      break;
    case '[':
      closing_bracket = ']';
      break;
    case '{':
      closing_bracket = '}';
      break;
    default:
      return false;
  }
  lexer->advance(lexer, false);

  // Body: look for closing bracket + hyphens + closing quote. Whenever a
  // candidate terminator falls apart, resume at the current character
  // without advancing, since it may itself start the real terminator.
  while (!lexer->eof(lexer)) {
    if (lexer->lookahead != closing_bracket) {
      lexer->advance(lexer, false);
      continue;
    }
    lexer->advance(lexer, false);

    bool matched_hyphens = true;
    for (int i = 0; i < hyphen_count; i++) {
      if (lexer->lookahead != '-') {
        matched_hyphens = false;
        break;
      }
      lexer->advance(lexer, false);
    }
    if (!matched_hyphens) {
      continue;
    }

    if (lexer->lookahead != closing_quote) {
      continue;
    }
    lexer->advance(lexer, false);

    lexer->mark_end(lexer);
    lexer->result_symbol = RAW_STRING_LITERAL;
    return true;
  }

  // Unterminated raw string.
  return false;
}
// Consumes the character under the lexer as a `SEMICOLON` token. The caller
// has already verified that the lookahead is `;`. Always returns true.
static inline bool scan_semicolon(TSLexer* lexer) {
  lexer->result_symbol = SEMICOLON;
  lexer->advance(lexer, false);
  lexer->mark_end(lexer);
  return true;
}
// Consumes the character under the lexer as a `NEWLINE` token. The caller
// has already verified that the lookahead is `\n`. Always returns true.
static inline bool scan_newline(TSLexer* lexer) {
  lexer->result_symbol = NEWLINE;
  lexer->advance(lexer, false);
  lexer->mark_end(lexer);
  return true;
}
// Emits a one-character opening token and records the scope it opens.
//
// `scope` is pushed onto the stack; if that fails nothing is consumed and
// false is returned. Otherwise the current character is consumed and
// reported as `symbol`.
static inline bool
scan_open_block(TSLexer* lexer, Stack* stack, Scope scope, TSSymbol symbol) {
  const bool pushed = stack_push(stack, scope);
  if (pushed) {
    lexer->result_symbol = symbol;
    lexer->advance(lexer, false);
    lexer->mark_end(lexer);
  }
  return pushed;
}
// Emits a closing token and leaves the scope it closes.
//
// The innermost scope is popped and has to equal `scope`; if
// `stack_pop()` reports a failure nothing is consumed and false is returned.
// Otherwise the current character is consumed and reported as `symbol`.
static inline bool
scan_close_block(TSLexer* lexer, Stack* stack, Scope scope, TSSymbol symbol) {
  const bool popped = stack_pop(stack, scope);
  if (popped) {
    lexer->result_symbol = symbol;
    lexer->advance(lexer, false);
    lexer->mark_end(lexer);
  }
  return popped;
}
// Scans `[[` or `[`; the caller has verified that the lookahead is `[`.
//
// `[[` is chosen when `OPEN_BRACKET2` is valid and a second `[` follows.
// Otherwise a single `[` is emitted if `OPEN_BRACKET` is valid. The matching
// scope is pushed in either case. Returns false when neither token applies
// or the scope cannot be pushed.
static inline bool
scan_open_bracket_or_bracket2(TSLexer* lexer, Stack* stack, const bool* valid_symbols) {
  // Step over the first `[` so the character after it can be inspected.
  lexer->advance(lexer, false);

  const bool is_double = valid_symbols[OPEN_BRACKET2] && lexer->lookahead == '[';
  if (!is_double && !valid_symbols[OPEN_BRACKET]) {
    return false;
  }

  const Scope opened = is_double ? SCOPE_BRACKET2 : SCOPE_BRACKET;
  if (!stack_push(stack, opened)) {
    return false;
  }

  if (is_double) {
    // Take the second `[` as well.
    lexer->advance(lexer, false);
  }
  lexer->mark_end(lexer);
  lexer->result_symbol = is_double ? OPEN_BRACKET2 : OPEN_BRACKET;
  return true;
}
// Scans `]]`; the caller has verified that the lookahead is the first `]`.
//
// The first `]` is stepped over. Only if a second `]` follows is the
// `SCOPE_BRACKET2` scope closed and `CLOSE_BRACKET2` emitted.
static inline bool scan_close_bracket2(TSLexer* lexer, Stack* stack) {
  lexer->advance(lexer, false);

  const bool second_bracket_follows = lexer->lookahead == ']';
  return second_bracket_follows &&
    scan_close_block(lexer, stack, SCOPE_BRACKET2, CLOSE_BRACKET2);
}
// Core of the scanner: picks the token to produce from the current
// lookahead, the set of valid symbols and the innermost open scope.
//
// Returns true when a token was produced (`lexer->result_symbol` is set),
// false when this scanner has nothing to offer at this position. The checks
// below are ordered; the first one that applies decides.
static bool scan(TSLexer* lexer, Stack* stack, const bool* valid_symbols) {
  // NOTE(review): presumably this is only valid during error recovery -
  // confirm. Either way the scanner steps aside.
  if (valid_symbols[ERROR_SENTINEL]) {
    return false;
  }

  // Zero-width token: nothing is consumed.
  if (valid_symbols[START]) {
    lexer->result_symbol = START;
    return true;
  }

  consume_whitespace_and_ignored_newlines(lexer, stack);
  const int32_t next = lexer->lookahead;

  if (valid_symbols[SEMICOLON] && next == ';') {
    return scan_semicolon(lexer);
  }
  if (valid_symbols[OPEN_PAREN] && next == '(') {
    return scan_open_block(lexer, stack, SCOPE_PAREN, OPEN_PAREN);
  }
  if (valid_symbols[CLOSE_PAREN] && next == ')') {
    return scan_close_block(lexer, stack, SCOPE_PAREN, CLOSE_PAREN);
  }
  if (valid_symbols[OPEN_BRACE] && next == '{') {
    return scan_open_block(lexer, stack, SCOPE_BRACE, OPEN_BRACE);
  }
  if (valid_symbols[CLOSE_BRACE] && next == '}') {
    return scan_close_block(lexer, stack, SCOPE_BRACE, CLOSE_BRACE);
  }
  if ((valid_symbols[OPEN_BRACKET] || valid_symbols[OPEN_BRACKET2]) && next == '[') {
    return scan_open_bracket_or_bracket2(lexer, stack, valid_symbols);
  }

  // A `]` closes whichever bracket kind is innermost.
  if (valid_symbols[CLOSE_BRACKET] && next == ']' && stack_peek(stack) == SCOPE_BRACKET) {
    return scan_close_block(lexer, stack, SCOPE_BRACKET, CLOSE_BRACKET);
  }
  if (valid_symbols[CLOSE_BRACKET2] && next == ']' && stack_peek(stack) == SCOPE_BRACKET2) {
    return scan_close_bracket2(lexer, stack);
  }

  if (valid_symbols[RAW_STRING_LITERAL] && (next == 'r' || next == 'R')) {
    return scan_raw_string_literal(lexer);
  }

  // `else` directly ahead.
  if (valid_symbols[ELSE] && next == 'e') {
    return scan_else(lexer);
  }
  // Inside `{ }` an `else` may also follow after one or more newlines.
  if (valid_symbols[ELSE] && stack_peek(stack) == SCOPE_BRACE && next == '\n') {
    return scan_else_with_leading_newlines(lexer);
  }

  if (valid_symbols[NEWLINE] && next == '\n') {
    return scan_newline(lexer);
  }

  return false;
}
void* tree_sitter_r_external_scanner_create(void) {
return stack_new();
}
// Scan entry point. Delegates to `scan()` with the payload as the scope
// stack; with a NULL payload no token is ever produced.
bool tree_sitter_r_external_scanner_scan(
  void* payload,
  TSLexer* lexer,
  const bool* valid_symbols
) {
  if (!stack_exists(payload)) {
    return false;
  }
  return scan(lexer, payload, valid_symbols);
}
// Writes the scope stack into `buffer` and returns the number of bytes
// written. A NULL payload serializes to zero bytes.
unsigned tree_sitter_r_external_scanner_serialize(void* payload, char* buffer) {
  if (!stack_exists(payload)) {
    return 0;
  }
  return stack_serialize(payload, buffer);
}
// Restores the scope stack from the first `length` bytes of `buffer`.
// Does nothing for a NULL payload.
void tree_sitter_r_external_scanner_deserialize(
  void* payload,
  const char* buffer,
  unsigned length
) {
  if (!stack_exists(payload)) {
    return;
  }
  stack_deserialize(payload, buffer, length);
}
// Frees the scope stack behind `payload`. Does nothing for a NULL payload.
void tree_sitter_r_external_scanner_destroy(void* payload) {
  if (!stack_exists(payload)) {
    return;
  }
  stack_free(payload);
}