#include "tree_sitter/parser.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Compile-time trace switch: set DEBUG to 1 to have the scanner print its
 * decisions to stderr. */
#define DEBUG 0
#if DEBUG
/* printf-style trace helper; writes to stderr. */
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#else
/* Tracing disabled: LOG expands to nothing, so its arguments are never
 * evaluated. */
#define LOG(...)
#endif
/*
 * External token kinds produced by this scanner.
 *
 * The values are used both as indices into the `valid_symbols` array passed
 * to the scan function and as the value stored in `lexer->result_symbol`.
 * NOTE(review): the order presumably has to match the `externals` list of
 * the grammar definition -- confirm before reordering.
 */
enum TokenType {
    NEWLINE = 0,
    INDENT = 1,
    DEDENT = 2,
    INTERPOLATION_START = 3,
    INSTRUCTION_TEXT_SEGMENT = 4,
};
/* Maximum number of nested indentation levels the scanner keeps track of. */
#define MAX_INDENT_DEPTH 100

/*
 * Persistent scanner state.
 *
 * indents        stack of indentation widths; slot 0 is initialised to 0.
 * indent_count   number of live entries in `indents`.
 * pending_indent indentation width of a line whose DEDENT tokens have not
 *                all been emitted yet, or a negative value when none is
 *                pending.
 */
typedef struct {
    uint16_t indents[MAX_INDENT_DEPTH];
    uint8_t  indent_count;
    int16_t  pending_indent;
} Scanner;
/* Move past the lookahead character, passing `false` as the lexer's
 * second ("skip") argument. */
static inline void advance(TSLexer *lexer) {
    const bool as_skipped = false;
    lexer->advance(lexer, as_skipped);
}
/* Move past the lookahead character, passing `true` as the lexer's
 * second ("skip") argument. */
static inline void skip(TSLexer *lexer) {
    const bool as_skipped = true;
    lexer->advance(lexer, as_skipped);
}
/*
 * Allocates and initialises the scanner state.
 *
 * The initial state is an indentation stack holding a single level of
 * width 0 and no pending dedent (pending_indent == -1).
 *
 * Returns a heap-allocated Scanner that must be released with
 * tree_sitter_agentscript_external_scanner_destroy, or NULL when the
 * allocation fails (previously the NULL result was dereferenced).
 */
void *tree_sitter_agentscript_external_scanner_create(void) {
    Scanner *scanner = calloc(1, sizeof *scanner);
    if (scanner == NULL) {
        return NULL;
    }

    /* calloc already zeroes the struct; the base level is set explicitly
     * to make the invariant "indents[0] == 0" visible. */
    scanner->indents[0] = 0;
    scanner->indent_count = 1;
    scanner->pending_indent = -1;
    LOG("Scanner created, indent_count=%d\n", scanner->indent_count);
    return scanner;
}
/*
 * Releases scanner state by handing `payload` to free().
 * A NULL payload is accepted, since free(NULL) is a no-op.
 */
void tree_sitter_agentscript_external_scanner_destroy(void *payload) {
    void *state = payload;
    free(state);
}
/* Stores `value` at dst[at] and dst[at + 1], low byte first, and returns
 * the offset just past the two bytes written. */
static inline size_t scanner_put_le16(char *dst, size_t at, uint16_t value) {
    dst[at] = (char)(value & 0xFF);
    dst[at + 1] = (char)((value >> 8) & 0xFF);
    return at + 2;
}

/*
 * Writes the scanner state into `buffer`.
 *
 * Layout:
 *   byte 0      indent_count
 *   bytes 1-2   pending_indent, 16 bits, low byte first
 *   then        one 16-bit little-endian value per indentation level
 *
 * Levels are written only while at least two bytes remain below
 * TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
 *
 * Returns the number of bytes written.
 */
unsigned tree_sitter_agentscript_external_scanner_serialize(void *payload, char *buffer) {
    const Scanner *state = (const Scanner *)payload;
    size_t written = 0;

    buffer[written++] = (char)state->indent_count;
    written = scanner_put_le16(buffer, written, (uint16_t)state->pending_indent);

    uint8_t level = 0;
    while (level < state->indent_count &&
           written < TREE_SITTER_SERIALIZATION_BUFFER_SIZE - 2) {
        written = scanner_put_le16(buffer, written, state->indents[level]);
        level++;
    }

    return (unsigned)written;
}
/*
 * Restores scanner state from a buffer produced by the serialize function.
 *
 * Expected layout:
 *   byte 0      number of indentation levels
 *   bytes 1-2   pending_indent, 16 bits, low byte first (two's complement)
 *   then        one 16-bit little-endian value per indentation level
 *
 * payload  the Scanner to overwrite.
 * buffer   serialized bytes; not read when `length` is 0.
 * length   number of valid bytes in `buffer`.
 *
 * An empty buffer resets the scanner to its initial state (one level of
 * width 0, nothing pending). A truncated buffer restores only the levels
 * that are fully present, so `indent_count` never refers to slots that were
 * not decoded; if no level could be decoded the stack falls back to the
 * single base level.
 */
void tree_sitter_agentscript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    Scanner *scanner = (Scanner *)payload;

    /* Start from the initial state; fields are overwritten below as far as
     * the buffer allows. */
    scanner->indents[0] = 0;
    scanner->indent_count = 1;
    scanner->pending_indent = -1;
    if (length == 0) {
        return;
    }

    /* Work on unsigned bytes so no sign extension leaks into the values. */
    const unsigned char *bytes = (const unsigned char *)buffer;
    size_t pos = 0;

    uint8_t declared = bytes[pos++];
    if (declared > MAX_INDENT_DEPTH) {
        declared = MAX_INDENT_DEPTH;
    }

    if (pos + 1 < length) {
        /* Assemble the 16-bit pattern in unsigned arithmetic, then map it to
         * the signed range. The previous code left-shifted a possibly
         * negative value, which is undefined behaviour. */
        uint16_t raw = (uint16_t)(bytes[pos] | ((unsigned)bytes[pos + 1] << 8));
        pos += 2;
        scanner->pending_indent =
            (int16_t)(raw < 0x8000u ? (int32_t)raw : (int32_t)raw - 0x10000);
    }

    uint8_t decoded = 0;
    while (decoded < declared && pos + 1 < length) {
        scanner->indents[decoded] =
            (uint16_t)(bytes[pos] | ((unsigned)bytes[pos + 1] << 8));
        pos += 2;
        decoded++;
    }

    /* Only trust the levels that were actually read; with none read, the
     * base level set up above stays in place. */
    if (decoded > 0) {
        scanner->indent_count = decoded;
    }
}
/*
 * External scanner entry point: tries to recognise one of the tokens in
 * enum TokenType at the current lexer position.
 *
 * payload        the Scanner state (indentation stack and pending dedent).
 * lexer          lexer handle; `lookahead`, `advance`, `mark_end`, `eof`
 *                and `result_symbol` are used.
 * valid_symbols  indexed by enum TokenType; a true entry means that token
 *                is acceptable here.
 *
 * Returns true with `lexer->result_symbol` set when a token was produced,
 * false otherwise.
 *
 * The checks run in this order:
 *   1. `{!`  -> INTERPOLATION_START (a lone `{` may start a text segment)
 *   2. a run of text up to newline / `{` / NUL -> INSTRUCTION_TEXT_SEGMENT
 *   3. a DEDENT left over from an earlier line (pending_indent)
 *   4. newline handling: INDENT, DEDENT or NEWLINE based on the indentation
 *      of the next non-blank, non-comment line
 */
bool tree_sitter_agentscript_external_scanner_scan(
void *payload,
TSLexer *lexer,
const bool *valid_symbols
) {
Scanner *scanner = (Scanner *)payload;
LOG("scan: lookahead='%c' (%d), valid=[N=%d,I=%d,D=%d,IS=%d,ITS=%d], indent_count=%d, current_indent=%d, pending=%d\n",
lexer->lookahead > 31 ? lexer->lookahead : '?',
lexer->lookahead,
valid_symbols[NEWLINE],
valid_symbols[INDENT],
valid_symbols[DEDENT],
valid_symbols[INTERPOLATION_START],
valid_symbols[INSTRUCTION_TEXT_SEGMENT],
scanner->indent_count,
scanner->indents[scanner->indent_count - 1],
scanner->pending_indent);
/* 1. Interpolation opener: `{` immediately followed by `!`. */
if (valid_symbols[INTERPOLATION_START] && lexer->lookahead == '{') {
advance(lexer);
LOG(" after {, lookahead='%c' (%d)\n", lexer->lookahead > 31 ? lexer->lookahead : '?', lexer->lookahead);
if (lexer->lookahead == '!') {
advance(lexer);
lexer->mark_end(lexer);
lexer->result_symbol = INTERPOLATION_START;
LOG(" => INTERPOLATION_START\n");
return true;
}
LOG(" not {!, char after { is '%c'\n", lexer->lookahead);
/* A lone `{` is treated as ordinary text: the segment starts with the
 * already-consumed `{` and runs to the next newline, `{` or NUL. */
if (valid_symbols[INSTRUCTION_TEXT_SEGMENT]) {
while (lexer->lookahead != '\0' &&
lexer->lookahead != '\n' &&
lexer->lookahead != '{') {
advance(lexer);
}
lexer->mark_end(lexer);
lexer->result_symbol = INSTRUCTION_TEXT_SEGMENT;
LOG(" => INSTRUCTION_TEXT_SEGMENT (after lone {)\n");
return true;
}
/* No token here even though `{` was consumed above.
 * NOTE(review): relies on the caller discarding consumed input when the
 * scanner returns false -- confirm against the tree-sitter docs. */
return false;
}
/* 2. Plain text segment: requires at least one character that is not a
 * newline, `{` or NUL, then extends to the next such character. */
if (valid_symbols[INSTRUCTION_TEXT_SEGMENT] &&
lexer->lookahead != '\0' &&
lexer->lookahead != '\n' &&
lexer->lookahead != '{') {
while (lexer->lookahead != '\0' &&
lexer->lookahead != '\n' &&
lexer->lookahead != '{') {
advance(lexer);
}
lexer->mark_end(lexer);
lexer->result_symbol = INSTRUCTION_TEXT_SEGMENT;
LOG(" => INSTRUCTION_TEXT_SEGMENT\n");
return true;
}
/* 3. Pending dedent: an earlier call recorded a line indentation that is
 * shallower than the top of the stack. Pop one level per call without
 * consuming any input. */
if (scanner->pending_indent >= 0 && valid_symbols[DEDENT]) {
uint16_t current_indent = scanner->indents[scanner->indent_count - 1];
if (scanner->pending_indent < current_indent && scanner->indent_count > 1) {
scanner->indent_count--;
LOG(" => DEDENT (pending, back to %d)\n", scanner->indents[scanner->indent_count - 1]);
lexer->result_symbol = DEDENT;
/* Target level reached (or passed): nothing more is pending. */
if (scanner->indents[scanner->indent_count - 1] <= scanner->pending_indent) {
scanner->pending_indent = -1;
}
return true;
}
/* Nothing left to pop for this target; clear it and fall through. */
scanner->pending_indent = -1;
}
/* 4. Everything below only applies at a line break or at end of input. */
if (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
LOG(" not at newline/eof, returning false\n");
return false;
}
bool found_end_of_line = false;
uint16_t indent_length = 0;
/* Walk over line breaks, blank lines and `#` comment lines, measuring the
 * indentation of the first line that has other content. */
for (;;) {
if (lexer->lookahead == '\n') {
/* Only the first newline is consumed with advance() and marked as the
 * token end; later newlines are passed over with skip(). */
if (!found_end_of_line) {
advance(lexer);
lexer->mark_end(lexer);
} else {
skip(lexer);
}
found_end_of_line = true;
/* Each new line restarts the indentation count, so blank and
 * comment-only lines do not contribute. */
indent_length = 0;
LOG(" consumed newline\n");
} else if (lexer->lookahead == '\r') {
skip(lexer);
LOG(" skipped CR\n");
} else if (lexer->lookahead == ' ' && found_end_of_line) {
indent_length++;
skip(lexer);
} else if (lexer->lookahead == '\t' && found_end_of_line) {
/* A tab counts as 3 columns.
 * NOTE(review): unusual width -- confirm this is intentional. */
indent_length += 3;
skip(lexer);
} else if (lexer->lookahead == '#' && found_end_of_line) {
/* Comment: skip to the end of the line; the following newline branch
 * resets indent_length. */
while (lexer->lookahead && lexer->lookahead != '\n') {
skip(lexer);
}
LOG(" skipped comment\n");
} else if (lexer->eof(lexer)) {
/* End of input is handled like a line break followed by a line with
 * indentation 0. */
if (!found_end_of_line) {
lexer->mark_end(lexer);
}
indent_length = 0;
found_end_of_line = true;
LOG(" EOF\n");
break;
} else {
LOG(" found non-ws '%c', indent_length=%d\n", lexer->lookahead, indent_length);
break;
}
}
if (found_end_of_line) {
uint16_t current_indent = scanner->indents[scanner->indent_count - 1];
LOG(" found_eol: indent_length=%d, current_indent=%d\n", indent_length, current_indent);
/* Deeper than the current level: push it and emit INDENT. When the stack
 * is already full the token is still emitted but the level is not
 * recorded. */
if (valid_symbols[INDENT] && indent_length > current_indent) {
if (scanner->indent_count < MAX_INDENT_DEPTH) {
scanner->indents[scanner->indent_count++] = indent_length;
}
scanner->pending_indent = -1; LOG(" => INDENT (new level %d)\n", indent_length);
lexer->result_symbol = INDENT;
return true;
}
/* Shallower than the current level: pop one level and emit DEDENT. If the
 * new top is still deeper than this line, remember the target so step 3
 * can emit the remaining DEDENTs on later calls. */
if (valid_symbols[DEDENT] && indent_length < current_indent) {
scanner->indent_count--;
LOG(" => DEDENT (back to %d)\n", scanner->indents[scanner->indent_count - 1]);
lexer->result_symbol = DEDENT;
if (indent_length < scanner->indents[scanner->indent_count - 1]) {
scanner->pending_indent = indent_length;
}
return true;
}
/* The line is shallower but DEDENT is not acceptable right now: record
 * the target indentation for a later call. */
if (indent_length < current_indent) {
scanner->pending_indent = indent_length;
LOG(" stored pending_indent=%d\n", indent_length);
}
/* NEWLINE is not produced at end of input. */
if (valid_symbols[NEWLINE] && !lexer->eof(lexer)) {
LOG(" => NEWLINE\n");
lexer->result_symbol = NEWLINE;
return true;
}
}
LOG(" => no token\n");
return false;
}