#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"
#include "tree_sitter/parser.h"
#include "unicode.h"
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#define PEEK env->lexer->lookahead
#ifdef TREE_SITTER_DEBUG
#include <locale.h>
#define S_ADVANCE advance_debug(env)
#define S_SKIP skip_debug(env)
#define MARK(s) mark_debug(env, s)
#define dbg(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#else
#define S_ADVANCE advance(env)
#define S_SKIP env->lexer->advance(env->lexer, true)
#define MARK(s) env->lexer->mark_end(env->lexer)
#define dbg(...) do {} while (0)
#endif
#define SEQ(expr) do { unsigned res = (unsigned) expr; if (res) return res; } while (0)
typedef enum {
FAIL,
SEMICOLON,
START,
START_DO,
START_CASE,
START_IF,
START_LET,
START_QUOTE,
START_EXPLICIT,
END,
END_EXPLICIT,
START_BRACE,
END_BRACE,
START_TEXP,
END_TEXP,
WHERE,
IN,
ARROW,
BAR,
DERIVING,
COMMENT,
HADDOCK,
CPP,
PRAGMA,
QQ_START,
QQ_BODY,
SPLICE,
QUAL_DOT,
TIGHT_DOT,
PREFIX_DOT,
DOTDOT,
TIGHT_AT,
PREFIX_AT,
TIGHT_BANG,
PREFIX_BANG,
TIGHT_TILDE,
PREFIX_TILDE,
PREFIX_PERCENT,
QUALIFIED_OP,
LEFT_SECTION_OP,
NO_SECTION_OP,
MINUS,
CONTEXT,
INFIX,
DATA_INFIX,
TYPE_INSTANCE,
VARSYM,
CONSYM,
UPDATE,
} Symbol;
#ifdef TREE_SITTER_DEBUG
static const char *sym_names[] = {
"fail",
"semicolon",
"start",
"start_do",
"start_case",
"start_if",
"start_let",
"start_quote",
"start_explicit",
"end",
"end_explicit",
"start_brace",
"end_brace",
"start_texp",
"end_texp",
"where",
"in",
"arrow",
"bar",
"deriving",
"comment",
"haddock",
"cpp",
"pragma",
"qq_start",
"qq_body",
"splice",
"tight_dot",
"proj_dot",
"prefix_dot",
"dotdot",
"tight_at",
"prefix_at",
"tight_bang",
"prefix_bang",
"tight_tilde",
"prefix_tilde",
"prefix_percent",
"qualified_op",
"left_section_op",
"no_section_op",
"minus",
"context",
"infix",
"data_infix",
"type_instance",
"varsym",
"consym",
"update",
};
#endif
#ifdef TREE_SITTER_DEBUG
typedef Array(int32_t) ParseLine;
typedef Array(ParseLine) ParseLines;
typedef struct {
int marked;
unsigned marked_line;
unsigned start_col;
unsigned start_line;
unsigned end_col;
const char *marked_by;
} Debug;
Debug debug_new(TSLexer *l) {
return (Debug) {
.marked = -1,
.marked_line = 0,
.start_col = l->get_column(l),
.start_line = 0,
.end_col = 0,
.marked_by = "",
};
}
#endif
typedef enum {
DeclLayout,
DoLayout,
CaseLayout,
LetLayout,
QuoteLayout,
MultiWayIfLayout,
Braces,
TExp,
ModuleHeader,
NoContext,
} ContextSort;
#ifdef TREE_SITTER_DEBUG
static char const *context_names[] = {
"decls",
"do",
"case",
"let",
"multi_way_if",
"quote",
"braces",
"texp",
"module_header",
"none",
};
#endif
typedef struct {
ContextSort sort;
uint32_t indent;
} Context;
typedef enum {
LNothing,
LEof,
LWhere,
LIn,
LThen,
LElse,
LDeriving,
LModule,
LUpper,
LTick,
LSymop,
LSymopSpecial,
LDotDot,
LDotId,
LDotSymop,
LDotOpen,
LDollar,
LBang,
LTilde,
LAt,
LPercent,
LHash,
LBar,
LArrow,
LCArrow,
LTexpCloser,
LQuoteClose,
LPragma,
LBlockComment,
LLineComment,
LBraceClose,
LBraceOpen,
LBracketOpen,
LUnboxedClose,
LSemi,
LCppElse,
LCpp,
} Lexed;
#ifdef TREE_SITTER_DEBUG
static const char *token_names[] = {
"nothing",
"eof",
"where",
"in",
"then",
"else",
"deriving",
"module",
"upper",
"tick",
"symop",
"symop-special",
"dot-dot",
"dot-id",
"dot-symop",
"dot-open",
"dollar",
"bang",
"tilde",
"at",
"percent",
"hash",
"bar",
"arrow",
"ctr",
"texp-closer",
"quote-close",
"pragma",
"block-comment",
"line-comment",
"brace-close",
"brace-open",
"bracket-open",
"unboxed-close",
"semi",
"cpp-else",
"cpp",
};
#endif
typedef enum {
NInactive,
NInit,
NProcess,
NResume,
} NewlineState;
typedef struct {
NewlineState state;
Lexed end;
uint32_t indent;
bool eof;
bool no_semi;
bool skip_semi;
bool unsafe;
} Newline;
typedef Array(Context) Contexts;
typedef struct {
int32_t *contents;
uint32_t size;
uint32_t capacity;
uint32_t offset;
} Lookahead;
typedef struct {
Contexts contexts;
Newline newline;
Lookahead lookahead;
#ifdef TREE_SITTER_DEBUG
ParseLines parse;
#endif
} State;
typedef struct {
TSLexer *lexer;
const bool *symbols;
uint32_t symop;
State *state;
#ifdef TREE_SITTER_DEBUG
Debug debug;
#endif
} Env;
static Env env_new(TSLexer *l, const bool * symbols, State *state) {
return (Env) {
.lexer = l,
.symbols = symbols,
.symop = 0,
.state = state,
#ifdef TREE_SITTER_DEBUG
.debug = debug_new(l),
#endif
};
}
static void reset_newline(Env *env) { memset(&env->state->newline, 0, sizeof(Newline)); }
static bool newline_active(Env *env) { return env->state->newline.state == NInit || env->state->newline.state == NProcess; }
static bool newline_init(Env *env) { return env->state->newline.state == NInit; }
static bool is_eof(Env *env) { return env->lexer->eof(env->lexer); }
static bool not_eof(Env *env) { return !(is_eof(env)); }
static uint32_t column(Env *env) {
return is_eof(env) ? 0 : env->lexer->get_column(env->lexer);
}
static void advance(Env *env) {
if (not_eof(env)) {
array_push(&env->state->lookahead, PEEK);
env->lexer->advance(env->lexer, false);
}
}
static bool set_result_symbol(Env *env, Symbol result) {
if (result != FAIL) {
env->lexer->result_symbol = (TSSymbol) result;
return true;
}
return false;
}
#ifdef TREE_SITTER_DEBUG
static void mark_debug(Env *env, const char *restrict marked_by) {
dbg("mark: %s\n", marked_by);
env->debug.marked = (int) column(env);
env->debug.marked_line = 0;
env->debug.marked_by = marked_by;
env->lexer->mark_end(env->lexer);
}
static void append_parse_buffer(Env *env);
static void advance_debug(Env *env) {
append_parse_buffer(env);
advance(env);
}
static void skip_debug(Env *env) {
append_parse_buffer(env);
env->lexer->advance(env->lexer, true);
}
#endif
static inline bool valid(Env *env, Symbol s) { return env->symbols[s]; }
static Symbol finish(Symbol s, const char *restrict desc) {
(void) desc;
dbg("finish: %s\n", desc);
return s;
}
static Symbol finish_if_valid(Env *env, Symbol s, const char *restrict desc) {
if (valid(env, s)) return finish(s, desc);
return FAIL;
}
static Symbol finish_marked(Env *env, Symbol s, const char *restrict desc) {
(void) desc;
MARK(desc);
return s;
}
static Symbol update_state(const char *restrict desc) {
return finish(UPDATE, desc);
}
static void advance_over_abs(Env *env, uint32_t abs) {
for (uint32_t i = env->state->lookahead.size; i <= abs; i++) S_ADVANCE;
}
static void advance_over(Env *env, uint32_t rel) {
advance_over_abs(env, env->state->lookahead.offset + rel);
}
static void skip_over(Env *env, uint32_t rel) {
Lookahead *l = &env->state->lookahead;
if (l->offset > l->size) advance_over_abs(env, l->offset - 1);
uint32_t abs = l->offset + rel;
for (uint32_t i = env->state->lookahead.size; i <= abs; i++) S_SKIP;
}
static void advance_before(Env *env, uint32_t rel) {
uint32_t abs = env->state->lookahead.offset + rel;
if (abs > 0) advance_over_abs(env, abs - 1);
}
static int32_t unsafe_peek_abs(Env *env, uint32_t abs) {
return
abs < env->state->lookahead.size ?
env->state->lookahead.contents[abs] :
0;
}
static int32_t unsafe_peek(Env *env, uint32_t rel) {
return unsafe_peek_abs(env, env->state->lookahead.offset + rel);
}
#ifdef TREE_SITTER_DEBUG
static void debug_peek(Env *env, uint32_t rel) {
uint32_t abs = env->state->lookahead.offset + rel;
dbg("peek ");
if (env->state->lookahead.offset > 0) dbg("%u->", env->state->lookahead.offset);
dbg("%u", rel);
if (abs < env->state->lookahead.size)
dbg(" cached | len: %u", env->state->lookahead.size);
else if (abs > env->state->lookahead.size)
dbg(" advance | len: %u", env->state->lookahead.size);
dbg("\n");
}
#endif
static int32_t peek(Env *env, uint32_t rel) {
#ifdef TREE_SITTER_DEBUG
debug_peek(env, rel);
#endif
if (env->state->lookahead.offset + rel < env->state->lookahead.size) return unsafe_peek(env, rel);
else {
advance_before(env, rel);
return PEEK;
}
}
static int32_t peek0(Env *env) { return peek(env, 0); }
static int32_t peek1(Env *env) { return peek(env, 1); }
static int32_t peek2(Env *env) { return peek(env, 2); }
static bool char_at(Env *env, uint32_t n, int32_t c) {
return peek(env, n) == c;
}
static bool char0(Env *env, int32_t c) {
return char_at(env, 0, c);
}
static bool char1(Env *env, int32_t c) {
return char_at(env, 1, c);
}
static bool char2(Env *env, int32_t c) {
return char_at(env, 2, c);
}
static void reset_lookahead_abs(Env *env, uint32_t abs) {
dbg("reset: %u\n", abs);
env->state->lookahead.offset = abs;
env->symop = 0;
}
static void reset_lookahead_to(Env *env, uint32_t rel) {
reset_lookahead_abs(env, env->state->lookahead.offset + rel);
}
static void reset_lookahead(Env *env) {
reset_lookahead_abs(env, env->state->lookahead.size);
}
static bool no_lookahead(Env *env) {
return env->state->lookahead.size == 0;
}
static uint32_t start_column(Env *env) {
return column(env) - env->state->lookahead.size;
}
static uint32_t advance_while(Env *env, uint32_t i, bool (*pred)(int32_t)) {
while (pred(peek(env, i))) { i++; }
return i;
}
static uint32_t advance_until_char(Env *env, uint32_t i, int32_t c) {
while (not_eof(env) && !char_at(env, i, c)) { i++; }
return i;
}
static bool has_contexts(Env *env) { return env->state->contexts.size != 0; }
static void push_context(Env *env, ContextSort sort, uint32_t indent) {
dbg("push: %s %d\n", context_names[sort], indent);
Context ctx = (Context) {.sort = sort, .indent = indent};
array_push(&env->state->contexts, ctx);
}
static void pop(Env *env) {
if (has_contexts(env)) {
dbg("pop: %s\n", context_names[array_back(&env->state->contexts)->sort]);
array_pop(&env->state->contexts);
}
}
static ContextSort current_context(Env *env) {
return has_contexts(env) ? array_back(&env->state->contexts)->sort : NoContext;
}
static bool is_layout_context(Env *env) {
return current_context(env) < Braces;
}
static bool is_semicolon_context(Env *env) {
return current_context(env) < MultiWayIfLayout;
}
static uint32_t current_indent(Env *env) {
for (int32_t i = (int32_t) env->state->contexts.size - 1; i >= 0; i--) {
Context *cur = array_get(&env->state->contexts, i);
if (cur->sort < Braces) return cur->indent;
}
return 0;
}
static bool indent_less(Env *env, uint32_t indent) {
return is_layout_context(env) && indent < current_indent(env);
}
static bool indent_lesseq(Env *env, uint32_t indent) {
return is_layout_context(env) && indent <= current_indent(env);
}
static bool top_layout(Env *env) {
return env->state->contexts.size == 1;
}
static bool in_module_header(Env *env) {
return current_context(env) == ModuleHeader;
}
static Symbol context_end_sym(ContextSort s) {
switch (s) {
case TExp:
return END_TEXP;
case Braces:
return END_BRACE;
default:
return s < Braces ? END : FAIL;
}
}
#define NEWLINE_CASES \
case '\n': \
case '\r': \
case '\f'
static bool is_newline(int32_t c) {
switch (c) {
NEWLINE_CASES:
return true;
default:
return false;
}
}
static bool varid_start_char(const int32_t c) { return c == '_' || is_varid_start_char(c); }
static bool is_id_char(const int32_t c) {
return c == '_' || c == '\'' || is_identifier_char(c);
}
static bool is_inner_id_char(const int32_t c) {
return is_id_char(c) || c == '#';
}
static bool quoter_char(const int32_t c) { return is_id_char(c) || c == '.'; }
static bool reserved_symbolic(const int32_t c) {
switch (c) {
case '(':
case ')':
case ',':
case ';':
case '[':
case ']':
case '`':
case '{':
case '}':
case '"':
case '\'':
case '_':
return true;
default: return false;
}
}
static bool symop_char(const int32_t c) {
return is_symop_char(c) && !reserved_symbolic(c);
}
static uint32_t symop_lookahead(Env *env) {
if (env->symop == 0) {
env->symop = advance_while(env, 0, symop_char);
if (env->symop > 0)
dbg("symop: %d, %.*ls\n", env->symop, env->symop, env->state->lookahead.contents + env->state->lookahead.offset);
}
return env->symop;
}
static bool is_symop(Env *env) {
return symop_lookahead(env) > 0;
}
static bool after_error(Env *env) { return valid(env, FAIL); }
#ifdef TREE_SITTER_DEBUG
static void push_parse_buffer_line(Env *env) {
ParseLine new_line = array_new();
array_reserve(&new_line, 1);
array_push(&env->state->parse, new_line);
}
static ParseLine *ensure_parse_buffer(Env *env) {
ParseLines *buffer = &env->state->parse;
if (buffer->size == 0) push_parse_buffer_line(env);
if (is_newline(PEEK)) push_parse_buffer_line(env);
return array_back(buffer);
}
static void append_parse_buffer(Env *env) {
ParseLine *current_line = ensure_parse_buffer(env);
if (is_newline(PEEK)) {
env->debug.marked_line++;
env->debug.start_line++;
}
else if (column(env) >= current_line->size) array_push(current_line, PEEK);
}
static void fill_parse_buffer(Env *env) {
env->debug.end_col = column(env);
while (!(is_newline(PEEK) || is_eof(env))) S_ADVANCE;
}
static bool seq(Env *env, const char *restrict s);
static void print_lookahead(Env *env) {
dbg("lookahead: %.*ls\n", env->state->lookahead.size, env->state->lookahead.contents);
}
static const char * space = "<space>";
static const char * newline_char = "\\n";
static const char * show_char(int32_t c) {
switch (c) {
NEWLINE_CASES:
return newline_char;
case ' ':
case '\t':
case '\v':
return space;
default:
return NULL;
}
}
static void print_lookahead_chars_from(Env *env, uint32_t start) {
if (start < env->state->lookahead.size) {
dbg("lookahead from %d: ", start);
for (; start < env->state->lookahead.size; start++) {
int32_t c = env->state->lookahead.contents[start];
const char * s = show_char(c);
if (s == NULL) dbg("%lc", c);
else dbg("%s", s);
}
dbg("\n");
}
else
dbg("print_lookahead_chars_from: Too large (%d / %d)", start, env->state->lookahead.size);
}
static void debug_contexts(Env *env) {
if (env->state->contexts.size == 0) dbg("empty");
bool empty = true;
for (size_t i = 0; i < env->state->contexts.size; i++) {
if (!empty) dbg("-");
Context ctx = *array_get(&env->state->contexts, i);
if (ctx.sort == ModuleHeader) dbg("pre");
else if (ctx.sort == Braces) dbg("brace");
else if (ctx.sort == TExp) dbg("texp");
else {
if (ctx.sort == DoLayout) dbg("do ");
else if (ctx.sort == LetLayout) dbg("let ");
else if (ctx.sort == CaseLayout) dbg("case ");
else if (ctx.sort == MultiWayIfLayout) dbg("if ");
else if (ctx.sort == QuoteLayout) dbg("quote ");
dbg("%d", ctx.indent);
}
empty = false;
}
}
void debug_newline(Env *env) {
switch (env->state->newline.state) {
case NInactive:
dbg("no");
break;
case NInit:
dbg("init");
break;
case NProcess:
dbg("process");
break;
case NResume:
dbg("resume");
break;
}
if (env->state->newline.state != NInactive) dbg(" %d %s", env->state->newline.indent, token_names[env->state->newline.end]);
if (env->state->newline.eof) dbg(" [eof]");
if (env->state->newline.no_semi) dbg(" [no_semi]");
if (env->state->newline.skip_semi) dbg(" [skip_semi]");
if (env->state->newline.unsafe) dbg(" [unsafe]");
}
static void debug_valid(Env *env, const bool *syms) {
if (after_error(env)) {
dbg("all");
return;
}
bool fst = true;
for (Symbol i = FAIL; i <= UPDATE; i++) {
if (syms[i]) {
if (!fst) dbg(",");
dbg("%s", sym_names[i]);
fst = false;
}
}
}
static bool debug_init(Env *env) {
setlocale(LC_ALL, "C.UTF-8");
dbg("\n");
dbg("state:\n syms = ");
debug_valid(env, env->symbols);
dbg("\n contexts = ");
debug_contexts(env);
dbg("\n newline = ");
debug_newline(env);
dbg("\n");
return false;
}
void sgr(const char *restrict code) {
dbg("\x1b[%sm", code);
}
void color(unsigned c) {
char code[3];
sprintf(code, "3%d", c);
sgr(code);
}
void palette() {
color(4);
dbg("before");
color(2);
dbg(" marked");
color(3);
dbg(" advanced");
color(5);
dbg(" lookahead");
sgr("");
dbg("\n");
}
static bool debug_parse_metadata = false;
static void dump_parse_metadata(Env *env) {
Debug *debug = &env->debug;
dbg(
"lines: %d | start_line: %d | start_col: %d | marked_line: %d | marked: %d | end_col: %d | persist lines: %d\n",
env->state->parse.size,
debug->start_line,
debug->start_col,
debug->marked_line,
debug->marked,
debug->end_col,
env->state->parse.size - debug->marked_line
);
}
void debug_parse(Env *env) {
Debug *debug = &env->debug;
ParseLines *buffer = &env->state->parse;
uint32_t lines = buffer->size;
dbg("-----------------------\n");
if (debug_parse_metadata) dump_parse_metadata(env);
if (lines > 0) {
color(4);
for (uint32_t i = 0; i < lines; i++) {
ParseLine *line = array_get(buffer, i);
int32_t *buf = line->contents;
if (line->contents == NULL) break;
uint32_t pos = 0;
if (debug->start_line == lines - 1 - i) {
while (pos < debug->start_col) { dbg("%lc", buf[pos]); pos++; }
color(2);
}
if (debug->marked >= 0 && debug->marked_line == lines - 1 - i) {
while ((int) pos < debug->marked) { dbg("%lc", buf[pos]); pos++; }
color(3);
}
if (i == lines - 1) {
while (pos < debug->end_col) { dbg("%lc", buf[pos]); pos++; }
color(5);
}
while (pos < line->size) { dbg("%lc", buf[pos]); pos++; }
dbg("\n");
}
sgr("");
}
dbg("-----------------------\n");
}
static unsigned serialize_parse_lines(char *cursor, ParseLines *parse, unsigned to_copy) {
for (unsigned i = 0; i < parse->size; i++) {
ParseLine *line = array_get(parse, i);
unsigned line_size = line->size * sizeof(uint32_t);
to_copy += line_size + sizeof(uint32_t);
if (to_copy > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0;
*((uint32_t *) cursor) = line->size;
cursor += sizeof(line->size);
memcpy(cursor, line->contents, line_size);
cursor += line_size;
}
return to_copy;
}
static void deserialize_parse_lines(const char *cursor, ParseLines *parse, uint32_t size) {
array_reserve(parse, size);
for (unsigned i = 0; i < size; i++) {
if (i >= parse->size) { array_push(parse, (ParseLine)array_new()); }
ParseLine *line = &parse->contents[i];
uint32_t line_len = *((uint32_t *) cursor);
cursor += sizeof(uint32_t);
array_reserve(line, line_len);
line->size = line_len;
unsigned line_size = line->size * sizeof(uint32_t);
memcpy(line->contents, cursor, line_size);
cursor += line_size;
}
for (unsigned i = parse->size; i > size; i--) { array_delete(array_get(parse, i - 1)); }
parse->size = size;
}
void debug_finish(Env *env, Symbol result) {
dbg("result: ");
if (result) dbg("%s, ", sym_names[result]);
else dbg("<skipped>, ");
if (env->debug.marked == -1) dbg("%d", column(env));
else dbg("%s@%d", env->debug.marked_by, env->debug.marked);
dbg("\n\n");
fill_parse_buffer(env);
debug_parse(env);
env->state->parse.size -= env->debug.marked_line;
}
#endif
static bool seq_from(Env *env, const char *restrict s, uint32_t start) {
uint32_t len = (uint32_t) strlen(s);
for (uint32_t i = 0; i < len; i++) {
int32_t c = s[i];
int32_t c2 = peek(env, start + i);
if (c != c2) return false;
}
peek(env, start + len);
return true;
}
static bool seq(Env *env, const char *restrict s) {
return seq_from(env, s, 0);
}
static void take_line(Env *env) {
while (not_eof(env) && !is_newline(PEEK)) S_ADVANCE;
}
static bool is_space_or_tab(int32_t c) {
return c == ' ' || c == '\t';
}
static void take_line_escaped_newline(Env *env) {
for (;;) {
while (not_eof(env) && !is_newline(PEEK) && PEEK != '\\') S_ADVANCE;
if (PEEK == '\\') {
S_ADVANCE;
if (is_space_or_tab(PEEK)) {
while (is_space_or_tab(PEEK)) S_ADVANCE;
if (is_newline(PEEK)) S_ADVANCE;
}
else S_ADVANCE;
}
else return;
}
}
static bool skip_space(Env *env) {
if (!is_space_char(PEEK)) return false;
S_SKIP;
while(is_space_char(PEEK)) S_SKIP;
return true;
}
static bool skip_newlines(Env *env) {
if (!is_newline(PEEK)) return false;
S_SKIP;
while(is_newline(PEEK)) S_SKIP;
return true;
}
typedef enum {
NoSpace,
Indented,
BOL,
} Space;
static Space skip_whitespace(Env *env) {
Space space = NoSpace;
while (true) {
if (skip_space(env)) space = Indented;
else if (skip_newlines(env)) space = BOL;
else return space;
};
}
static uint32_t take_space_from(Env *env, uint32_t start) {
return advance_while(env, start, is_space_char);
}
static bool token_end(int32_t c) { return !is_inner_id_char(c); }
static bool token_from(Env *env, const char *restrict s, uint32_t start) {
return seq_from(env, s, start) && token_end(peek(env, start + (uint32_t) strlen(s)));
}
static bool token(Env *env, const char *restrict s) {
return seq(env, s) && token_end(peek(env, (uint32_t) strlen(s)));
}
static bool any_token_from(Env *env, size_t n, const char * tokens[], uint32_t start) {
for (size_t i = 0; i < n; i++) {
if (token_from(env, tokens[i], start)) return true;
}
return false;
}
static bool match_symop(Env *env, const char *restrict target) {
return symop_lookahead(env) == strlen(target) && seq(env, target);
}
static bool uninitialized(Env *env) { return !has_contexts(env); }
static uint32_t conid(Env *env) {
if (!is_conid_start_char(peek0(env))) return 0;
return advance_while(env, 1, is_inner_id_char);
}
typedef enum {
NoQualifiedName,
QualifiedTarget,
QualifiedConid,
} QualifiedName;
static QualifiedName qualified_name(Env *env, bool (*name)(Env *)) {
bool qualified = false;
while (true) {
uint32_t end = conid(env);
if (end == 0) break;
if (!char_at(env, end, '.')) {
if (qualified) return QualifiedConid;
else break;
}
qualified = true;
reset_lookahead_to(env, end + 1);
if (name(env)) return true;
}
return NoQualifiedName;
}
static bool odd_backslashes_before(Env *env, int32_t index) {
bool odd = false;
while (index >= 0 && peek(env, (uint32_t) index) == '\\') {
odd = !odd;
index--;
}
return odd;
}
static uint32_t take_string_literal(Env *env) {
uint32_t end = 1;
while (true) {
end = advance_until_char(env, end, '"') + 1;
if (is_eof(env) || !odd_backslashes_before(env, (int) end - 2)) return end;
}
}
static uint32_t take_char_literal(Env *env) {
if (char1(env, '\\')) return advance_until_char(env, 2, '\'') + 2;
else return char_at(env, 2, '\'') ? 3 : 1;
}
typedef enum {
CppNothing,
CppStart,
CppElse,
CppEnd,
CppOther,
} CppDirective;
static const char *cpp_tokens_start[3] = {
"if",
"ifdef",
"ifndef",
};
static bool cpp_cond_start(Env *env, uint32_t start) {
return any_token_from(env, 3, cpp_tokens_start, start);
}
static const char *cpp_tokens_else[4] = {
"else",
"elif",
"elifdef",
"elifndef",
};
static bool cpp_cond_else(Env *env, uint32_t start) {
return any_token_from(env, 4, cpp_tokens_else, start);
}
static bool cpp_cond_end(Env *env, uint32_t start) { return token_from(env, "endif", start); }
static const char *cpp_tokens_other[7] = {
"define",
"undef",
"include",
"pragma",
"error",
"warning",
"line",
};
static bool cpp_directive_other(Env *env, uint32_t start) {
return
any_token_from(env, 7, cpp_tokens_other, start)
||
is_newline(peek(env, start))
||
(char1(env, '!') && uninitialized(env))
;
}
static CppDirective cpp_directive(Env *env) {
if (!char0(env, '#')) return CppNothing;
uint32_t start = take_space_from(env, 1);
if (cpp_cond_start(env, start)) return CppStart;
else if (cpp_cond_else(env, start)) return CppElse;
else if (cpp_cond_end(env, start)) return CppEnd;
else if (cpp_directive_other(env, start)) return CppOther;
else return CppNothing;
}
static Symbol start_brace(Env *env) {
if (valid(env, START_BRACE)) {
push_context(env, Braces, 0);
return finish(START_BRACE, "start_brace");
}
return FAIL;
}
static Symbol end_brace(Env *env) {
if (valid(env, END_BRACE) && current_context(env) == Braces) {
pop(env);
return finish(END_BRACE, "end_brace");
}
return FAIL;
}
static Symbol valid_layout_start_sym(Env *env) {
for (Symbol i = START; i < END; i++) {
if (valid(env, i)) return i;
}
return FAIL;
}
static ContextSort layout_sort(Symbol s) {
switch (s) {
case START_DO:
return DoLayout;
case START_CASE:
return CaseLayout;
case START_IF:
return MultiWayIfLayout;
case START_LET:
return LetLayout;
case START_QUOTE:
return QuoteLayout;
default:
return DeclLayout;
}
}
typedef struct {
Symbol sym;
ContextSort sort;
} StartLayout;
static StartLayout valid_layout_start(Env *env, Lexed next) {
StartLayout start = {.sym = valid_layout_start_sym(env), .sort = NoContext};
if (uninitialized(env) || start.sym == FAIL) return start;
ContextSort sort = layout_sort(start.sym);
switch (next) {
case LBar:
break;
case LBraceOpen:
if (newline_active(env)) return start;
sort = Braces;
start.sym = START_EXPLICIT;
break;
default:
if (sort == MultiWayIfLayout) return start;
break;
}
start.sort = sort;
return start;
}
static bool indent_can_start_layout(Env *env, ContextSort sort, uint32_t indent) {
if (current_context(env) == Braces) return true;
uint32_t cur = current_indent(env);
return (indent > cur || (indent == cur && sort == DoLayout));
}
static Symbol start_layout(Env *env, const StartLayout start, uint32_t indent, const char * restrict desc) {
if (in_module_header(env)) pop(env);
else if (start.sort == Braces) MARK("start_layout brace");
else if (!indent_can_start_layout(env, start.sort, indent)) return FAIL;
push_context(env, start.sort, indent);
return finish(start.sym, desc);
}
static Symbol start_layout_interior(Env *env, Lexed next) {
StartLayout start = valid_layout_start(env, next);
if (start.sort == NoContext) return FAIL;
return start_layout(env, start, start_column(env), "interior");
}
static Symbol start_layout_newline(Env *env) {
StartLayout start = valid_layout_start(env, env->state->newline.end);
if (start.sort == NoContext) return FAIL;
Symbol result = start_layout(env, start, env->state->newline.indent, "newline");
if (result != FAIL) env->state->newline.no_semi = true;
return result;
}
static Symbol texp_context(Env *env) {
if (valid(env, START_TEXP)) {
push_context(env, TExp, 0);
return finish(START_TEXP, "texp_context");
}
else if (valid(env, END_TEXP) && current_context(env) == TExp) {
pop(env);
return finish(END_TEXP, "texp_context");
}
else return FAIL;
}
static Symbol end_layout_unchecked(Env *env, const char *restrict desc) {
pop(env);
return finish(END, desc);
}
static Symbol end_layout(Env *env, const char *restrict desc) {
if (valid(env, END)) return end_layout_unchecked(env, desc);
else return FAIL;
}
static Symbol end_layout_brace(Env *env) {
if (valid(env, END_EXPLICIT) && current_context(env) == Braces) {
advance_over(env, 0);
MARK("end_layout_brace");
pop(env);
return finish(END_EXPLICIT, "brace");
}
else return FAIL;
}
static Symbol end_layout_indent(Env *env) {
if (valid(env, END) && indent_less(env, env->state->newline.indent)) {
if (top_layout(env)) {
array_back(&env->state->contexts)->indent = env->state->newline.indent;
return update_state("end top layout");
}
else {
env->state->newline.skip_semi = false;
return end_layout_unchecked(env, "indent");
}
}
return FAIL;
}
static Symbol end_layout_infix(Env *env) {
if (!valid(env, VARSYM) && !valid(env, CONSYM)) return end_layout(env, "symop invalid");
return FAIL;
}
static Symbol end_layout_where(Env *env) {
if (valid(env, END) && !valid(env, WHERE) && is_layout_context(env)) return end_layout(env, "where");
return FAIL;
}
static Symbol end_layout_in(Env *env) {
if (valid(env, END) && (!valid(env, IN) || current_context(env) == LetLayout)) return end_layout(env, "in");
return FAIL;
}
static Symbol end_layout_deriving(Env *env) {
if (valid(env, END) && !valid(env, DERIVING) && !top_layout(env) && current_context(env) == DeclLayout)
return end_layout(env, "deriving");
return FAIL;
}
static bool layouts_in_texp(Env *env) {
if (is_layout_context(env) && (env->state->contexts.size > 1)) {
for (int32_t i = (int32_t) env->state->contexts.size - 2; i >= 0; i--) {
Context *cur = array_get(&env->state->contexts, i);
if (cur->sort == TExp || cur->sort == Braces) return true;
else if (cur->sort > Braces) break;
}
}
return false;
}
static Symbol token_end_layout_texp(Env *env) {
return (valid(env, END) && layouts_in_texp(env)) ? end_layout(env, "texp") : FAIL;
}
static Symbol force_end_context(Env *env) {
for (int32_t i = (int32_t) env->state->contexts.size - 1; i >= 0; i--) {
ContextSort ctx = array_get(&env->state->contexts, i)->sort;
Symbol s = context_end_sym(ctx);
pop(env);
if (s != FAIL && valid(env, s)) return finish(s, "force_end_context");
}
return FAIL;
}
static bool opening_token(Env *env, uint32_t i) {
int32_t c = peek(env, i);
switch (c) {
case 0x27e6: case 0x2987: case '(':
case '[':
case '"':
return true;
case '{':
return peek(env, i + 1) != '-';
default:
return is_id_char(c);
}
}
static bool valid_symop_two_chars(int32_t first_char, int32_t second_char) {
switch (first_char) {
case '=':
return second_char != '>';
case '<':
return second_char != '-';
case ':':
return second_char != ':';
default:
return true;
}
}
static Lexed lex_prefix(Env *env, Lexed t) {
return opening_token(env, 1) ? t : LSymop;
}
static Lexed lex_splice(int32_t c) {
return varid_start_char(c) || c == '(' ? LDollar : LSymop;
}
static Lexed lex_symop(Env *env) {
uint32_t len = symop_lookahead(env);
if (len == 0) return LNothing;
int32_t c1 = unsafe_peek(env, 0);
if (len == 1) {
switch (c1) {
case '?':
return varid_start_char(peek1(env)) ? LNothing : LSymop;
case '#':
return char1(env, ')') ? LUnboxedClose : LHash;
case '|':
return char1(env, ']') ? LQuoteClose : LBar;
case '!':
return lex_prefix(env, LBang);
case '~':
return lex_prefix(env, LTilde);
case '@':
return lex_prefix(env, LAt);
case '%':
return lex_prefix(env, LPercent);
case '$':
return lex_splice(peek1(env));
case '.':
if (is_id_char(peek1(env))) return LDotId;
else if (opening_token(env, 1)) return LDotOpen;
else return LSymop;
case 0x2192: case 0x22b8: return LArrow;
case 0x21d2: return LCArrow;
case '=':
case 0x27e7: case 0x2988: return LTexpCloser;
case '*':
case '-':
return LSymopSpecial;
case '\\':
case 0x2190: case 0x2200: case 0x2237: case 0x2605: case 0x27e6: case 0x2919: case 0x291a: case 0x291b: case 0x291c: case 0x2987: return LNothing;
}
}
else if (len == 2) {
if (seq(env, "->")) return LArrow;
if (seq(env, "=>")) return LCArrow;
int32_t c2 = unsafe_peek(env, 1);
switch (c1) {
case '$':
if (c2 == '$') return lex_splice(peek2(env));
break;
case '|':
if (c2 == '|' && char2(env, ']')) return LQuoteClose;
break;
case '.':
if (c2 == '.') return LDotDot;
else return LDotSymop;
break;
case '#':
if (c2 == '#' || c2 == '|') return LSymopSpecial;
break;
default:
if (!valid_symop_two_chars(c1, c2)) return LNothing;
break;
}
}
else switch (c1) {
case '-':
if (seq(env, "->.")) return LArrow;
break;
case '.':
return LDotSymop;
}
return LSymop;
}
static Symbol left_section_op(Env *env, uint32_t start) {
if (valid(env, LEFT_SECTION_OP)) {
advance_before(env, start);
Space space = skip_whitespace(env);
if (char_at(env, start, ')')) return finish(LEFT_SECTION_OP, "left section");
if (space != NoSpace) return finish_if_valid(env, NO_SECTION_OP, "left section");
}
return FAIL;
}
static Symbol left_section_ticked(Env *env) {
if (valid(env, LEFT_SECTION_OP)) {
uint32_t end_tick = advance_until_char(env, 1, '`');
if (char_at(env, end_tick, '`')) {
return left_section_op(env, end_tick + 1);
}
}
return FAIL;
}
static Symbol finish_symop(Env *env, Symbol s) {
if (valid(env, s) || valid(env, LEFT_SECTION_OP)) {
uint32_t after_symop = symop_lookahead(env);
SEQ(left_section_op(env, after_symop));
MARK("symop");
return s;
}
return FAIL;
}
static Symbol tight_op(Env *env, bool whitespace, Symbol s) {
if (!whitespace) return finish_if_valid(env, s, "tight_op");
else return FAIL;
}
static Symbol prefix_or_varsym(Env *env, bool whitespace, Symbol s) {
if (whitespace) SEQ(finish_if_valid(env, s, "prefix_or_varsym"));
return finish_symop(env, VARSYM);
}
static Symbol tight_or_varsym(Env *env, bool whitespace, Symbol s) {
SEQ(tight_op(env, whitespace, s));
return finish_symop(env, VARSYM);
}
static Symbol infix_or_varsym(Env *env, bool whitespace, Symbol prefix, Symbol tight) {
SEQ(finish_if_valid(env, whitespace ? prefix : tight, "infix_or_varsym"));
return finish_symop(env, VARSYM);
}
static Symbol qualified_op(Env *env) {
if (qualified_name(env, is_symop) == QualifiedTarget) {
SEQ(left_section_op(env, symop_lookahead(env)));
return QUALIFIED_OP;
}
return FAIL;
}
static bool is_qq_start(Env *env) {
uint32_t end = advance_while(env, 1, quoter_char);
return char_at(env, end, '|');
}
static Lexed try_end_token(Env *env, const char * restrict target, Lexed match) {
if (token(env, target)) return match;
else return LNothing;
}
static bool only_minus(Env *env) {
uint32_t i = 2;
while (peek(env, i) == '-') i++;
return !symop_char(peek(env, i));
}
static bool line_comment_herald(Env *env) {
return seq(env, "--") && only_minus(env);
}
static Lexed lex_cpp(Env *env) {
switch(cpp_directive(env)) {
case CppElse:
return LCppElse;
case CppNothing:
return LNothing;
default:
return LCpp;
}
}
static Lexed lex_extras(Env *env, bool bol) {
switch (peek0(env)) {
case '{':
if (char1(env, '-')) return char2(env, '#') ? LPragma : LBlockComment;
break;
case '#':
if (bol) return lex_cpp(env);
break;
case '-':
if (line_comment_herald(env)) return LLineComment;
break;
default:
break;
}
return LNothing;
}
static Lexed lex(Env *env, bool bol) {
SEQ(lex_extras(env, bol));
if (symop_char(peek0(env))) SEQ(lex_symop(env));
else switch (peek0(env)) {
case 'w':
return try_end_token(env, "where", LWhere);
case 'i':
return try_end_token(env, "in", LIn);
case 't':
return try_end_token(env, "then", LThen);
case 'e':
return try_end_token(env, "else", LElse);
case 'd':
return try_end_token(env, "deriving", LDeriving);
case 'm':
if ((uninitialized(env) || in_module_header(env)) && token(env, "module")) return LModule;
break;
case '{':
return LBraceOpen;
case '}':
return LBraceClose;
case ';':
return LSemi;
case '`':
return LTick;
case '[':
if (valid(env, QQ_START) && is_qq_start(env)) return LBracketOpen;
break;
case ']':
case ')':
case ',':
return LTexpCloser;
default:
if (is_conid_start_char(peek0(env))) return LUpper;
break;
}
return LNothing;
}
static Symbol cpp_else(Env *env, bool emit) {
uint32_t nesting = 1;
do {
take_line_escaped_newline(env);
if (emit) MARK("cpp_else");
S_ADVANCE;
reset_lookahead(env);
switch (cpp_directive(env)) {
case CppStart:
nesting++;
break;
case CppEnd:
nesting--;
break;
default:
break;
}
}
while (not_eof(env) && nesting > 0);
if (emit) return finish(CPP, "cpp-else");
else return FAIL;
}
static Symbol cpp_line(Env *env) {
take_line_escaped_newline(env);
return finish_marked(env, CPP, "cpp");
}
static Symbol comment_type(Env *env) {
uint32_t i = 2;
while (peek(env, i) == '-') i++;
while (not_eof(env)) {
int32_t c = peek(env, i++);
if (c == '|' || c == '^') return HADDOCK;
else if (!is_space_char(c)) break;
}
return COMMENT;
}
static Symbol inline_comment(Env *env) {
Symbol sym = comment_type(env);
do {
take_line(env);
MARK("inline comment");
S_ADVANCE;
reset_lookahead(env);
} while (line_comment_herald(env));
return sym;
}
static uint32_t consume_block_comment(Env *env, uint32_t col) {
uint32_t level = 0;
for (;;) {
if (is_eof(env)) return col;
col++;
switch (PEEK) {
case '{':
S_ADVANCE;
if (PEEK == '-') {
S_ADVANCE;
col++;
level++;
}
break;
case '-':
S_ADVANCE;
if (PEEK == '}') {
S_ADVANCE;
col++;
if (level == 0) return col;
level--;
}
break;
NEWLINE_CASES:
S_ADVANCE;
col = 0;
break;
case '\t':
S_ADVANCE;
col += 7;
break;
default:
S_ADVANCE;
break;
}
}
}
static Symbol block_comment(Env *env) {
Symbol sym = comment_type(env);
consume_block_comment(env, env->state->lookahead.size);
return finish_marked(env, sym, "block_comment");
}
static bool consume_pragma(Env *env) {
if (seq(env, "{-#")) {
while (!seq(env, "#-}") && not_eof(env)) {
reset_lookahead(env);
advance_over(env, 0);
}
return true;
}
return false;
}
static Symbol pragma(Env *env) {
if (consume_pragma(env)) {
MARK("pragma");
if (env->state->newline.state != NInactive) env->state->newline.state = NResume;
return finish(PRAGMA, "newline");
}
return FAIL;
}
static Symbol qq_body(Env *env) {
for (;;) {
if (is_eof(env)) return finish(QQ_BODY, "qq_body");
else if (PEEK == 0x27e7) {
return finish_marked(env, QQ_BODY, "qq_body");
}
else if (PEEK == '|') {
MARK("qq_body");
S_ADVANCE;
if (PEEK == ']') {
return finish(QQ_BODY, "qq_body");
}
} else S_ADVANCE;
}
}
static Symbol explicit_semicolon(Env *env) {
if (valid(env, SEMICOLON) && !env->state->newline.skip_semi) {
env->state->newline.skip_semi = true;
return update_state("explicit semicolon enable");
}
return FAIL;
}
static Symbol resolve_semicolon(Env *env, Lexed next) {
if (env->state->newline.skip_semi) {
switch(next) {
case LLineComment:
case LBlockComment:
case LPragma:
case LSemi:
break;
default:
env->state->newline.skip_semi = false;
return update_state("explicit semicolon disable");
}
}
return FAIL;
}
static Symbol semicolon(Env *env) {
if (
is_semicolon_context(env)
&&
!(env->state->newline.no_semi || env->state->newline.skip_semi)
&&
indent_lesseq(env, env->state->newline.indent)
) {
env->state->newline.no_semi = true;
return finish(SEMICOLON, "newline");
}
else return FAIL;
}
static Symbol process_token_safe(Env *env, Lexed next) {
switch (next) {
case LWhere:
return end_layout_where(env);
case LIn:
return end_layout_in(env);
case LThen:
case LElse:
return end_layout(env, "then/else");
case LDeriving:
return end_layout_deriving(env);
case LBar:
if (!valid(env, BAR)) return end_layout(env, "bar");
break;
case LPragma:
return pragma(env);
case LBlockComment:
return block_comment(env);
case LLineComment:
return inline_comment(env);
case LCppElse:
return cpp_else(env, true);
case LCpp:
return cpp_line(env);
case LSymop:
case LTick:
case LHash:
return end_layout_infix(env);
case LUnboxedClose:
SEQ(token_end_layout_texp(env));
return end_layout_infix(env);
case LArrow:
if (!valid(env, ARROW)) return token_end_layout_texp(env);
break;
case LTexpCloser:
return token_end_layout_texp(env);
case LQuoteClose:
return end_layout(env, "quote bracket");
break;
default:
break;
}
return FAIL;
}
static Symbol process_token_symop(Env *env, bool whitespace, Lexed next) {
switch (next) {
case LDotDot:
SEQ(finish_if_valid(env, DOTDOT, "symop"));
return tight_op(env, whitespace, QUAL_DOT);
case LDotId:
SEQ(finish_if_valid(env, whitespace ? PREFIX_DOT : TIGHT_DOT, "symop"));
return tight_op(env, whitespace, QUAL_DOT);
case LDotSymop:
return tight_or_varsym(env, whitespace, QUAL_DOT);
case LDotOpen:
return prefix_or_varsym(env, whitespace, PREFIX_DOT);
case LBang:
return infix_or_varsym(env, whitespace, PREFIX_BANG, TIGHT_BANG);
case LTilde:
return infix_or_varsym(env, whitespace, PREFIX_TILDE, TIGHT_TILDE);
case LAt:
return infix_or_varsym(env, whitespace, PREFIX_AT, TIGHT_AT);
case LPercent:
return prefix_or_varsym(env, whitespace, PREFIX_PERCENT);
case LSymop:
if (char0(env, ':')) return finish_symop(env, CONSYM);
else return finish_symop(env, VARSYM);
case LSymopSpecial:
SEQ(left_section_op(env, symop_lookahead(env)));
if (valid(env, MINUS) && match_symop(env, "-")) return finish(MINUS, "symop");
break;
case LUnboxedClose:
case LHash:
return left_section_op(env, symop_lookahead(env));
case LTick:
return left_section_ticked(env);
case LUpper:
if (valid(env, QUALIFIED_OP) || valid(env, LEFT_SECTION_OP)) SEQ(qualified_op(env));
break;
default:
break;
}
return FAIL;
}
static Symbol process_token_splice(Env *env, Lexed next) {
switch (next) {
case LDollar:
return finish_if_valid(env, SPLICE, "symop");
default:
break;
}
return FAIL;
}
static Symbol process_token_interior(Env *env, Lexed next) {
switch (next) {
case LBraceClose:
SEQ(end_layout_brace(env));
return token_end_layout_texp(env);
case LModule:
return FAIL;
case LSemi:
return explicit_semicolon(env);
case LBracketOpen:
return finish(QQ_START, "qq_start");
default:
break;
}
SEQ(process_token_safe(env, next));
return start_layout_interior(env, next);
}
static Symbol process_token_init(Env *env, uint32_t indent, Lexed next) {
switch (next) {
case LModule:
push_context(env, ModuleHeader, 0);
return update_state("init");
case LBraceOpen:
advance_over(env, 0);
MARK("init brace");
push_context(env, Braces, indent);
return finish(START_EXPLICIT, "init");
default:
push_context(env, DeclLayout, indent);
return finish(START, "init");
}
}
static Symbol newline_extras(Env *env, Space space) {
bool bol = space == BOL || (space == NoSpace && newline_init(env));
Lexed next = lex_extras(env, bol);
dbg("newline extras token: %s\n", token_names[next]);
return process_token_safe(env, next);
}
static Symbol newline_process(Env *env) {
dbg("newline post\n");
uint32_t indent = env->state->newline.indent;
Lexed end = env->state->newline.end;
SEQ(end_layout_indent(env));
SEQ(process_token_safe(env, end));
Space space = skip_whitespace(env);
MARK("newline_post");
if (env->state->newline.unsafe) SEQ(newline_extras(env, space));
if (!env->state->newline.eof) SEQ(start_layout_newline(env));
SEQ(semicolon(env));
reset_newline(env);
if (uninitialized(env)) SEQ(process_token_init(env, indent, end));
else {
SEQ(process_token_symop(env, true, end));
SEQ(process_token_splice(env, end));
}
return update_state("newline final");
}
static Symbol newline_post(Env *env) {
Symbol res = newline_process(env);
if (newline_init(env)) env->state->newline.state = NProcess;
return res;
}
static void newline_lookahead(Env *env, Newline *newline) {
for (;;) {
switch (peek0(env)) {
NEWLINE_CASES:
skip_over(env, 0);
newline->indent = 0;
break;
case '\t':
skip_over(env, 0);
newline->indent += 8;
break;
default:
if (is_space_char(peek0(env))) {
skip_over(env, 0);
newline->indent++;
break;
}
newline->end = lex(env, newline->indent == 0);
dbg("newline token: %s, %lc\n", token_names[newline->end], peek0(env));
newline->unsafe |= !no_lookahead(env);
switch (newline->end) {
case LEof:
newline->indent = 0;
newline->eof = true;
return;
case LThen:
case LElse:
case LSemi:
newline->no_semi = true;
return;
case LBlockComment:
newline->indent = consume_block_comment(env, newline->indent + 2);
break;
case LLineComment:
newline->indent = 0;
take_line(env);
break;
case LCppElse:
cpp_else(env, false);
take_line_escaped_newline(env);
break;
case LCpp:
take_line_escaped_newline(env);
break;
default:
return;
}
}
reset_lookahead(env);
}
}
static Symbol newline_start(Env *env) {
dbg("newline lookahead\n");
env->state->newline.state = NInit;
newline_lookahead(env, &env->state->newline);
if (env->state->newline.unsafe) return update_state("newline lookahead");
else return newline_post(env);
}
static Symbol newline_resume(Env *env) {
dbg("newline resume\n");
uint32_t indent = env->state->newline.indent;
skip_space(env);
reset_newline(env);
env->state->newline.indent = indent;
return newline_start(env);
}
typedef enum {
CtrUndecided,
CtrImpossible,
CtrArrowFound,
CtrInfixFound,
CtrEqualsFound,
CtrBarFound,
} CtrResult;
#ifdef TREE_SITTER_DEBUG
static const char *ctr_result_names[] = {
"undecided",
"impossible",
"arrow",
"infix",
"equals",
"bar",
};
#endif
typedef struct {
uint32_t reset;
uint32_t brackets;
bool context;
bool infix;
bool data_infix;
bool type_instance;
} CtrState;
static CtrResult ctr_bracket_open(CtrState *state) {
state->brackets++;
state->reset = 1;
return CtrUndecided;
}
static CtrResult ctr_bracket_close(CtrState *state) {
if (state->brackets == 0) return CtrImpossible;
state->brackets--;
state->reset = 1;
return CtrUndecided;
}
static CtrResult ctr_stop_on_token(Env *env, const char * restrict target) {
return token(env, target) ? CtrImpossible : CtrUndecided;
}
static CtrResult ctr_top(Env *env, Lexed next) {
switch (next) {
case LCArrow:
return CtrArrowFound;
case LSymop:
case LSymopSpecial:
case LTilde:
case LTick:
return CtrInfixFound;
case LBar:
return CtrBarFound;
case LArrow:
case LWhere:
case LDotDot:
case LSemi:
break;
case LTexpCloser:
switch (peek0(env)) {
case '=':
return CtrEqualsFound;
default:
break;
}
break;
default:
switch (peek0(env)) {
case '=':
return CtrEqualsFound;
case 0x2200: break;
case ':':
if (char1(env, ':')) break;
return CtrUndecided;
case 'f':
SEQ(ctr_stop_on_token(env, "forall"));
return ctr_stop_on_token(env, "family");
case 'i':
return ctr_stop_on_token(env, "instance");
default:
return CtrUndecided;
}
}
return CtrImpossible;
}
static CtrResult ctr_lookahead_step(Env *env, CtrState *state, Lexed next) {
state->reset = 1;
switch (next) {
case LBraceClose:
return ctr_bracket_close(state);
case LUnboxedClose:
SEQ(ctr_bracket_close(state));
state->reset = 2;
return CtrUndecided;
case LBraceOpen:
return ctr_bracket_open(state);
case LSymopSpecial:
case LSymop:
state->reset = symop_lookahead(env);
break;
case LUpper:
state->reset = conid(env);
return CtrUndecided;
case LDotId:
return CtrUndecided;
case LPragma:
if (consume_pragma(env)) state->reset = 3;
return CtrUndecided;
case LTexpCloser:
case LNothing:
switch (peek0(env)) {
case ')':
case ']':
return ctr_bracket_close(state);
case '(':
case '[':
return ctr_bracket_open(state);
case '"':
state->reset = take_string_literal(env);
return CtrUndecided;
case '\'':
state->reset = take_char_literal(env);
return CtrUndecided;
default:
if (varid_start_char(peek0(env))) state->reset = advance_while(env, 1, is_id_char);
break;
}
default:
break;
}
if (state->brackets != 0) return CtrUndecided;
return ctr_top(env, next);
}
static Symbol constraint_lookahead(Env *env) {
dbg("type lookahead\n");
CtrState state = {.reset = 0};
bool done = false;
while (!done && not_eof(env)) {
Newline newline = {.state = 0, .indent = 99999};
newline_lookahead(env, &newline);
if (newline.indent <= current_indent(env) && current_context(env) != Braces) break;
CtrResult result = ctr_lookahead_step(env, &state, newline.end);
dbg("type: %lc, %s\n", peek0(env), ctr_result_names[result]);
switch (result) {
case CtrArrowFound:
state.context = true;
done = true;
break;
case CtrInfixFound:
if (char0(env, ':') || char0(env, '`')) state.data_infix = true;
state.infix = true;
done = !valid(env, CONTEXT);
break;
case CtrEqualsFound:
done = !valid(env, TYPE_INSTANCE);
state.type_instance = true;
break;
case CtrBarFound:
done = true;
state.type_instance = false;
break;
case CtrImpossible:
done = true;
case CtrUndecided:
break;
}
reset_lookahead_to(env, state.reset);
state.reset = 0;
}
if (state.context) SEQ(finish_if_valid(env, CONTEXT, "ctr"));
if (state.infix) SEQ(finish_if_valid(env, INFIX, "ctr"));
if (state.data_infix) SEQ(finish_if_valid(env, DATA_INFIX, "ctr"));
if (state.type_instance) SEQ(finish_if_valid(env, TYPE_INSTANCE, "ctr"));
return FAIL;
}
static Symbol process_token_constraint(Env *env) {
if (
valid(env, CONTEXT)
||
valid(env, INFIX)
||
valid(env, DATA_INFIX)
||
valid(env, TYPE_INSTANCE)
)
return constraint_lookahead(env);
return FAIL;
}
static Symbol interior(Env *env, bool whitespace) {
Lexed next = lex(env, false);
dbg("interior, column %d, ws %d, token %s\n", column(env), whitespace, token_names[next]);
SEQ(resolve_semicolon(env, next));
SEQ(process_token_interior(env, next));
SEQ(process_token_symop(env, whitespace, next));
SEQ(process_token_constraint(env));
SEQ(process_token_splice(env, next));
return FAIL;
}
static Symbol pre_ws_commands(Env *env) {
SEQ(texp_context(env));
SEQ(start_brace(env));
SEQ(end_brace(env));
if (valid(env, QQ_BODY)) return qq_body(env);
if (newline_active(env)) SEQ(newline_post(env));
else if (env->state->newline.state == NResume) SEQ(newline_resume(env));
return FAIL;
}
static Symbol scan_main(Env *env) {
MARK("main");
SEQ(pre_ws_commands(env));
bool whitespace = skip_space(env);
if (is_newline(PEEK)) return newline_start(env);
else if (not_eof(env)) return interior(env, whitespace);
return FAIL;
}
#ifdef TREE_SITTER_DEBUG
static Symbol scan_debug(Env *env) {
if (debug_init(env)) return update_state("debug init parse buffer");
Symbol result = scan_main(env);
debug_finish(env, result);
return result;
}
#endif
static bool process_result(Env *env, Symbol result) {
if (result == FAIL && is_eof(env) && no_lookahead(env)) {
MARK("eof whitespace");
if (valid(env, END)) result = end_layout_unchecked(env, "eof");
else if (valid(env, SEMICOLON)) result = finish(SEMICOLON, "eof");
else {
result = force_end_context(env);
if (result == FAIL) {
dbg("eof | context cap: %d | lookahead cap: %d | parse cap: %d\n",
env->state->contexts.capacity, env->state->lookahead.capacity, env->state->parse.capacity);}
}
}
return set_result_symbol(env, result);
}
static bool scan(Env *env) {
if(after_error(env)) { dbg("error recovery\n"); return false; }
#ifdef TREE_SITTER_DEBUG
Symbol result = scan_debug(env);
#else
Symbol result = scan_main(env);
#endif
return process_result(env, result);
}
typedef struct {
unsigned contexts;
Newline newline;
#ifdef TREE_SITTER_DEBUG
unsigned parse;
#endif
} Persist;
void *tree_sitter_haskell_external_scanner_create() {
State *state = ts_calloc(1, sizeof(State));
array_reserve(&state->contexts, 8);
array_reserve(&state->lookahead, 8);
#ifdef TREE_SITTER_DEBUG
array_reserve(&state->parse, 20);
#endif
return state;
}
bool tree_sitter_haskell_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Env env = env_new(lexer, valid_symbols, (State*) payload);
return scan(&env);
}
unsigned tree_sitter_haskell_external_scanner_serialize(void *payload, char *buffer) {
State *state = (State *) payload;
Persist persist = {.contexts = state->contexts.size, .newline = state->newline};
#ifdef TREE_SITTER_DEBUG
persist.parse = state->parse.size;
#endif
unsigned contexts_size = persist.contexts * sizeof(Context);
memcpy(buffer, &persist, sizeof(Persist));
unsigned to_copy = sizeof(Persist) + contexts_size;
if (to_copy > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) return 0;
memcpy(buffer + sizeof(Persist), state->contexts.contents, contexts_size);
#ifdef TREE_SITTER_DEBUG
to_copy = serialize_parse_lines(buffer + sizeof(Persist) + contexts_size, &state->parse, to_copy);
#endif
return to_copy;
}
void tree_sitter_haskell_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
State *state = (State *) payload;
Persist p;
Persist *persist;
if (length > 0)
persist = (Persist *) buffer;
else {
p = (Persist) {.contexts = 0};
persist = &p;
persist->newline.state = NResume;
}
unsigned contexts_size = persist->contexts * sizeof(Context);
state->newline = persist->newline;
array_reserve(&state->contexts, persist->contexts);
state->contexts.size = persist->contexts;
if (length > 0)
memcpy(state->contexts.contents, buffer + sizeof(Persist), contexts_size);
state->lookahead.size = 0;
state->lookahead.offset = 0;
array_reserve(&state->lookahead, 8);
#ifdef TREE_SITTER_DEBUG
if (length > 0)
deserialize_parse_lines(buffer + sizeof(Persist) + contexts_size, &state->parse, persist->parse);
#endif
}
void tree_sitter_haskell_external_scanner_destroy(void *payload) {
State *state = (State*) payload;
#ifdef TREE_SITTER_DEBUG
palette();
ParseLines *parse = &state->parse;
for (unsigned i = 0; i < parse->size; i++) array_delete(array_get(parse, i));
array_delete(parse);
#endif
array_delete(&state->contexts);
array_delete(&state->lookahead);
ts_free(state);
}