#include "tree_sitter/parser.h"
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
// Token kinds this scanner reports through lexer->result_symbol.
// NOTE(review): the order presumably has to mirror the `externals` list of
// the grammar definition -- confirm before reordering or inserting entries.
enum TokenType {
    END_FILE,  // `__EOF__` marker or a 0x1A character, plus all remaining input
    COMMENT,   // line (`//`), block (`/* */`) and nesting (`/+ +/`) comments
    DIRECTIVE, // a line whose first non-blank character is `#` (but not `#!`)
    L_INT,     // integer literal
    L_FLOAT,   // floating point literal
    L_STRING,  // q"..." delimited or heredoc string literal
    NOT_IN,    // `!in`
    NOT_IS,    // `!is`
};
// Reports whether c is a line terminator: LF, CR, or the Unicode line
// separator (U+2028) / paragraph separator (U+2029).
static bool
is_eol(int c)
{
    switch (c) {
    case '\n':
    case '\r':
    case 0x2028:
    case 0x2029:
        return (true);
    default:
        return (false);
    }
}
// Consumes an optional one-character string postfix (`c`, `d` or `w`) if it
// is the current lookahead, then marks the token end at the resulting
// position.
static void
match_string_suffix(TSLexer *lexer)
{
    switch (lexer->lookahead) {
    case 'c':
    case 'd':
    case 'w':
        lexer->advance(lexer, false);
        break;
    default:
        break;
    }
    lexer->mark_end(lexer);
}
// Matches the body of a delimited string, entered with the lookahead on the
// opening delimiter (which is skipped unconditionally).
//
//   start  opening delimiter that nests with `end`, or 0 when the delimiter
//          does not nest
//   end    closing delimiter
//
// The string ends at an un-nested `end` immediately followed by a double
// quote. An `end` seen as the very first body character is never treated as
// the terminator. On success an optional string suffix is consumed,
// result_symbol is set to L_STRING and true is returned; false is returned
// when the input runs out (lookahead of 0) first.
static bool
match_delimited_string(TSLexer *lexer, int start, int end)
{
    int depth = 0;
    bool at_start = true;

    lexer->advance(lexer, false);
    for (;;) {
        int ch = lexer->lookahead;
        if (ch == 0) {
            return (false);
        }
        if ((start != 0) && (ch == start)) {
            depth++;
        }
        if (ch == end) {
            if (depth > 0) {
                depth--;
            } else if (!at_start) {
                lexer->advance(lexer, false);
                if (lexer->lookahead == '"') {
                    lexer->advance(lexer, false);
                    lexer->result_symbol = L_STRING;
                    match_string_suffix(lexer);
                    return (true);
                }
                // Not the terminator: re-examine the character that
                // followed the delimiter without consuming it.
                continue;
            }
        }
        at_start = false;
        lexer->advance(lexer, false);
    }
}
// Matches the remainder of a heredoc string, entered with the lookahead on
// the first character of the heredoc identifier.
//
// The identifier (alphanumerics and underscores, at most 256 characters) is
// collected first. The input is then scanned line by line until a line
// starts with that identifier immediately followed by a double quote. On
// success an optional string suffix is consumed, result_symbol is set to
// L_STRING and true is returned. Returns false when no identifier is present
// or the input ends before the terminator is found.
static bool
match_heredoc_string(TSLexer *lexer)
{
    // Up to 256 identifier characters plus the closing quote and a 0 terminator.
    int identifier[256 + 2];
    // Capacity in ELEMENTS. Using sizeof(identifier) directly would be a
    // byte count and let the collection loop write past the array.
    const size_t capacity = sizeof(identifier) / sizeof(identifier[0]);
    size_t i = 0;
    size_t j;
    int c;

    while (i < (capacity - 2)) {
        c = lexer->lookahead;
        if (is_eol(c) || ((!iswalnum(c)) && (c != '_'))) {
            break;
        }
        identifier[i++] = c;
        lexer->advance(lexer, false);
    }
    if (i == 0) {
        return (false);
    }
    // The terminator to look for is the identifier followed by a quote.
    identifier[i++] = '"';
    identifier[i] = 0;

    while ((c = lexer->lookahead) != 0) {
        // Skip the rest of the current line ...
        while ((!is_eol(c)) && (c != 0)) {
            lexer->advance(lexer, false);
            c = lexer->lookahead;
        }
        // ... and the line terminator itself.
        lexer->advance(lexer, false);

        // Try to match the terminator at the start of the new line.
        j = 0;
        while (((c = lexer->lookahead) != 0) && (j < i)) {
            if (c != identifier[j]) {
                break;
            }
            lexer->advance(lexer, false);
            j++;
        }
        if (j == i) {
            match_string_suffix(lexer);
            lexer->result_symbol = L_STRING;
            return (true);
        }
    }
    return (false);
}
// Matches an end-of-file marker and everything after it.
//
// The marker is either the character 0x1A or the word `__EOF__` not followed
// by an alphanumeric, an underscore, or a non-ASCII character other than a
// line terminator. Once the marker is recognised all remaining input is
// consumed, the token end is marked and result_symbol is set to END_FILE.
// Returns false when the input at the lookahead is not such a marker.
static bool
match_eof(TSLexer *lexer)
{
    static const char marker[] = "__EOF__";
    const size_t marker_len = strlen(marker);

    if (lexer->lookahead != '\x1a') {
        for (size_t k = 0; k < marker_len; k++) {
            if (lexer->lookahead != marker[k]) {
                return (false);
            }
            lexer->advance(lexer, false);
        }
        // Reject longer identifiers that merely start with the marker.
        int after = lexer->lookahead;
        if (iswalnum(after) || (after == '_') ||
            ((after > 0x7f) && !is_eol(after))) {
            return (false);
        }
    }

    // Swallow the rest of the input.
    while (lexer->lookahead != 0) {
        lexer->advance(lexer, false);
    }
    lexer->mark_end(lexer);
    lexer->result_symbol = END_FILE;
    return (true);
}
// Matches a `#` directive line, entered with the lookahead on the `#`.
//
// Returns false when DIRECTIVE is not a valid token here, when the `#` is
// followed directly by `!`, or when only blanks separate the `#` from the
// end of the line. Otherwise the rest of the line and one following
// character are consumed, the token end is marked and result_symbol is set
// to DIRECTIVE.
static bool
match_directive(TSLexer *lexer, const bool *valid)
{
    int ch = lexer->lookahead;

    assert(ch == '#');
    if (!valid[DIRECTIVE]) {
        return (false);
    }

    lexer->advance(lexer, false);
    ch = lexer->lookahead;
    if (ch == '!') {
        return (false);
    }

    // Skip blanks after the `#`; reaching the end of the line here means
    // there is no directive text at all.
    for (; (ch != 0) && (iswspace(ch) || is_eol(ch)); ch = lexer->lookahead) {
        if (is_eol(ch)) {
            return (false);
        }
        lexer->advance(lexer, false);
    }

    // Consume the directive text up to the line terminator or end of input.
    for (; (ch != 0) && !is_eol(ch); ch = lexer->lookahead) {
        lexer->advance(lexer, false);
    }

    // Include the character that ended the line in the token.
    lexer->advance(lexer, false);
    lexer->mark_end(lexer);
    lexer->result_symbol = DIRECTIVE;
    return (true);
}
// Matches a line comment, entered with the lookahead on the second `/`.
//
// Returns false when COMMENT is not a valid token here. Otherwise everything
// up to (but not including) the line terminator or end of input is consumed,
// the token end is marked and result_symbol is set to COMMENT.
static bool
match_line_comment(TSLexer *lexer, const bool *valid)
{
    int ch = lexer->lookahead;

    assert(ch == '/');
    if (!valid[COMMENT]) {
        return (false);
    }

    for (; (ch != 0) && !is_eol(ch); ch = lexer->lookahead) {
        lexer->advance(lexer, false);
    }

    lexer->mark_end(lexer);
    lexer->result_symbol = COMMENT;
    return (true);
}
// Matches a block comment, entered with the lookahead on the `*` that
// follows the opening `/`.
//
// Returns false when COMMENT is not a valid token here or when the input
// runs out (lookahead of 0) before the closing `*/`. On success the closing
// `/` is consumed, the token end is marked and result_symbol is set to
// COMMENT.
static bool
match_block_comment(TSLexer *lexer, const bool *valid)
{
    int ch = lexer->lookahead;

    assert(ch == '*');
    if (!valid[COMMENT]) {
        return (false);
    }

    // True while the most recently examined character was a `*` that may
    // start the closing `*/`. The opening `*` itself does not count.
    bool after_star = false;

    while (ch != 0) {
        lexer->advance(lexer, false);
        ch = lexer->lookahead;

        if (!after_star) {
            if (ch == '*') {
                after_star = true;
            }
            continue;
        }
        if (ch == '/') {
            lexer->advance(lexer, false);
            lexer->mark_end(lexer);
            lexer->result_symbol = COMMENT;
            return (true);
        }
        if (ch != '*') {
            after_star = false;
        }
    }
    return (false);
}
// Matches a nesting comment, entered with the lookahead on the `+` that
// follows the opening `/`.
//
// Every inner `/+` increases the nesting depth and every `+/` decreases it;
// the comment ends when the depth returns to zero. Returns false when
// COMMENT is not a valid token here or when end of input is reached first.
// On success the closing `/` is consumed, the token end is marked and
// result_symbol is set to COMMENT.
static bool
match_nest_comment(TSLexer *lexer, const bool *valid)
{
    int ch = lexer->lookahead;
    int depth = 1;
    int last = 0; // previously examined character, 0 when it must not pair up

    assert(ch == '+');
    if (!valid[COMMENT]) {
        return (false);
    }

    while (!lexer->eof(lexer)) {
        lexer->advance(lexer, false);
        ch = lexer->lookahead;

        if ((last == '/') && (ch == '+')) {
            depth++;
            // Forget this character so it cannot also form a `+/` pair.
            ch = 0;
        } else if ((last == '+') && (ch == '/')) {
            depth--;
            if (depth == 0) {
                lexer->advance(lexer, false);
                lexer->mark_end(lexer);
                lexer->result_symbol = COMMENT;
                return (true);
            }
            // Forget this character so it cannot also form a `/+` pair.
            ch = 0;
        }
        last = ch;
    }
    return (false);
}
// Consumes the suffix letters following a numeric literal and decides which
// token to report.
//
//   is_float  true when the digits already seen contained a fraction or an
//             exponent
//
// Accepted suffix letters are `u`/`U`, `f`/`F`, `i` and `L`; combinations
// rejected by the checks below make the function return false, as does a
// suffix that runs straight into an alphanumeric or a non-ASCII character
// other than a line terminator. On success the token end is marked,
// result_symbol is set to L_INT or L_FLOAT (whichever is both valid and
// compatible with the suffix) and true is returned.
static bool
match_number_suffix(TSLexer *lexer, const bool *valid, bool is_float)
{
    bool has_long = false;
    bool has_imaginary = false;
    bool has_unsigned = false;
    bool has_f = false;
    int kind = 0; // 0: the suffix did not force L_INT or L_FLOAT
    int ch;

    for (;;) {
        bool is_suffix = true;

        ch = lexer->lookahead;
        if (ch == 0) {
            break;
        }
        switch (ch) {
        case 'u':
        case 'U':
            if (has_unsigned || has_imaginary || has_f || is_float) {
                return (false);
            }
            has_unsigned = true;
            kind = L_INT;
            break;
        case 'f':
        case 'F':
            if (has_unsigned || has_f || has_imaginary) {
                return (false);
            }
            has_f = true;
            kind = L_FLOAT;
            break;
        case 'i':
            if (has_imaginary || has_unsigned) {
                return (false);
            }
            kind = L_FLOAT;
            has_imaginary = true;
            break;
        case 'L':
            if (has_long || has_f || has_imaginary) {
                return (false);
            }
            has_long = true;
            break;
        default:
            is_suffix = false;
            break;
        }
        if (!is_suffix) {
            break;
        }
        lexer->advance(lexer, false);
    }

    // The literal must not run directly into further identifier-like text.
    if (iswalnum(ch) || ((ch > 0x7f) && !is_eol(ch))) {
        return (false);
    }

    if (is_float) {
        kind = L_FLOAT;
    }
    if (valid[L_INT] && (kind != L_FLOAT)) {
        lexer->result_symbol = L_INT;
        lexer->mark_end(lexer);
        return (true);
    }
    if (valid[L_FLOAT] && (kind != L_INT)) {
        lexer->result_symbol = L_FLOAT;
        lexer->mark_end(lexer);
        return (true);
    }
    return (false);
}
// Matches a numeric literal, entered with the lookahead on its first
// character (a digit or a `.`).
//
// Handles a leading `.` (must be followed by a digit), the `0b`/`0B` binary
// and `0x`/`0X` hexadecimal prefixes, `_` separators, a single fractional
// part and an exponent (`e`/`E` for decimal, `p`/`P` for hexadecimal, with
// an optional sign). The trailing suffix and the final choice between L_INT
// and L_FLOAT are delegated to match_number_suffix().
//
// Returns false when neither L_INT nor L_FLOAT is valid here, or when the
// text does not form a number.
static bool
match_number(TSLexer *lexer, const bool *valid)
{
    int c = lexer->lookahead;
    bool is_hex = false;
    bool is_bin = false;
    bool has_digit = false; // at least one digit seen in the current part
    bool has_dot = false;
    bool in_exp = false;

    if (c == '.') {
        lexer->advance(lexer, false);
        c = lexer->lookahead;
        if (!iswdigit(c)) {
            return (false);
        }
        has_dot = true;
    } else if (c == '0') {
        lexer->advance(lexer, false);
        c = lexer->lookahead;
        switch (c) {
        case 'b':
        case 'B':
            is_bin = true;
            lexer->advance(lexer, false);
            break;
        case 'x':
        case 'X':
            is_hex = true;
            lexer->advance(lexer, false);
            break;
        default:
            // The leading `0` is itself a digit of the number.
            has_digit = true;
            break;
        }
    }
    if (!(valid[L_INT] || valid[L_FLOAT])) {
        return (false);
    }

    bool done = false;
    while (((c = lexer->lookahead) != 0) && (!done)) {
        if ((c > 0x7f) || iswspace(c) || (c == ';')) {
            break;
        }
        if ((is_bin) && ((c == '0') || (c == '1'))) {
            lexer->advance(lexer, false);
            lexer->mark_end(lexer);
            has_digit = true;
            continue;
        } else if (iswdigit(c) ||
            (is_hex && (!in_exp) && (iswxdigit(c)))) {
            lexer->advance(lexer, false);
            lexer->mark_end(lexer);
            has_digit = true;
            continue;
        }
        switch (c) {
        case '.':
            if (!has_digit || has_dot || in_exp || is_bin) {
                // This dot cannot belong to the number; stop before it.
                lexer->mark_end(lexer);
                done = true;
                break;
            }
            // Mark the end before the dot so it can be left out of the
            // token if it turns out not to be a decimal point.
            lexer->mark_end(lexer);
            lexer->advance(lexer, false);
            c = lexer->lookahead;
            if (iswdigit(c) || (is_hex && iswxdigit(c))) {
                has_dot = true;
                continue;
            }
            if (iswalnum(c) || c == '_' || c == '.' ||
                (c > 0x7f && !is_eol(c))) {
                // The dot is followed by identifier-like text or another
                // dot: report the integer that ended before the dot.
                lexer->result_symbol = L_INT;
                return (valid[L_INT]);
            }
            // A trailing dot: the token is a float that includes the dot.
            lexer->result_symbol = L_FLOAT;
            lexer->mark_end(lexer);
            return (valid[L_FLOAT]);
        case '_':
            lexer->advance(lexer, false);
            continue;
        case 'e':
        case 'E':
        case 'p':
        case 'P':
            if (in_exp || is_bin) {
                return (false);
            }
            if (is_hex && (c == 'e' || c == 'E')) {
                return (false);
            }
            if ((!is_hex) && (c == 'p' || c == 'P')) {
                return (false);
            }
            lexer->advance(lexer, false);
            c = lexer->lookahead;
            if ((c == '+') || (c == '-')) {
                lexer->advance(lexer, false);
            }
            // The exponent needs digits of its own.
            has_digit = false;
            in_exp = true;
            continue;
        default:
            done = true;
            break;
        }
    }
    if (!has_digit) {
        return (false);
    }
    return (match_number_suffix(lexer, valid, has_dot || in_exp));
}
// Matches the `!in` and `!is` operators, entered with the lookahead on the
// `!`. Whitespace and line terminators between the `!` and the keyword are
// skipped.
//
// Returns false when neither NOT_IN nor NOT_IS is valid here, when the
// specific operator found is not valid, or when the keyword is only the
// prefix of a longer identifier (i.e. it is followed by an alphanumeric, an
// underscore, or a non-ASCII character other than a line terminator). On
// success the token end is marked and result_symbol is set to NOT_IN or
// NOT_IS.
static bool
match_not_in_is(TSLexer *lexer, const bool *valid)
{
    int c;
    int token;

    if (!valid[NOT_IN] && !valid[NOT_IS]) {
        return (false);
    }
    assert(lexer->lookahead == '!');
    lexer->advance(lexer, false);

    // Skip anything blank between `!` and the keyword.
    while ((c = lexer->lookahead) != 0) {
        if (!iswspace(c) && !is_eol(c)) {
            break;
        }
        lexer->advance(lexer, false);
    }

    if (lexer->lookahead != 'i') {
        return (false);
    }
    lexer->advance(lexer, false);
    switch (lexer->lookahead) {
    case 'n':
        token = NOT_IN;
        break;
    case 's':
        token = NOT_IS;
        break;
    default:
        return (false);
    }
    if (!valid[token]) {
        return (false);
    }
    lexer->advance(lexer, false);

    // `_` continues an identifier just like a letter or digit does, so
    // `!is_foo` must not be split into `!is` and `_foo`.
    c = lexer->lookahead;
    if (iswalnum(c) || (c == '_') || ((c > 0x7F) && (!is_eol(c)))) {
        return (false);
    }
    lexer->result_symbol = token;
    lexer->mark_end(lexer);
    return (true);
}
// Scanner constructor. Always returns NULL; no allocation is performed.
void *
tree_sitter_d_external_scanner_create(void)
{
    return (NULL);
}
// Scanner destructor. Does nothing; `arg` is ignored.
void
tree_sitter_d_external_scanner_destroy(void *arg)
{
    (void)arg;
}
// Serialization hook. Writes nothing to `buffer` and returns 0, the number
// of bytes stored.
unsigned
tree_sitter_d_external_scanner_serialize(void *arg, char *buffer)
{
    (void)arg;
    (void)buffer;
    return (0);
}
// Deserialization hook. Does nothing; all arguments are ignored.
void
tree_sitter_d_external_scanner_deserialize(
    void *arg, const char *buffer, unsigned length)
{
    (void)arg;
    (void)buffer;
    (void)length;
}
// Scanner entry point: skips leading whitespace and dispatches on the first
// significant character to the matcher for the corresponding token.
//
//   arg    scanner payload (unused)
//   lexer  lexer to read from and report the token through
//   valid  per-TokenType flags telling which tokens are acceptable here
//
// Returns true when a token was recognised (result_symbol is then set by
// the matcher), false otherwise.
bool
tree_sitter_d_external_scanner_scan(
    void *arg, TSLexer *lexer, const bool *valid)
{
    int c = lexer->lookahead;
    // A directive is only recognised when `#` is the first non-blank
    // character of a line.
    bool start_of_line = lexer->get_column(lexer) == 0;

    (void)arg;

    // Skip whitespace, remembering whether a line terminator was crossed.
    while ((iswspace(c) || is_eol(c)) && (c)) {
        if (is_eol(c)) {
            start_of_line = true;
        }
        lexer->advance(lexer, true);
        c = lexer->lookahead;
    }

    if (c == '#' && start_of_line) {
        return (match_directive(lexer, valid));
    }

    if (lexer->eof(lexer)) {
        return (false);
    }

    if ((c == '_') || (c == '\x1A')) {
        return (match_eof(lexer));
    }

    // The lookahead is a full code point, so the wide-character classifier
    // is required: isdigit() is undefined for values that do not fit in an
    // unsigned char.
    if (c == '.' || iswdigit(c)) {
        return (match_number(lexer, valid));
    }

    if (c == '!') {
        return (match_not_in_is(lexer, valid));
    }

    if ((c == 'q') && (valid[L_STRING])) {
        lexer->advance(lexer, false);
        if (lexer->lookahead != '"') {
            return (false);
        }
        lexer->advance(lexer, false);

        // The character after `q"` selects the kind of delimited string.
        switch ((c = lexer->lookahead)) {
        case '(':
            return (match_delimited_string(lexer, '(', ')'));
        case '[':
            return (match_delimited_string(lexer, '[', ']'));
        case '{':
            return (match_delimited_string(lexer, '{', '}'));
        case '<':
            return (match_delimited_string(lexer, '<', '>'));
        default:
            if (iswalnum(c) || c == '_') {
                return (match_heredoc_string(lexer));
            }
            // Any other character delimits the string on both ends and
            // does not nest.
            return (match_delimited_string(lexer, 0, c));
        }
    }

    if (c == '/') {
        lexer->advance(lexer, false);
        c = lexer->lookahead;
        if (c == '/') {
            return (match_line_comment(lexer, valid));
        }
        if (c == '*') {
            return (match_block_comment(lexer, valid));
        }
        if (c == '+') {
            return (match_nest_comment(lexer, valid));
        }
        return (false);
    }

    return (false);
}