#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "tree_sitter/parser.h"
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"
/*
 * Token kinds produced by this external scanner.
 *
 * The values are used both as indices into the valid_symbols array passed to
 * the scan entry point and as the value stored in lexer->result_symbol.
 * NOTE(review): presumably the order must match the `externals` list of the
 * grammar - confirm against grammar.js before reordering.
 */
enum TokenType {
    DOCUMENT_STRUCTURE_COMMENT = 0,
    LITERAL_STRING             = 1,
    HEXADECIMAL_STRING         = 2,
    BASE85_STRING              = 3,
    NUMERIC                    = 4,
};
/*
 * Scanner state that is kept between scan calls and copied verbatim by the
 * serialize/deserialize entry points.
 */
struct ScannerState {
    /* Number of '(' seen inside a literal string that have not yet been
     * matched by a ')'; maintained by literal_string(). */
    int paren_level;
};
typedef struct ScannerState ScannerState;
/*
 * Consume leading whitespace, advancing with the skip flag set for every
 * character, until a non-whitespace character or end of input is reached.
 *
 * lexer: the tree-sitter lexer to advance.
 */
static void skip_whitespace(TSLexer *lexer) {
    while (!lexer->eof(lexer)) {
        const int32_t ch = lexer->lookahead;

        /* The <ctype.h> classifiers are only defined for values representable
         * as unsigned char (or EOF). The lookahead is a 32-bit code point, so
         * anything outside that range must be rejected before calling
         * isspace() to avoid undefined behaviour. */
        if ((ch < 0) || (ch > 0xFF) || !isspace(ch)) {
            break;
        }
        lexer->advance(lexer, true);
    }
}
/*
 * Report whether a character terminates a token: one of the special
 * characters % / ( ) [ ] { } < > or a whitespace character.
 *
 * ch: the character to classify. It is taken as a full 32-bit code point so
 *     that the lexer's lookahead is not truncated on the way in; with a
 *     narrow parameter, U+0125 would alias '%' and U+0120 would alias ' '.
 *
 * Returns true when ch is a delimiter, false otherwise.
 */
static bool isdelimiter(int32_t ch) {
    switch (ch) {
    case '%':
    case '/':
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case '<':
    case '>':
        return true;
    default:
        /* isspace() is only defined for unsigned char values (or EOF), so
         * reject everything else before calling it. */
        return (ch >= 0) && (ch <= 0xFF) && (isspace(ch) != 0);
    }
}
/*
 * Try to recognise a document structure comment.
 *
 * After skipping leading whitespace, the comment must start with '%' in
 * column 0. It is accepted when it begins with "%!" or with "%%" followed by
 * '+', '?' or an upper-case ASCII letter. Whenever a '%' is found in column 0
 * the rest of the line (up to, but not including, '\r' or '\n') is consumed,
 * even if the comment turns out not to be a structure comment.
 *
 * lexer: the tree-sitter lexer to read from.
 *
 * Returns true when a document structure comment was consumed.
 */
static bool document_structure_comment(TSLexer *lexer) {
    bool found_dsc = false;

    skip_whitespace(lexer);
    if (lexer->eof(lexer)) {
        return false;
    }
    if ((lexer->lookahead != '%') || (lexer->get_column(lexer) != 0)) {
        return false;
    }

    lexer->advance(lexer, false);
    if (lexer->lookahead == '!') {
        lexer->advance(lexer, false);
        found_dsc = true;
    } else if (lexer->lookahead == '%') {
        lexer->advance(lexer, false);
        /* Plain range test instead of isalpha()/isupper(): those are
         * undefined for code points that do not fit in unsigned char, and
         * the lookahead can be any 32-bit code point. */
        found_dsc = (lexer->lookahead == '+') ||
                    (lexer->lookahead == '?') ||
                    ((lexer->lookahead >= 'A') && (lexer->lookahead <= 'Z'));
    }

    /* Consume the remainder of the line; the line terminator is left alone. */
    while (!lexer->eof(lexer) &&
           (lexer->lookahead != '\r') && (lexer->lookahead != '\n')) {
        lexer->advance(lexer, false);
    }
    return found_dsc;
}
/*
 * Consume a run of literal-string content.
 *
 * Characters are consumed until a backslash or a ')' that has no matching
 * '(' is seen; neither of those is consumed. Nested parentheses are tracked
 * in state->paren_level, which is updated in place as they are consumed.
 *
 * lexer: the tree-sitter lexer to read from.
 * state: scanner state holding the current parenthesis nesting depth.
 *
 * Returns true when the run stopped at a backslash or closing parenthesis
 * after consuming at least one character; false when nothing was consumed or
 * end of input was reached first.
 */
static bool literal_string(TSLexer *lexer, ScannerState *state) {
    bool consumed_any = false;

    while (!lexer->eof(lexer)) {
        if (lexer->lookahead == U'\\') {
            /* Escape sequences are not part of this token. */
            return consumed_any;
        }

        if (lexer->lookahead == U'(') {
            state->paren_level++;
        } else if (lexer->lookahead == U')') {
            if (state->paren_level == 0) {
                /* Unbalanced ')': stop in front of it. */
                return consumed_any;
            }
            state->paren_level--;
        }

        lexer->advance(lexer, false);
        consumed_any = true;
    }

    /* Ran into end of input before the run was terminated. */
    return false;
}
/*
 * Consume a run of hexadecimal digits, after skipping leading whitespace.
 *
 * lexer: the tree-sitter lexer to read from.
 *
 * Returns true when at least one hexadecimal digit was consumed.
 */
static bool hexadecimal_string(TSLexer *lexer) {
    bool has_content = false;

    skip_whitespace(lexer);
    while (!lexer->eof(lexer)) {
        const int32_t ch = lexer->lookahead;

        /* isxdigit() is only defined for unsigned char values (or EOF); the
         * lookahead is a 32-bit code point, so range-check it first. */
        if ((ch < 0) || (ch > 0xFF) || !isxdigit(ch)) {
            break;
        }
        lexer->advance(lexer, false);
        has_content = true;
    }
    return has_content;
}
/*
 * Consume a run of base-85 string characters, after skipping leading
 * whitespace. Accepted characters are '!' (33) through 'u' (117) and
 * 'z' (122); the first character outside that set ends the run and is not
 * consumed.
 *
 * lexer: the tree-sitter lexer to read from.
 *
 * Returns true when the run was ended by a non-base-85 character after at
 * least one character was consumed; false when nothing was consumed or end
 * of input was reached first.
 */
static bool base85_string(TSLexer *lexer) {
    bool has_content = false;

    skip_whitespace(lexer);
    while (!lexer->eof(lexer)) {
        const bool is_z = (lexer->lookahead == 'z');
        const bool in_range = (lexer->lookahead >= '!') && (lexer->lookahead <= 'u');

        if (!(is_z || in_range)) {
            return has_content;
        }
        lexer->advance(lexer, false);
        has_content = true;
    }

    /* End of input before the run was terminated. */
    return false;
}
/*
 * Try to recognise a numeric token, after skipping leading whitespace.
 *
 * Accepted forms:
 *   integer : [+-] digits
 *   real    : [+-] digits '.' [digits]  or  [+-] '.' digits,
 *             optionally followed by an exponent
 *   exponent: ('e' | 'E') [+-] digits
 *   radix   : base '#' digits, with base in 2..36 written in decimal, no
 *             sign, and every digit (0-9, a-z, A-Z) strictly less than base
 *
 * The number must be followed by a delimiter (see isdelimiter) or end of
 * input; the delimiter itself is not consumed.
 *
 * lexer: the tree-sitter lexer to read from.
 *
 * Returns true when a complete, valid number was consumed.
 */
static bool numeric(TSLexer *lexer) {
    typedef enum NumericState {
        SIGN,
        INTEGER,
        RADIX,
        REAL,
        MAGNITUDE,
        EXPONENT,
    } NumericState;

    bool has_sign = false;
    /* Becomes true once a mantissa digit has been seen, so that a lone "."
     * or "-." is not mistaken for a number. */
    bool valid_number = false;
    bool valid_radix = false;
    bool valid_exponent = false;
    unsigned int base = 0;

    skip_whitespace(lexer);
    if (lexer->eof(lexer)) {
        return false;
    }

    for (NumericState state = SIGN;;) {
        /* Classify with plain range tests: the <ctype.h> functions are
         * undefined for code points that do not fit in unsigned char. */
        const int32_t ch = lexer->lookahead;
        const bool is_digit = (ch >= '0') && (ch <= '9');
        const bool is_exponent_mark = (ch == 'e') || (ch == 'E');

        switch (state) {
        case SIGN:
            if ((ch == '+') || (ch == '-')) {
                /* Only a single leading sign is allowed. */
                if (has_sign) {
                    return false;
                }
                lexer->advance(lexer, false);
                has_sign = true;
            } else if (is_digit) {
                /* Leave the digit for the INTEGER state to consume. */
                state = INTEGER;
            } else if (ch == '.') {
                lexer->advance(lexer, false);
                state = REAL;
            } else {
                return false;
            }
            break;

        case INTEGER:
            if (lexer->eof(lexer) || isdelimiter(ch)) {
                return valid_number;
            } else if (is_digit) {
                /* Track the value in case it turns out to be a radix base;
                 * stop accumulating once it is out of range so that the
                 * counter cannot overflow on long integers. */
                if (base <= 36) {
                    base *= 10;
                    base += (unsigned int)(ch - '0');
                }
                lexer->advance(lexer, false);
                valid_number = true;
            } else if (ch == '#') {
                lexer->advance(lexer, false);
                state = RADIX;
            } else if (ch == '.') {
                lexer->advance(lexer, false);
                state = REAL;
            } else if (is_exponent_mark) {
                lexer->advance(lexer, false);
                state = MAGNITUDE;
            } else {
                return false;
            }
            break;

        case RADIX: {
            unsigned int digit;

            if ((base < 2) || (base > 36) || has_sign) {
                return false;
            }
            if (lexer->eof(lexer) || isdelimiter(ch)) {
                return valid_radix;
            }
            if (is_digit) {
                digit = (unsigned int)(ch - '0');
            } else if ((ch >= 'a') && (ch <= 'z')) {
                digit = (unsigned int)(ch - 'a') + 10;
            } else if ((ch >= 'A') && (ch <= 'Z')) {
                digit = (unsigned int)(ch - 'A') + 10;
            } else {
                return false;
            }
            /* Valid digits are 0 .. base-1. */
            if (digit >= base) {
                return false;
            }
            lexer->advance(lexer, false);
            valid_radix = true;
            break;
        }

        case REAL:
            if (lexer->eof(lexer) || isdelimiter(ch)) {
                return valid_number;
            } else if (is_digit) {
                lexer->advance(lexer, false);
                valid_number = true;
            } else if (is_exponent_mark) {
                lexer->advance(lexer, false);
                state = MAGNITUDE;
            } else {
                return false;
            }
            break;

        case MAGNITUDE:
            /* Optional sign of the exponent; the digits are handled by the
             * EXPONENT state on the next iteration. */
            if ((ch == '+') || (ch == '-')) {
                lexer->advance(lexer, false);
            }
            state = EXPONENT;
            break;

        case EXPONENT:
            if (lexer->eof(lexer) || isdelimiter(ch)) {
                return valid_number && valid_exponent;
            } else if (is_digit) {
                lexer->advance(lexer, false);
                valid_exponent = true;
            } else {
                return false;
            }
            break;
        }
    }
}
/*
 * Allocate the scanner state with ts_malloc and start it with a parenthesis
 * nesting depth of zero.
 *
 * Returns the new state, or a null pointer when the allocation returned
 * null. The state is released by the matching destroy function.
 */
void *tree_sitter_postscript_external_scanner_create() {
    ScannerState *scanner = ts_malloc(sizeof *scanner);

    if (!scanner) {
        return scanner;
    }
    scanner->paren_level = 0;
    return scanner;
}
/*
 * Release the scanner state with ts_free.
 *
 * payload: the scanner state; a null pointer is ignored.
 */
void tree_sitter_postscript_external_scanner_destroy(void *payload) {
    if (!payload) {
        return;
    }
    ts_free(payload);
}
/*
 * Copy the raw bytes of the scanner state into buffer.
 *
 * payload: the scanner state to save.
 * buffer:  destination; must have room for at least sizeof(ScannerState)
 *          bytes.
 *
 * Returns the number of bytes written, which is always sizeof(ScannerState).
 */
unsigned tree_sitter_postscript_external_scanner_serialize(void *payload, char *buffer) {
    const unsigned state_size = sizeof(ScannerState);

    memcpy(buffer, payload, state_size);
    return state_size;
}
/*
 * Restore the scanner state from bytes previously produced by the serialize
 * function.
 *
 * payload: the scanner state to overwrite.
 * buffer:  the serialized bytes; only read when length is large enough.
 * length:  number of bytes available in buffer. When it is smaller than
 *          sizeof(ScannerState) the state is reset to a nesting depth of
 *          zero instead.
 */
void tree_sitter_postscript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    ScannerState *scanner = payload;

    /* Start from the default state, then overwrite it if a full snapshot is
     * available. */
    scanner->paren_level = 0;
    if (length < sizeof(ScannerState)) {
        return;
    }
    memcpy(scanner, buffer, sizeof(ScannerState));
}
/*
 * Scanner entry point: try each token kind that is currently valid, in the
 * fixed order document structure comment, literal string, hexadecimal
 * string, base-85 string, numeric, and report the first one that matches.
 *
 * payload:       the scanner state (used by the literal string recogniser).
 * lexer:         the tree-sitter lexer; result_symbol is set on success.
 * valid_symbols: indexed by TokenType; true for each token kind to try.
 *
 * Returns true when a token was recognised, false otherwise.
 *
 * NOTE(review): a recogniser that fails may already have advanced the lexer,
 * and the following recognisers continue from that position - confirm this
 * is intended when several token kinds are valid at once.
 */
bool tree_sitter_postscript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    ScannerState *scanner = (ScannerState *)payload;

    if (valid_symbols[DOCUMENT_STRUCTURE_COMMENT] && document_structure_comment(lexer)) {
        lexer->result_symbol = DOCUMENT_STRUCTURE_COMMENT;
        return true;
    }

    if (valid_symbols[LITERAL_STRING] && literal_string(lexer, scanner)) {
        lexer->result_symbol = LITERAL_STRING;
        return true;
    }

    if (valid_symbols[HEXADECIMAL_STRING] && hexadecimal_string(lexer)) {
        lexer->result_symbol = HEXADECIMAL_STRING;
        return true;
    }

    if (valid_symbols[BASE85_STRING] && base85_string(lexer)) {
        lexer->result_symbol = BASE85_STRING;
        return true;
    }

    if (valid_symbols[NUMERIC] && numeric(lexer)) {
        lexer->result_symbol = NUMERIC;
        return true;
    }

    return false;
}