#include "dtob_internal.h"
/*
 * Point a lexer at an input buffer and rewind it to the beginning.
 *
 * l   - lexer state to (re)initialise; only buf, len and pos are written.
 * buf - input bytes. The pointer is stored, not copied, so the buffer has
 *       to remain valid for as long as the lexer reads from it.
 * len - number of bytes available at buf.
 */
void lexer_init(Lexer *l, const uint8_t *buf, size_t len)
{
    l->pos = 0;
    l->len = len;
    l->buf = buf;
}
/*
 * Read the next token from the lexer's buffer and advance l->pos past it.
 *
 * Control codes occupy two bytes: a lead byte for which DTOB_IS_CTRL() is
 * true, followed by one more byte. The code value is the low six bits of
 * the lead byte (high part) combined with the second byte (low part).
 * DTOB_CODE_BLAST codes are skipped silently.
 *
 * Anything that is not a control byte starts a data run. The run extends
 * until the next control byte, the end of the buffer, or just after the
 * first byte that contains a 2-bit pair equal to 3 (treated as padding).
 * The run is then handed to trit_decode_padded().
 *
 * Returns:
 *   TOK_END    - the buffer is exhausted.
 *   TOK_ERROR  - truncated control code, DTOB_CODE_UQT (also reported on
 *                stderr), a code outside the known/custom ranges, a
 *                non-padding pair following a padding pair inside one
 *                byte, or a decode that yielded no buffer.
 *   TOK_CUSTOM - code in [16, DTOB_CUSTOM_MAX]; custom_code holds the code.
 *   TOK_DATA   - data/data_len hold the decoder's output.
 *                NOTE(review): the buffer presumably belongs to the caller
 *                from here on - confirm against trit_decode_padded().
 *   otherwise  - the token type matching the control code.
 */
Token lexer_next(Lexer *l)
{
    Token tok = { TOK_ERROR, 0, NULL, 0 };

    /* Control-code phase: loops only to step over BLAST codes. */
    for (;;) {
        if (l->pos >= l->len) {
            tok.type = TOK_END;
            return tok;
        }

        uint8_t lead = l->buf[l->pos];
        if (!DTOB_IS_CTRL(lead)) {
            break;                      /* data run starts here */
        }

        l->pos++;
        if (l->pos >= l->len) {
            /* Lead byte without its second byte. */
            tok.type = TOK_ERROR;
            return tok;
        }
        uint8_t trail = l->buf[l->pos];
        l->pos++;

        uint16_t code = ((uint16_t)(lead & 0x3F) << 8) | trail;
        if (code == DTOB_CODE_BLAST) {
            continue;
        }

        /* Every path below leaves tok ready to be returned as-is. */
        switch (code) {
        case DTOB_CODE_OPEN:        tok.type = TOK_OPEN;        break;
        case DTOB_CODE_ARR_CLOSE:   tok.type = TOK_ARR_CLOSE;   break;
        case DTOB_CODE_KV_CLOSE:    tok.type = TOK_KV_CLOSE;    break;
        case DTOB_CODE_TYPES_CLOSE: tok.type = TOK_TYPES_CLOSE; break;
        case DTOB_CODE_UQT:
            /* Not supported: report it and leave tok as TOK_ERROR. */
            fprintf(stderr, "dtob: uqt (opcode 4) unimplemented\n");
            break;
        case DTOB_CODE_RAW:         tok.type = TOK_T_RAW;       break;
        case DTOB_CODE_FLOAT:       tok.type = TOK_T_FLOAT;     break;
        case DTOB_CODE_DOUBLE:      tok.type = TOK_T_DOUBLE;    break;
        case DTOB_CODE_INT8:        tok.type = TOK_T_INT8;      break;
        case DTOB_CODE_INT16:       tok.type = TOK_T_INT16;     break;
        case DTOB_CODE_INT32:       tok.type = TOK_T_INT32;     break;
        case DTOB_CODE_INT64:       tok.type = TOK_T_INT64;     break;
        case DTOB_CODE_UINT8:       tok.type = TOK_T_UINT8;     break;
        case DTOB_CODE_UINT16:      tok.type = TOK_T_UINT16;    break;
        case DTOB_CODE_UINT32:      tok.type = TOK_T_UINT32;    break;
        case DTOB_CODE_UINT64:      tok.type = TOK_T_UINT64;    break;
        default:
            if (code >= 16 && code <= DTOB_CUSTOM_MAX) {
                tok.type = TOK_CUSTOM;
                tok.custom_code = code;
            }
            /* Anything else stays TOK_ERROR. */
            break;
        }
        return tok;
    }

    /* Data phase: collect the raw bytes of the run. */
    const size_t begin = l->pos;
    int run_done = 0;

    while (!run_done && l->pos < l->len) {
        uint8_t cur = l->buf[l->pos];
        if (DTOB_IS_CTRL(cur)) {
            break;
        }
        l->pos++;

        /* Walk the four 2-bit pairs from the most significant end. */
        int pad_seen = 0;
        for (int idx = 0; idx < 4; idx++) {
            uint8_t pair = (cur >> (6 - 2 * idx)) & 3;
            if (pair == 3) {
                pad_seen = 1;
            } else if (pad_seen) {
                /* A value pair after padding is malformed. */
                return tok;
            }
        }

        /* A byte carrying padding is the last byte of the run. */
        run_done = pad_seen;
    }

    size_t span = l->pos - begin;
    uint8_t *out = NULL;
    size_t out_len = trit_decode_padded(l->buf + begin, span, &out);
    if (out == NULL && span > 0) {
        return tok;
    }

    tok.type = TOK_DATA;
    tok.data = out;
    tok.data_len = out_len;
    return tok;
}