#define _POSIX_C_SOURCE 200809L
#include "dtob_internal.h"
/* Growable array of lexer tokens, used to buffer one collection's contents. */
typedef struct {
Token *toks; /* heap array of tokens; NULL until the first push */
size_t count; /* number of tokens currently stored */
size_t cap; /* allocated capacity of toks, in tokens */
} TokBuf;
/* Reset tb to the empty state: no storage, zero length, zero capacity.
 * Does not free anything tb may already point at. */
static void tokbuf_init(TokBuf *tb)
{
    tb->count = tb->cap = 0;
    tb->toks = NULL;
}
/*
 * Append token t to tb, growing the backing array when it is full
 * (capacity starts at 8 slots and doubles).
 *
 * The token is stored by value, so its data pointer is shared with the
 * caller's copy rather than duplicated.
 *
 * Allocation failure cannot be reported through the void return, so it is
 * treated as fatal: a diagnostic is printed and the process aborts instead
 * of writing through a NULL pointer.
 */
static void tokbuf_push(TokBuf *tb, Token t)
{
    if (tb->count >= tb->cap) {
        size_t new_cap = tb->cap ? tb->cap * 2 : 8;
        Token *grown = NULL;

        /* Only attempt the realloc when neither the doubling nor the
         * byte-size multiplication wrapped around. */
        if (new_cap > tb->cap && new_cap <= (size_t)-1 / sizeof *grown)
            grown = realloc(tb->toks, new_cap * sizeof *grown);
        if (!grown) {
            fprintf(stderr, "dtob: out of memory growing token buffer\n");
            abort();
        }
        tb->toks = grown;
        tb->cap = new_cap;
    }
    tb->toks[tb->count++] = t;
}
/* Release every token's data payload and then the token array itself,
 * leaving tb empty.  Use when the buffer still owns the payloads. */
static void tokbuf_free_data(TokBuf *tb)
{
    size_t idx = 0;

    while (idx < tb->count) {
        free(tb->toks[idx].data);
        idx++;
    }

    free(tb->toks);
    tb->toks = NULL;
    tb->cap = 0;
    tb->count = 0;
}
/* Release only the token array, leaving tb empty.  The tokens' data
 * payloads are not freed here. */
static void tokbuf_free_shell(TokBuf *tb)
{
    Token *shell = tb->toks;

    tb->toks = NULL;
    tb->cap = 0;
    tb->count = 0;
    free(shell);
}
/* Parser state shared by the recursive-descent routines in this file. */
typedef struct {
Lexer *lexer; /* live token source; read by peek() when no replay token is available */
Token current; /* one-token lookahead, valid only while has_current is non-zero */
int has_current; /* non-zero when current holds a peeked, not yet consumed token */
DtobTypesHeader *types; /* table custom type definitions are added to and looked up in */
int types_parsed; /* set to 1 by register_types_from_buf(); not read in this section */
int has_types; /* set to 1 once a types header was registered; custom codes are rejected while 0 */
const uint8_t *base_buf; /* input buffer past the magic, as set by dtob_decode_with_types() */
size_t base_len; /* length of base_buf in bytes */
PtrCache *ptr_cache; /* initialised by dtob_decode_with_types(); not otherwise used in this section */
int ptr_cache_owned; /* set to 1 by dtob_decode_with_types(); not read in this section */
int is_root_parse; /* 1 until the first parse_collection() call clears it */
Token *replay; /* buffered tokens served by peek() before the lexer; NULL when not replaying */
size_t replay_count; /* number of tokens in replay */
size_t replay_pos; /* index of the next replay token to hand out */
} Parser;
/*
 * Return the next token without consuming it.
 *
 * The token is fetched once and cached in p->current until consume() clears
 * the cache.  While replay tokens remain they are served first; otherwise
 * the token comes from the lexer.
 *
 * NOTE(review): once a replay buffer is exhausted this falls through to the
 * live lexer even though a replay is still active.
 */
static Token peek(Parser *p)
{
    if (p->has_current)
        return p->current;

    int replay_available = p->replay && p->replay_pos < p->replay_count;

    if (replay_available) {
        p->current = p->replay[p->replay_pos];
        p->replay_pos++;
    } else {
        p->current = lexer_next(p->lexer);
    }

    p->has_current = 1;
    return p->current;
}
/* Return the next token and mark it consumed, so the following peek()
 * fetches a fresh one. */
static Token consume(Parser *p)
{
    Token taken = peek(p);

    p->has_current = 0;
    return taken;
}
static DtobValue *parse_value(Parser *p);
/*
 * Map a type token to its opcode.
 *
 * Custom tokens yield their own custom_code; built-in type tokens yield the
 * matching DTOB_CODE_* constant.  Any other token yields 0, which callers
 * treat as "not an opcode".
 */
static uint16_t tok_to_opcode(const Token *t)
{
    switch (t->type) {
    case TOK_CUSTOM:   return t->custom_code;
    case TOK_T_RAW:    return DTOB_CODE_RAW;
    case TOK_T_FLOAT:  return DTOB_CODE_FLOAT;
    case TOK_T_DOUBLE: return DTOB_CODE_DOUBLE;
    case TOK_T_INT8:   return DTOB_CODE_INT8;
    case TOK_T_INT16:  return DTOB_CODE_INT16;
    case TOK_T_INT32:  return DTOB_CODE_INT32;
    case TOK_T_INT64:  return DTOB_CODE_INT64;
    case TOK_T_UINT8:  return DTOB_CODE_UINT8;
    case TOK_T_UINT16: return DTOB_CODE_UINT16;
    case TOK_T_UINT32: return DTOB_CODE_UINT32;
    case TOK_T_UINT64: return DTOB_CODE_UINT64;
    default:
        break;
    }
    return 0;
}
/*
 * Register the custom type definitions buffered in tb with the parser.
 *
 * An optional OPEN at index 0 is skipped.  The remaining tokens must be a
 * sequence of definitions, each shaped like:
 *
 *     OPEN  CUSTOM(code)  DATA(name)  opcode...  KV_CLOSE | ARR_CLOSE
 *
 * A definition closed with ARR_CLOSE is flagged as a struct type.  At most
 * 15 opcodes are accepted per definition.  A definition is stored only when
 * the parser has a types table and code >= DTOB_CUSTOM_MIN; otherwise it is
 * validated and skipped.
 *
 * The parser is marked as having a types header (has_types, types_parsed)
 * before validation, so the flags stay set even when this function fails.
 *
 * Ownership: tb is always emptied - token payloads and the token array are
 * freed on success and on failure.
 *
 * Returns 0 on success, -1 on a malformed header or allocation failure
 * (a diagnostic is printed to stderr).
 */
static int register_types_from_buf(Parser *p, TokBuf *tb)
{
    enum { MAX_TYPE_OPCODES = 15 };
    int rc = -1;

    p->has_types = 1;
    p->types_parsed = 1;

    size_t i = (tb->count > 0 && tb->toks[0].type == TOK_OPEN) ? 1 : 0;

    while (i < tb->count) {
        if (tb->toks[i].type != TOK_OPEN) {
            fprintf(stderr, "dtob: expected OPEN for type def in types header\n");
            goto out;
        }
        i++;

        if (i >= tb->count || tb->toks[i].type != TOK_CUSTOM) {
            fprintf(stderr, "dtob: expected type code in types header\n");
            goto out;
        }
        uint16_t code = tb->toks[i].custom_code;
        i++;

        if (i >= tb->count || tb->toks[i].type != TOK_DATA) {
            fprintf(stderr, "dtob: expected type name after custom code %d\n", code);
            goto out;
        }

        /* Copy the name into a NUL-terminated string for dtob_types_add(). */
        Token name_tok = tb->toks[i];
        char *name = malloc(name_tok.data_len + 1);
        if (!name) {
            fprintf(stderr, "dtob: out of memory reading types header\n");
            goto out;
        }
        if (name_tok.data_len > 0)
            memcpy(name, name_tok.data, name_tok.data_len);
        name[name_tok.data_len] = '\0';
        i++;

        /* Collect opcodes until a close token or a non-opcode token. */
        uint16_t opcodes[MAX_TYPE_OPCODES];
        size_t n_opcodes = 0;
        while (i < tb->count &&
               tb->toks[i].type != TOK_KV_CLOSE &&
               tb->toks[i].type != TOK_ARR_CLOSE) {
            uint16_t op = tok_to_opcode(&tb->toks[i]);
            if (op == 0)
                break;
            if (n_opcodes >= MAX_TYPE_OPCODES) {
                fprintf(stderr, "dtob: too many opcodes in type def\n");
                free(name);
                goto out;
            }
            opcodes[n_opcodes++] = op;
            i++;
        }

        /* The closing token decides whether this is a struct type. */
        uint8_t is_struct = 0;
        if (i < tb->count && tb->toks[i].type == TOK_ARR_CLOSE) {
            is_struct = 1;
            i++;
        } else if (i < tb->count && tb->toks[i].type == TOK_KV_CLOSE) {
            i++;
        } else {
            fprintf(stderr, "dtob: expected CLOSE_KV or CLOSE_ARR for type def\n");
            free(name);
            goto out;
        }

        if (p->types && code >= DTOB_CUSTOM_MIN) {
            dtob_types_add(p->types, code, name, opcodes, n_opcodes);
            /* Flag the most recently stored entry; guard against an empty
             * table so count - 1 can never wrap around. */
            if (is_struct && p->types->count > 0)
                p->types->entries[p->types->count - 1].is_struct = 1;
        }
        free(name);
    }

    rc = 0;

out:
    tokbuf_free_data(tb);
    return rc;
}
/*
 * Build an array or key/value set from the buffered tokens of one
 * collection.
 *
 * The parser is temporarily switched to replay tb's tokens.  Values are
 * parsed until the buffer is exhausted, an END/ERROR token is seen, or a
 * value fails to parse; in the failure case the values parsed so far are
 * kept and the collection is still returned.  The caller's replay state is
 * restored afterwards and any pending lookahead token is discarded.
 *
 * close_type == TOK_ARR_CLOSE yields a DTOB_ARRAY holding the values in
 * order.  Any other close type yields a DTOB_KV_SET built as follows:
 *   - a DTOB_RAW value followed by another value becomes a pair keyed by
 *     the raw bytes;
 *   - a non-raw value in key position is added as a plain element;
 *   - a trailing raw value with no partner is dropped.
 *
 * Ownership: tb is consumed.  Payloads of tokens handed to parse_value()
 * belong to the resulting values (or were freed there); payloads of tokens
 * that were never reached are freed here, as is the token array.
 */
static DtobValue *interpret_arr_kv(Parser *p, TokBuf *tb, TokenType close_type)
{
    Token *saved_replay = p->replay;
    size_t saved_replay_count = p->replay_count;
    size_t saved_replay_pos = p->replay_pos;

    p->replay = tb->toks;
    p->replay_count = tb->count;
    p->replay_pos = 0;
    p->has_current = 0;

    DtobValue **items = NULL;
    size_t count = 0, cap = 0;

    for (;;) {
        /* Stop as soon as the buffer is used up.  This must be tested
         * before peek(): peek() always leaves has_current set and, with the
         * replay exhausted, would pull a token from the live lexer that
         * does not belong to this collection. */
        if (!p->has_current && p->replay_pos >= p->replay_count)
            break;

        Token t = peek(p);
        if (t.type == TOK_END || t.type == TOK_ERROR)
            break;

        DtobValue *v = parse_value(p);
        if (!v)
            break;

        if (count >= cap) {
            size_t new_cap = cap ? cap * 2 : 8;
            DtobValue **grown = realloc(items, new_cap * sizeof *grown);
            if (!grown) {
                fprintf(stderr, "dtob: out of memory building collection\n");
                dtob_free(v);
                break;
            }
            items = grown;
            cap = new_cap;
        }
        items[count++] = v;
    }

    /* Tokens at or beyond replay_pos were never handed out by peek(), so
     * nothing else owns their payloads; free them before the array goes. */
    for (size_t i = p->replay_pos; i < tb->count; i++)
        free(tb->toks[i].data);

    p->replay = saved_replay;
    p->replay_count = saved_replay_count;
    p->replay_pos = saved_replay_pos;
    p->has_current = 0;
    tokbuf_free_shell(tb);

    if (close_type == TOK_ARR_CLOSE) {
        DtobValue *arr = ast_make(DTOB_ARRAY);
        for (size_t i = 0; i < count; i++)
            ast_add_element(arr, items[i]);
        free(items);
        return arr;
    }

    DtobValue *kvs = ast_make(DTOB_KV_SET);
    for (size_t i = 0; i < count; ) {
        if (items[i]->type != DTOB_RAW) {
            ast_add_element(kvs, items[i]);
            i++;
        } else if (i + 1 < count) {
            /* The key bytes are handed to ast_add_pair(); only the now
             * empty key node is released here. */
            ast_add_pair(kvs, items[i]->data, items[i]->data_len, items[i + 1]);
            items[i]->data = NULL;
            free(items[i]);
            i += 2;
        } else {
            /* Dangling key without a value. */
            dtob_free(items[i]);
            i++;
        }
    }
    free(items);
    return kvs;
}
/*
 * Parse one collection whose OPEN token has already been consumed.
 *
 * Tokens are buffered, tracking nesting, until the close token matching the
 * already-consumed OPEN arrives; the buffer is then handed to
 * interpret_arr_kv() together with that close token's type.
 *
 * Types headers: a nested block that returns to depth 0 on TOK_TYPES_CLOSE
 * is a types header.  It is accepted only in the root collection (the first
 * collection parsed) and only when its OPEN was the first buffered token;
 * its definitions are registered and buffering restarts from empty.  A
 * TOK_TYPES_CLOSE that closes the root collection itself yields an empty
 * DTOB_ARRAY.
 *
 * Returns NULL after printing a diagnostic when the stream ends (or errors)
 * before the collection is closed, or when a types header is misplaced or
 * malformed.
 */
static DtobValue *parse_collection(Parser *p)
{
    const int at_root = p->is_root_parse;
    p->is_root_parse = 0;

    TokBuf pending;
    tokbuf_init(&pending);

    int nesting = 0;
    size_t first_open_at = 0;

    for (;;) {
        Token tok = peek(p);

        if (tok.type == TOK_END || tok.type == TOK_ERROR) {
            fprintf(stderr, "dtob: invalid: stream ended with unclosed open\n");
            goto fail;
        }
        consume(p);

        const int is_close = tok.type == TOK_ARR_CLOSE ||
                             tok.type == TOK_KV_CLOSE ||
                             tok.type == TOK_TYPES_CLOSE;

        if (!is_close) {
            if (tok.type == TOK_OPEN) {
                /* Remember where the latest direct child of the root
                 * starts, so a types block can be checked for being first. */
                if (at_root && nesting == 0)
                    first_open_at = pending.count;
                nesting++;
            }
            tokbuf_push(&pending, tok);
            continue;
        }

        if (nesting == 0) {
            /* This close token ends the collection itself. */
            if (tok.type != TOK_TYPES_CLOSE)
                return interpret_arr_kv(p, &pending, tok.type);

            if (!at_root) {
                fprintf(stderr, "dtob: types header only allowed in root collection\n");
                goto fail;
            }
            tokbuf_free_data(&pending);
            return ast_make(DTOB_ARRAY);
        }

        nesting--;
        if (tok.type != TOK_TYPES_CLOSE || nesting != 0) {
            /* Ordinary close of a nested collection: keep buffering. */
            tokbuf_push(&pending, tok);
            continue;
        }

        /* A direct child just ended with TYPES_CLOSE: a types header. */
        if (!at_root) {
            fprintf(stderr, "dtob: types header only allowed in root collection\n");
            goto fail;
        }
        if (first_open_at != 0) {
            fprintf(stderr, "dtob: types block must be first element in root collection\n");
            goto fail;
        }
        /* register_types_from_buf() empties the buffer on both outcomes. */
        if (register_types_from_buf(p, &pending) != 0)
            return NULL;
        tokbuf_init(&pending);
    }

fail:
    tokbuf_free_data(&pending);
    return NULL;
}
/*
 * Parse a single value starting at the next token.
 *
 * Accepted forms:
 *   - OPEN ...            a collection, delegated to parse_collection();
 *   - built-in type token a DTOB_INT / DTOB_FLOAT / DTOB_RAW value whose
 *                         payload is the following DATA token, if any;
 *   - CUSTOM(code)        a DTOB_CUSTOM value laid out according to the
 *                         registered type definition (see below);
 *   - DATA                an untyped DTOB_RAW value.
 *
 * Custom types, by definition shape:
 *   - struct:      OPEN member... ARR_CLOSE, where every member starts with
 *                  an opcode that matches a not-yet-used opcode slot of the
 *                  definition, and every slot must be used;
 *   - no opcodes:  no payload, inner_code is 0;
 *   - one opcode:  optional DATA payload, size-checked when the opcode has
 *                  a fixed size;
 *   - several:     an explicit inner opcode selects the variant, followed
 *                  by either a struct body or a DATA payload.
 *
 * Ownership: DATA payloads are moved into the returned value (or freed on
 * error).  Returns NULL on error; a diagnostic is printed to stderr except
 * when a lookahead token is TOK_ERROR or the value starts at TOK_END.
 */
static DtobValue *parse_value(Parser *p)
{
    Token t = peek(p);

    if (t.type == TOK_OPEN) {
        consume(p);
        return parse_collection(p);
    }

    if (t.type == TOK_T_RAW ||
        (t.type >= TOK_T_INT8 && t.type <= TOK_T_UINT64) ||
        t.type == TOK_T_FLOAT || t.type == TOK_T_DOUBLE) {
        consume(p);

        DtobType dt;
        if (t.type >= TOK_T_INT8 && t.type <= TOK_T_UINT64) dt = DTOB_INT;
        else if (t.type == TOK_T_FLOAT || t.type == TOK_T_DOUBLE) dt = DTOB_FLOAT;
        else dt = DTOB_RAW;

        DtobValue *v = ast_make(dt);
        int is_unsigned = (t.type >= TOK_T_UINT8 && t.type <= TOK_T_UINT64);

        Token d = peek(p);
        if (d.type == TOK_ERROR) {
            dtob_free(v);
            return NULL;
        }
        if (d.type == TOK_DATA) {
            d = consume(p);
            if (is_unsigned && d.data_len > 0 && (d.data[0] & 0x80)) {
                /* Unsigned payload with the top bit set: store it behind a
                 * leading zero byte.  Presumably this keeps consumers that
                 * read the bytes as a signed integer from seeing a negative
                 * value - TODO confirm against the readers. */
                v->data = malloc(d.data_len + 1);
                if (!v->data) {
                    fprintf(stderr, "dtob: out of memory reading integer payload\n");
                    free(d.data);
                    dtob_free(v);
                    return NULL;
                }
                v->data[0] = 0x00;
                memcpy(v->data + 1, d.data, d.data_len);
                v->data_len = d.data_len + 1;
                free(d.data);
            } else {
                /* Take ownership of the token's payload as is. */
                v->data = d.data;
                v->data_len = d.data_len;
            }
        }
        return v;
    }

    if (t.type == TOK_CUSTOM) {
        uint16_t code = t.custom_code;
        consume(p);

        if (!p->has_types) {
            fprintf(stderr, "dtob: custom code %d used without types header\n", code);
            return NULL;
        }
        DtobCustomType *ct = p->types ? dtob_types_get(p->types, code) : NULL;
        if (!ct) {
            fprintf(stderr, "dtob: undefined custom type %d\n", code);
            return NULL;
        }

        DtobValue *v = ast_make(DTOB_CUSTOM);
        v->custom_code = code;

        if (ct->is_struct) {
            Token open_tok = peek(p);
            if (open_tok.type != TOK_OPEN) {
                fprintf(stderr, "dtob: struct type %d: expected OPEN\n", code);
                dtob_free(v);
                return NULL;
            }
            consume(p);

            /* One flag per opcode slot of the definition. */
            uint8_t seen[15] = {0};
            if (ct->n_opcodes > sizeof seen / sizeof seen[0]) {
                /* A definition wider than seen[] cannot be tracked without
                 * indexing out of bounds. */
                fprintf(stderr, "dtob: struct type %d: too many member opcodes\n", code);
                dtob_free(v);
                return NULL;
            }

            while (1) {
                Token next = peek(p);
                if (next.type == TOK_ARR_CLOSE) {
                    consume(p);
                    break;
                }
                if (next.type == TOK_END || next.type == TOK_ERROR) {
                    fprintf(stderr, "dtob: struct type %d: unexpected end\n", code);
                    dtob_free(v);
                    return NULL;
                }
                uint16_t mop = tok_to_opcode(&next);
                if (mop == 0) {
                    fprintf(stderr, "dtob: struct type %d: expected member opcode\n", code);
                    dtob_free(v);
                    return NULL;
                }

                /* Claim the first unused slot declared with this opcode. */
                int found = 0;
                for (size_t si = 0; si < ct->n_opcodes; si++) {
                    if (ct->opcodes[si] == mop && !seen[si]) {
                        seen[si] = 1;
                        found = 1;
                        break;
                    }
                }
                if (!found) {
                    fprintf(stderr, "dtob: struct type %d: unexpected opcode %d\n", code, mop);
                    dtob_free(v);
                    return NULL;
                }

                /* The opcode token was only peeked; the member parse
                 * consumes it together with its payload. */
                DtobValue *member = parse_value(p);
                if (!member) {
                    dtob_free(v);
                    return NULL;
                }
                ast_add_element(v, member);
            }

            for (size_t si = 0; si < ct->n_opcodes; si++) {
                if (!seen[si]) {
                    fprintf(stderr, "dtob: struct type %d: missing opcode %d\n",
                            code, ct->opcodes[si]);
                    dtob_free(v);
                    return NULL;
                }
            }
        } else if (ct->n_opcodes == 0) {
            v->inner_code = 0;
        } else if (ct->n_opcodes == 1) {
            v->inner_code = ct->opcodes[0];

            /* expected > 0: fixed payload size; 0: no payload required;
             * -1: size not checked.  -2 from dtob_opcode_data_size() is
             * resolved here through the types table. */
            int expected = dtob_opcode_data_size(ct->opcodes[0]);
            if (expected == -2 && p->types) {
                DtobCustomType *inner_ct = dtob_types_get(p->types, ct->opcodes[0]);
                if (inner_ct && inner_ct->n_opcodes == 0)
                    expected = 0;
                else
                    expected = -1;
            }

            Token d = peek(p);
            if (d.type == TOK_DATA) {
                d = consume(p);
                if (expected > 0 && (int)d.data_len != expected) {
                    fprintf(stderr, "dtob: custom type %d: expected %d bytes, got %zu\n",
                            code, expected, d.data_len);
                    free(d.data);
                    dtob_free(v);
                    return NULL;
                }
                v->data = d.data;
                v->data_len = d.data_len;
            } else if (expected > 0) {
                fprintf(stderr, "dtob: custom type %d: expected data payload\n", code);
                dtob_free(v);
                return NULL;
            }
        } else {
            /* Several opcodes declared: the stream names the one in use. */
            Token inner = peek(p);
            uint16_t iop = tok_to_opcode(&inner);
            if (iop == 0) {
                fprintf(stderr, "dtob: custom type %d: expected inner opcode\n", code);
                dtob_free(v);
                return NULL;
            }
            consume(p);
            v->inner_code = iop;

            DtobCustomType *inner_ct = p->types ? dtob_types_get(p->types, iop) : NULL;
            if (inner_ct && inner_ct->is_struct) {
                Token open_tok = peek(p);
                if (open_tok.type != TOK_OPEN) {
                    fprintf(stderr, "dtob: enum type %d inner struct %d: expected OPEN\n", code, iop);
                    dtob_free(v);
                    return NULL;
                }
                consume(p);

                /* Members are collected as they come; unlike the direct
                 * struct case they are not matched against the inner
                 * type's opcode list. */
                while (1) {
                    Token next = peek(p);
                    if (next.type == TOK_ARR_CLOSE) {
                        consume(p);
                        break;
                    }
                    if (next.type == TOK_END || next.type == TOK_ERROR) {
                        fprintf(stderr, "dtob: enum type %d inner struct %d: unexpected end\n", code, iop);
                        dtob_free(v);
                        return NULL;
                    }
                    DtobValue *member = parse_value(p);
                    if (!member) {
                        dtob_free(v);
                        return NULL;
                    }
                    ast_add_element(v, member);
                }
            } else {
                int expected = dtob_opcode_data_size(iop);
                if (expected == -2 && p->types) {
                    if (inner_ct && inner_ct->n_opcodes == 0)
                        expected = 0;
                    else
                        expected = -1;
                }
                if (expected != 0) {
                    Token d = peek(p);
                    if (d.type == TOK_DATA) {
                        d = consume(p);
                        if (expected > 0 && (int)d.data_len != expected) {
                            fprintf(stderr, "dtob: custom type %d inner %d: expected %d bytes, got %zu\n",
                                    code, iop, expected, d.data_len);
                            free(d.data);
                            dtob_free(v);
                            return NULL;
                        }
                        v->data = d.data;
                        v->data_len = d.data_len;
                    } else if (expected > 0) {
                        fprintf(stderr, "dtob: custom type %d: expected data for inner opcode %d\n",
                                code, iop);
                        dtob_free(v);
                        return NULL;
                    }
                }
            }
        }
        return v;
    }

    if (t.type == TOK_DATA) {
        Token d = consume(p);
        DtobValue *v = ast_make(DTOB_RAW);
        v->data = d.data;
        v->data_len = d.data_len;
        return v;
    }

    /* Anything else cannot start a value; END is reported silently. */
    if (t.type != TOK_END)
        fprintf(stderr, "dtob: unexpected token where value expected\n");
    return NULL;
}
DtobValue *dtob_decode(const uint8_t *buf, size_t len)
{
return dtob_decode_with_types(buf, len, NULL);
}
/*
 * Decode a dtob buffer into a value tree.
 *
 * buf/len    the encoded input; it must start with DTOB_MAGIC.
 * out_types  optional table that receives the custom type definitions found
 *            in the input.  When NULL, a private table is used and the
 *            names of its entries are freed before returning.  When
 *            non-NULL, its entries are left in place for the caller.
 *
 * Returns the decoded root value, or NULL when buf is NULL, the magic is
 * missing or wrong, or parsing fails (a diagnostic is printed to stderr).
 */
DtobValue *dtob_decode_with_types(const uint8_t *buf, size_t len,
                                  DtobTypesHeader *out_types)
{
    /* Reject a NULL buffer up front so memcmp() never sees it. */
    if (buf == NULL || len < DTOB_MAGIC_LEN ||
        memcmp(buf, DTOB_MAGIC, DTOB_MAGIC_LEN) != 0) {
        fprintf(stderr, "dtob: missing or invalid magic number\n");
        return NULL;
    }
    buf += DTOB_MAGIC_LEN;
    len -= DTOB_MAGIC_LEN;

    Lexer lexer;
    lexer_init(&lexer, buf, len);

    DtobTypesHeader local_types;
    dtob_types_init(&local_types);

    PtrCache ptr_cache;
    ptrcache_init(&ptr_cache);

    Parser parser = {
        .lexer = &lexer,
        .current = { 0 },
        .has_current = 0,
        .types = out_types ? out_types : &local_types,
        .types_parsed = 0,
        .has_types = 0,
        .base_buf = buf,
        .base_len = len,
        .ptr_cache = &ptr_cache,
        .ptr_cache_owned = 1,
        .is_root_parse = 1,
        .replay = NULL, .replay_count = 0, .replay_pos = 0
    };

    DtobValue *result = parse_value(&parser);

    ptrcache_free(&ptr_cache);

    /* Only the private table is cleaned up here. */
    if (!out_types) {
        for (size_t i = 0; i < local_types.count; i++)
            free(local_types.entries[i].name);
    }
    return result;
}
/*
 * Check that every type code in `types` is also declared in the file at
 * `path`.
 *
 * The file is read in fixed-size chunks up to and including the first
 * 0xC0 0x03 byte pair (presumably the encoded end of the types header -
 * TODO confirm against the lexer).  That prefix is decoded to collect the
 * file's type definitions.  Only the presence of each code is compared;
 * names and opcode lists are not.
 *
 * Returns 1 when verification passes or is skipped (strict is 0, `types` is
 * NULL or empty, or the file cannot be opened).  Returns 0 when the marker
 * is not found, memory runs out, or a code from `types` is missing from the
 * file (a diagnostic is printed in the latter two cases).
 */
int dtob_verify_file_types(const char *path, const DtobTypesHeader *types, int strict)
{
    enum { READ_CHUNK = 2048 };

    if (!strict) return 1;
    if (!types || types->count == 0) return 1;

    FILE *fp = fopen(path, "rb");
    if (!fp) return 1;

    size_t cap = READ_CHUNK;
    size_t len = 0;
    int found = 0;

    uint8_t *buf = malloc(cap);
    if (!buf) {
        fprintf(stderr, "dtob: out of memory verifying %s\n", path);
        fclose(fp);
        return 0;
    }

    for (;;) {
        /* Keep room for one full chunk past the data gathered so far. */
        if (len + READ_CHUNK > cap) {
            uint8_t *grown = realloc(buf, cap * 2);
            if (!grown) {
                fprintf(stderr, "dtob: out of memory verifying %s\n", path);
                free(buf);
                fclose(fp);
                return 0;
            }
            buf = grown;
            cap *= 2;
        }

        size_t n = fread(buf + len, 1, READ_CHUNK, fp);
        if (n == 0)
            break;

        size_t end = len + n;
        /* Start one byte before the new data so a marker split across two
         * reads is still detected. */
        for (size_t i = (len > 0 ? len - 1 : 0); i + 1 < end; i++) {
            if (buf[i] == 0xC0 && buf[i + 1] == 0x03) {
                len = i + 2;
                found = 1;
                break;
            }
        }
        if (found)
            break;
        len = end;
    }
    fclose(fp);

    if (!found) {
        free(buf);
        return 0;
    }

    /* Decode just the prefix; only the collected type table matters, the
     * decoded value (if any) is thrown away. */
    DtobTypesHeader file_th;
    dtob_types_init(&file_th);
    DtobValue *dummy = dtob_decode_with_types(buf, len, &file_th);
    if (dummy) dtob_free(dummy);
    free(buf);

    int valid = 1;
    for (size_t i = 0; i < types->count; i++) {
        uint16_t code = types->entries[i].code;
        if (!dtob_types_get(&file_th, code)) {
            fprintf(stderr, "dtob: verification failed! Target type %u is NOT identically declared inside %s!\n", (unsigned)code, path);
            valid = 0;
            break;
        }
    }

    for (size_t i = 0; i < file_th.count; i++) {
        free(file_th.entries[i].name);
    }
    return valid;
}