dtob-sys 0.1.0

Raw FFI bindings to the dtob C library (encoder + decoder).
Documentation
#include "dtob_internal.h"

/*
 * Attach a lexer to an input buffer and rewind it to the first byte.
 *
 *   l    lexer state to (re)initialise; must be a valid pointer
 *   buf  bytes to tokenise; only the pointer is stored, nothing is copied
 *   len  number of bytes available at buf
 *
 * Only the buf, len and pos fields are written here.
 */
void lexer_init(Lexer *l, const uint8_t *buf, size_t len)
{
    l->pos = 0;     /* read cursor starts at the beginning */
    l->len = len;
    l->buf = buf;
}

/*
 * Examine the four 2-bit pairs of one data byte, most significant pair
 * first (bits 7-6, 5-4, 3-2, 1-0).
 *
 * Returns
 *    0  no pair equals 11 (the byte carries no padding),
 *    1  a 11 pair was seen and every pair after it is also 11,
 *   -1  a non-11 pair follows a 11 pair (malformed byte).
 */
static int lexer_scan_pad_pairs(uint8_t byte)
{
    int seen_pad = 0;

    for (int shift = 6; shift >= 0; shift -= 2) {
        if (((byte >> shift) & 3) == 3) {
            seen_pad = 1;
        } else if (seen_pad) {
            return -1;
        }
    }
    return seen_pad;
}

/*
 * Produce the next token from the lexer's buffer, advancing l->pos.
 *
 * Returns a Token whose type is:
 *   TOK_END     when the cursor is at or past the end of the buffer;
 *   a structural / type token for a recognised two-byte control word;
 *   TOK_CUSTOM  for a control code in [16, DTOB_CUSTOM_MAX], with the
 *               code stored in custom_code;
 *   TOK_DATA    for a data payload, with data / data_len set from the
 *               result of trit_decode_padded();
 *   TOK_ERROR   for a truncated control word, the unimplemented uqt
 *               opcode, an unknown control code, a data byte with a
 *               non-11 pair after a 11 pair, or a failed trit decode.
 *
 * NOTE(review): tok.data is whatever buffer trit_decode_padded() hands
 * back; who releases it is not visible from this function.
 */
Token lexer_next(Lexer *l)
{
    Token tok = { TOK_ERROR, 0, NULL, 0 };

    /* Consume control words; loop again only when a blast word is skipped.
     * Leaves the loop (via break) when the cursor sits on a data byte. */
    for (;;) {
        if (l->pos >= l->len) {
            tok.type = TOK_END;
            return tok;
        }

        uint8_t lead = l->buf[l->pos];

        /* anything whose top two bits are not 11 starts a data payload */
        if (!DTOB_IS_CTRL(lead)) {
            break;
        }

        /* control word: two bytes, the code is the low 14 bits */
        l->pos++;
        if (l->pos >= l->len) {
            return tok;  /* TOK_ERROR: second byte of control word missing */
        }
        uint8_t trail = l->buf[l->pos];
        l->pos++;
        uint16_t code = ((uint16_t)(lead & 0x3F) << 8) | trail;

        /* blast (code 16383 = 0xFF 0xFF) is silently skipped */
        if (code == DTOB_CODE_BLAST) {
            continue;
        }

        /* every path through the switch ends in the return below;
         * tok.type is still TOK_ERROR unless a case overwrites it */
        switch (code) {
        case DTOB_CODE_OPEN:        tok.type = TOK_OPEN;        break;
        case DTOB_CODE_ARR_CLOSE:   tok.type = TOK_ARR_CLOSE;   break;
        case DTOB_CODE_KV_CLOSE:    tok.type = TOK_KV_CLOSE;    break;
        case DTOB_CODE_TYPES_CLOSE: tok.type = TOK_TYPES_CLOSE; break;

        case DTOB_CODE_UQT:
            fprintf(stderr, "dtob: uqt (opcode 4) unimplemented\n");
            break;  /* reported as TOK_ERROR */

        /* data type (5) */
        case DTOB_CODE_RAW:         tok.type = TOK_T_RAW;       break;

        /* float types (6-7) */
        case DTOB_CODE_FLOAT:       tok.type = TOK_T_FLOAT;     break;
        case DTOB_CODE_DOUBLE:      tok.type = TOK_T_DOUBLE;    break;

        /* integer types (8-15) */
        case DTOB_CODE_INT8:        tok.type = TOK_T_INT8;      break;
        case DTOB_CODE_INT16:       tok.type = TOK_T_INT16;     break;
        case DTOB_CODE_INT32:       tok.type = TOK_T_INT32;     break;
        case DTOB_CODE_INT64:       tok.type = TOK_T_INT64;     break;
        case DTOB_CODE_UINT8:       tok.type = TOK_T_UINT8;     break;
        case DTOB_CODE_UINT16:      tok.type = TOK_T_UINT16;    break;
        case DTOB_CODE_UINT32:      tok.type = TOK_T_UINT32;    break;
        case DTOB_CODE_UINT64:      tok.type = TOK_T_UINT64;    break;

        default:
            /* codes 16 and up: convention + custom; anything else is
             * an unknown code and stays TOK_ERROR */
            if (code >= 16 && code <= DTOB_CUSTOM_MAX) {
                tok.type = TOK_CUSTOM;
                tok.custom_code = code;
            }
            break;
        }
        return tok;
    }

    /* Data payload: gather bytes until a control byte, the end of the
     * buffer, or a byte that carries 11 padding (that byte is the last
     * one of the payload).  Within a byte, once a 11 pair appears all
     * remaining pairs must be 11 as well. */
    const size_t first = l->pos;

    while (l->pos < l->len && !DTOB_IS_CTRL(l->buf[l->pos])) {
        int pad = lexer_scan_pad_pairs(l->buf[l->pos]);
        l->pos++;  /* the byte is consumed even when it is rejected */

        if (pad < 0) {
            return tok;  /* TOK_ERROR: data after padding (e.g. raw content bytes) */
        }
        if (pad > 0) {
            break;
        }
    }

    const size_t span = l->pos - first;

    /* decode the trit-encoded, padded bytes just collected */
    uint8_t *payload = NULL;
    size_t payload_len = trit_decode_padded(l->buf + first, span, &payload);

    if (span > 0 && payload == NULL) {
        return tok;  /* TOK_ERROR: trit decode failed */
    }

    tok.type     = TOK_DATA;
    tok.data     = payload;
    tok.data_len = payload_len;
    return tok;
}