dtob-sys 0.1.1

Raw FFI bindings to the dtob C library (encoder + decoder).
Documentation
#define _POSIX_C_SOURCE 200809L
#include "dtob_internal.h"
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <math.h>

/* ================================================================== */
/*  Minimal JSON parser (no dependencies)                             */
/* ================================================================== */

/* Read cursor over an in-memory JSON text. */
typedef struct {
    const char *src;   /* start of the text being parsed */
    size_t      pos;   /* index of the next unread byte in src */
    size_t      len;   /* number of bytes of src the parser may read */
} JsonReader;

/*
 * Advance the reader past any run of whitespace (as classified by isspace)
 * at the current position.  Stops at the first non-space byte or at the end
 * of the input, whichever comes first.
 */
static void jr_skip_ws(JsonReader *r)
{
    size_t cursor = r->pos;

    for (; cursor < r->len; cursor++) {
        /* cast keeps high-bit bytes inside the domain isspace accepts */
        if (!isspace((unsigned char)r->src[cursor]))
            break;
    }
    r->pos = cursor;
}

/*
 * Skip whitespace and return the next byte without consuming it.
 *
 * Returns the byte as a value in 0..255, or -1 when the input is exhausted.
 * The byte is converted through unsigned char so that bytes >= 0x80 are never
 * sign-extended: a raw 0xFF must not be mistaken for the -1 end marker, and
 * the result stays safe to hand to <ctype.h> classifiers.
 */
static int jr_peek(JsonReader *r) {
    jr_skip_ws(r);
    if (r->pos >= r->len) return -1;
    return (unsigned char)r->src[r->pos];
}

/*
 * Skip whitespace, then consume and return the next byte.
 *
 * Returns the byte as a value in 0..255, or -1 when the input is exhausted
 * (in which case the position is left at the end).  The byte is converted
 * through unsigned char so that a raw 0xFF in the input cannot be confused
 * with the -1 end marker on platforms where plain char is signed.
 */
static int jr_consume(JsonReader *r) {
    jr_skip_ws(r);
    if (r->pos >= r->len) return -1;
    return (unsigned char)r->src[r->pos++];
}

/*
 * Consume the next non-whitespace character and report whether it was `c`.
 * Returns 1 on a match and 0 otherwise.  Note that the character is consumed
 * in both cases; a mismatch does not rewind the reader.
 */
static int jr_expect(JsonReader *r, char c) {
    int got = jr_consume(r);
    return got == c;
}

/*
 * Skip whitespace, then test whether the input continues with the literal
 * `word`.  On a match the reader is advanced past the word and 1 is returned;
 * otherwise the reader stays just after the skipped whitespace and 0 is
 * returned.  Only a prefix comparison is made: whatever follows the word is
 * not inspected.
 */
static int jr_match(JsonReader *r, const char *word) {
    const size_t n = strlen(word);

    jr_skip_ws(r);
    if (r->pos + n <= r->len && memcmp(r->src + r->pos, word, n) == 0) {
        r->pos += n;
        return 1;
    }
    return 0;
}

/*
 * Make sure `*buf` can take `need` more bytes at offset `len` and still leave
 * room for a terminating NUL, doubling the capacity as often as required.
 * Requires len < *cap.  Returns 1 on success; returns 0 when the capacity
 * would overflow or realloc fails, in which case `*buf` is still the valid,
 * unchanged allocation and remains the caller's to free.
 */
static int jr_buf_reserve(char **buf, size_t *cap, size_t len, size_t need)
{
    size_t new_cap = *cap;

    while (need >= new_cap - len) {
        if (new_cap > (size_t)-1 / 2) return 0;
        new_cap *= 2;
    }
    if (new_cap == *cap) return 1;

    char *grown = realloc(*buf, new_cap);
    if (!grown) return 0;
    *buf = grown;
    *cap = new_cap;
    return 1;
}

/*
 * Read exactly four hexadecimal digits at the reader's position.
 * On success stores their value in `*out`, advances the reader by four and
 * returns 1.  On failure (fewer than four bytes left, or a non-hex byte)
 * returns 0 and leaves the reader where it was, so a malformed escape can
 * never swallow the bytes that follow it (e.g. the closing quote).
 */
static int jr_read_hex4(JsonReader *r, uint32_t *out)
{
    if (r->len - r->pos < 4) return 0;

    uint32_t value = 0;
    for (size_t i = 0; i < 4; i++) {
        char h = r->src[r->pos + i];
        uint32_t digit;
        if (h >= '0' && h <= '9')      digit = (uint32_t)(h - '0');
        else if (h >= 'a' && h <= 'f') digit = (uint32_t)(h - 'a') + 10;
        else if (h >= 'A' && h <= 'F') digit = (uint32_t)(h - 'A') + 10;
        else return 0;
        value = (value << 4) | digit;
    }
    r->pos += 4;
    *out = value;
    return 1;
}

/*
 * Append code point `cp` (must be < 0x110000) to `buf` at offset `len` as
 * UTF-8 and return the new length.  The caller must already have reserved
 * four bytes of space.
 */
static size_t jr_put_utf8(char *buf, size_t len, uint32_t cp)
{
    if (cp < 0x80) {
        buf[len++] = (char)cp;
    } else if (cp < 0x800) {
        buf[len++] = (char)(0xC0 | (cp >> 6));
        buf[len++] = (char)(0x80 | (cp & 0x3F));
    } else if (cp < 0x10000) {
        buf[len++] = (char)(0xE0 | (cp >> 12));
        buf[len++] = (char)(0x80 | ((cp >> 6) & 0x3F));
        buf[len++] = (char)(0x80 | (cp & 0x3F));
    } else {
        buf[len++] = (char)(0xF0 | (cp >> 18));
        buf[len++] = (char)(0x80 | ((cp >> 12) & 0x3F));
        buf[len++] = (char)(0x80 | ((cp >> 6) & 0x3F));
        buf[len++] = (char)(0x80 | (cp & 0x3F));
    }
    return len;
}

/*
 * Parse a JSON string literal, starting at the next non-whitespace character
 * (which must be a double quote).
 *
 * Returns a malloc'd, NUL-terminated buffer holding the decoded bytes and
 * stores the byte count (excluding the terminator) in `*out_len`; the caller
 * frees the buffer.  The decoded text may contain embedded NUL bytes (from
 * \u0000), so `*out_len` is authoritative.  Returns NULL, without writing
 * `*out_len`, when the opening quote is missing, the string is not closed
 * before the end of input, or memory cannot be allocated.
 *
 * Escape handling is deliberately lenient:
 *   - the standard single-character escapes are translated;
 *   - an unknown escape \x yields the character x itself;
 *   - \u with fewer than four bytes of input left yields a literal 'u';
 *   - \u followed by non-hex digits yields '?' and the following bytes are
 *     then read as ordinary string content;
 *   - a valid surrogate pair is combined into one code point; an unpaired
 *     high or low surrogate is replaced by U+FFFD so the output stays valid
 *     UTF-8, and an escape following an unpaired high surrogate is left in
 *     the input to be decoded on its own.
 */
static char *jr_parse_string(JsonReader *r, size_t *out_len)
{
    if (jr_consume(r) != '"') return NULL;

    size_t cap = 64, len = 0;
    char *buf = malloc(cap);
    if (!buf) return NULL;

    while (r->pos < r->len) {
        char c = r->src[r->pos++];
        if (c == '"') {
            buf[len] = '\0';
            *out_len = len;
            return buf;
        }
        if (c == '\\' && r->pos < r->len) {
            char esc = r->src[r->pos++];
            switch (esc) {
            case '"':  c = '"';  break;
            case '\\': c = '\\'; break;
            case '/':  c = '/';  break;
            case 'b':  c = '\b'; break;
            case 'f':  c = '\f'; break;
            case 'n':  c = '\n'; break;
            case 'r':  c = '\r'; break;
            case 't':  c = '\t'; break;
            case 'u': {
                uint32_t cp = 0;

                /* truncated escape: fall back to a literal 'u' */
                if (r->len - r->pos < 4) { c = esc; break; }
                /* malformed digits: emit '?', consume nothing further */
                if (!jr_read_hex4(r, &cp)) { c = '?'; break; }

                if (cp >= 0xD800 && cp <= 0xDBFF) {
                    /* high surrogate: only valid when a low one follows */
                    size_t   mark = r->pos;
                    uint32_t lo = 0;
                    int      paired = 0;

                    if (r->len - r->pos >= 6 &&
                        r->src[r->pos] == '\\' && r->src[r->pos + 1] == 'u') {
                        r->pos += 2;
                        if (jr_read_hex4(r, &lo) &&
                            lo >= 0xDC00 && lo <= 0xDFFF) {
                            cp = 0x10000 + ((cp - 0xD800) << 10) + (lo - 0xDC00);
                            paired = 1;
                        }
                    }
                    if (!paired) {
                        /* rewind so the next escape is decoded by itself */
                        r->pos = mark;
                        cp = 0xFFFD;
                    }
                } else if (cp >= 0xDC00 && cp <= 0xDFFF) {
                    /* low surrogate with no preceding high surrogate */
                    cp = 0xFFFD;
                }

                if (!jr_buf_reserve(&buf, &cap, len, 4)) goto fail;
                len = jr_put_utf8(buf, len, cp);
                continue;
            }
            default:   c = esc;  break;
            }
        }
        if (!jr_buf_reserve(&buf, &cap, len, 1)) goto fail;
        buf[len++] = c;
    }

fail:
    /* reached on unterminated input or on allocation failure */
    free(buf);
    return NULL;
}

static DtobValue *json_parse_value(JsonReader *r, const DtobSchema *schema,
                                   const char *path);

/*
 * Parse a JSON object ("{ ... }") into a key/value set.
 *
 * r      - reader positioned at (whitespace before) the opening brace
 * schema - passed through unchanged to the member value parser
 * path   - dotted path of this object; each member is parsed with the path
 *          "<path>.<key>", or just "<key>" when path is NULL or empty
 *
 * Returns NULL when the opening brace is missing or the set cannot be
 * created.  Parsing is otherwise best-effort: when a member is malformed (or
 * memory for its path cannot be allocated) the loop stops and the members
 * collected so far are returned.  After the loop one character is consumed
 * in place of the closing brace whether or not it actually is '}'.
 */
static DtobValue *json_parse_object(JsonReader *r, const DtobSchema *schema,
                                    const char *path)
{
    if (!jr_expect(r, '{')) return NULL;
    DtobValue *kvs = dtob_kvset();
    /* dtob_kvset is defined elsewhere; guard in case it can return NULL */
    if (!kvs) return NULL;

    if (jr_peek(r) == '}') { jr_consume(r); return kvs; }

    for (;;) {
        size_t klen = 0;
        char *key = jr_parse_string(r, &klen);
        if (!key) break;

        if (!jr_expect(r, ':')) { free(key); break; }

        /* build child path: "<path>.<key>" or just "<key>" */
        size_t plen = path ? strlen(path) : 0;
        char *child_path = malloc(plen + 1 + klen + 1);
        if (!child_path) { free(key); break; }
        if (plen > 0) {
            memcpy(child_path, path, plen);
            child_path[plen] = '.';
            /* klen + 1 also copies the key's terminating NUL */
            memcpy(child_path + plen + 1, key, klen + 1);
        } else {
            memcpy(child_path, key, klen + 1);
        }

        DtobValue *val = json_parse_value(r, schema, child_path);
        free(child_path);

        if (!val) { free(key); break; }
        dtob_kvset_put(kvs, key, val);
        /* key is freed here right after the put, as in the original code */
        free(key);

        if (jr_peek(r) == ',') { jr_consume(r); continue; }
        break;
    }

    /* result deliberately ignored: a missing '}' does not fail the parse */
    jr_expect(r, '}');
    return kvs;
}

/*
 * Parse a JSON array ("[ ... ]").
 *
 * Elements are parsed with the same `schema` and `path` the array itself was
 * given.  Returns NULL only when the opening bracket is missing.  Parsing is
 * best-effort: the loop stops at the first element that fails to parse or at
 * the first element not followed by a comma, and the elements collected so
 * far are returned.  One character is then consumed in place of the closing
 * bracket, whether or not it actually is ']'.
 */
static DtobValue *json_parse_array(JsonReader *r, const DtobSchema *schema,
                                   const char *path)
{
    if (!jr_expect(r, '['))
        return NULL;

    DtobValue *arr = dtob_array();

    /* empty array: "[]" */
    if (jr_peek(r) == ']') {
        jr_consume(r);
        return arr;
    }

    int more = 1;
    while (more) {
        DtobValue *elem = json_parse_value(r, schema, path);
        if (elem == NULL)
            break;
        dtob_array_push(arr, elem);

        /* a comma means another element follows */
        more = (jr_peek(r) == ',');
        if (more)
            jr_consume(r);
    }

    jr_expect(r, ']');
    return arr;
}

/*
 * Parse a JSON number at the reader's position.
 *
 * The token is treated as floating point when a '.', 'e' or 'E' appears
 * before the first character that is not a digit or sign; otherwise it is an
 * integer.  Integers starting with '-' become dtob_int; non-negative ones
 * become dtob_int when they fit in int64_t and dtob_uint otherwise.  An
 * integer whose magnitude does not fit in 64 bits (strtoll/strtoull report
 * ERANGE) is re-parsed as a double and returned via dtob_float rather than
 * being silently clamped to the integer limit.
 *
 * Returns NULL when nothing could be converted.  The strto* functions read
 * until the first character that cannot be part of the number, so the text
 * at r->src must be NUL-terminated (dtob_from_json builds the reader from a
 * C string).
 */
static DtobValue *json_parse_number(JsonReader *r)
{
    jr_skip_ws(r);
    if (r->pos >= r->len) return NULL;

    const char *start = r->src + r->pos;
    const char *limit = r->src + r->len;
    char *end = NULL;

    /* decide: int or float — scan ahead for '.', 'e', 'E' */
    int is_float = 0;
    for (const char *p = start; p < limit; p++) {
        if (*p == '.' || *p == 'e' || *p == 'E') { is_float = 1; break; }
        if (!isdigit((unsigned char)*p) && *p != '-' && *p != '+') break;
    }

    if (!is_float) {
        /* errno must be cleared first: strto* only sets it on failure */
        errno = 0;
        if (*start == '-') {
            long long ll = strtoll(start, &end, 10);
            if (end == start) return NULL;
            if (errno != ERANGE) {
                r->pos += (size_t)(end - start);
                return dtob_int((int64_t)ll);
            }
        } else {
            unsigned long long ull = strtoull(start, &end, 10);
            if (end == start) return NULL;
            if (errno != ERANGE) {
                r->pos += (size_t)(end - start);
                if (ull <= (uint64_t)INT64_MAX)
                    return dtob_int((int64_t)ull);
                return dtob_uint((uint64_t)ull);
            }
        }
        /* out of 64-bit range: fall through and represent it as a double */
    }

    double d = strtod(start, &end);
    if (end == start) return NULL;
    r->pos += (size_t)(end - start);
    return dtob_float(d);
}

/*
 * Parse one JSON value of any kind, dispatching on its first non-whitespace
 * character: object, array, string, number, or one of the literals
 * true / false / null.
 *
 * `schema` and `path` are forwarded to the object and array parsers; the
 * scalar branches here do not use them.  Returns NULL when the input does
 * not start with a recognizable value or when the selected parser fails.
 */
static DtobValue *json_parse_value(JsonReader *r, const DtobSchema *schema,
                                   const char *path)
{
    int c = jr_peek(r);

    switch (c) {
    case '{':
        return json_parse_object(r, schema, path);
    case '[':
        return json_parse_array(r, schema, path);
    case '"': {
        size_t slen = 0;
        char *s = jr_parse_string(r, &slen);
        if (!s) return NULL;

        DtobValue *v = json_string(s, slen);
        /* s is freed here right after the call, as in the original code */
        free(s);
        return v;
    }
    default:
        break;
    }

    /*
     * Explicit range test instead of isdigit(): c may be -1 (end of input)
     * or, where plain char is signed, a negative byte value, and passing
     * such a value to a <ctype.h> function is undefined behavior.
     */
    if (c == '-' || (c >= '0' && c <= '9')) return json_parse_number(r);
    if (jr_match(r, "true"))    return json_true();
    if (jr_match(r, "false"))   return json_false();
    if (jr_match(r, "null"))    return json_null();

    return NULL;
}

/* --- Public: JSON → DTOB --- */

/*
 * Convert a NUL-terminated JSON text into a DTOB value tree.
 *
 * json   - the JSON text; NULL is rejected
 * schema - forwarded to the recursive parser (in the code visible here it is
 *          only passed along, never consulted)
 *
 * Returns the parsed value, or NULL when `json` is NULL or does not begin
 * with a recognizable JSON value.  Only the first value is parsed; anything
 * following it in the text is not examined.
 */
DtobValue *dtob_from_json(const char *json, const DtobSchema *schema)
{
    if (!json) return NULL;

    JsonReader r = { json, 0, strlen(json) };
    /* the root value is parsed with an empty path */
    return json_parse_value(&r, schema, "");
}

/* ================================================================== */
/*  Schema (stub — will be fleshed out later)                         */
/* ================================================================== */

/*
 * Growable list of (path, type) entries, stored as two parallel arrays that
 * share one count and one capacity.
 */
struct DtobSchema {
    char     **paths;   /* paths[i]: heap-allocated copy of the i-th path */
    DtobType  *types;   /* types[i]: type recorded for paths[i] */
    size_t     count;   /* number of entries currently stored */
    size_t     cap;     /* number of slots allocated in paths and types */
};

/*
 * Allocate an empty schema with room for eight entries.
 *
 * Returns the new schema, to be released with dtob_schema_free, or NULL when
 * any allocation fails (nothing is leaked in that case).
 */
DtobSchema *dtob_schema_new(void)
{
    DtobSchema *s = calloc(1, sizeof *s);
    if (!s) return NULL;

    s->cap = 8;
    s->paths = malloc(s->cap * sizeof *s->paths);
    s->types = malloc(s->cap * sizeof *s->types);
    if (!s->paths || !s->types) {
        /* free(NULL) is a no-op, so whichever allocation succeeded is freed */
        free(s->paths);
        free(s->types);
        free(s);
        return NULL;
    }
    return s;
}

/*
 * Append a (path, type) entry to the schema.
 *
 * s    - schema to extend; NULL is ignored
 * path - path string; it is copied, so the caller keeps ownership of its
 *        own buffer; NULL is ignored
 * type - type to record for the path
 *
 * The arrays double in capacity when full.  The function returns void, so
 * an allocation failure cannot be reported: in that case the schema is left
 * with its existing entries intact and the new entry is not added.
 */
void dtob_schema_add(DtobSchema *s, const char *path, DtobType type)
{
    if (!s || !path) return;

    if (s->count >= s->cap) {
        size_t new_cap = s->cap ? s->cap * 2 : 8;
        if (new_cap <= s->cap || new_cap > (size_t)-1 / sizeof *s->paths)
            return;   /* capacity arithmetic would overflow */

        /* grow through temporaries so a failed realloc loses nothing */
        char **new_paths = realloc(s->paths, new_cap * sizeof *new_paths);
        if (!new_paths) return;
        s->paths = new_paths;

        DtobType *new_types = realloc(s->types, new_cap * sizeof *new_types);
        if (!new_types) return;   /* paths is merely over-allocated; cap unchanged */
        s->types = new_types;

        s->cap = new_cap;
    }

    char *copy = strdup(path);
    if (!copy) return;

    s->paths[s->count] = copy;
    s->types[s->count] = type;
    s->count++;
}

/*
 * Release a schema: every stored path copy, both arrays, and the schema
 * object itself.  Passing NULL is allowed and does nothing.
 */
void dtob_schema_free(DtobSchema *s)
{
    if (s == NULL) {
        return;
    }

    size_t idx = 0;
    while (idx < s->count) {
        free(s->paths[idx]);
        idx++;
    }

    free(s->paths);
    free(s->types);
    free(s);
}