/*
 * dtob-sys 0.1.0
 *
 * Raw FFI bindings to the dtob C library (encoder + decoder).
 * Documentation
 */
#include "cli.h"
#include <unistd.h>

/* Look up `key` in a key/value set.
 *
 * Keys are compared by length and then byte-for-byte. Returns the value of
 * the first matching pair, or NULL when `kv` is NULL, is not a DTOB_KV_SET,
 * or holds no pair with that key. */
static DtobValue *kv_get(const DtobValue *kv, const char *key)
{
    if (kv == NULL || kv->type != DTOB_KV_SET) {
        return NULL;
    }

    const size_t want_len = strlen(key);
    size_t idx = 0;
    while (idx < kv->num_pairs) {
        if (kv->pairs[idx].key_len == want_len &&
            memcmp(kv->pairs[idx].key, key, want_len) == 0) {
            return kv->pairs[idx].value;
        }
        idx++;
    }
    return NULL;
}

/* Build an element node carrying exactly one attribute and no children.
 *
 * The node is a key/value set with three entries, inserted in this order:
 *   "t" - the tag name as a string value
 *   "a" - a key/value set holding the single pair attr -> val
 *   "c" - an empty array
 * Returns the newly created node. */
static DtobValue *make_void_elem(const char *tag, const char *attr, const char *val)
{
    const size_t tag_len = strlen(tag);
    const size_t val_len = strlen(val);

    DtobValue *node = dtob_kvset();
    dtob_kvset_put(node, "t", json_string(tag, tag_len));

    DtobValue *attr_set = dtob_kvset();
    dtob_kvset_put(attr_set, attr, json_string(val, val_len));
    dtob_kvset_put(node, "a", attr_set);

    dtob_kvset_put(node, "c", dtob_array());

    return node;
}

/* Insert `elem` at index 0 of the array `head_c`.
 *
 * The element is first pushed onto the array, then every element except
 * the last is shifted up by one slot and `elem` is stored in slot 0.
 * NOTE(review): assumes dtob_array_push always leaves num_elements >= 1;
 * otherwise the shift count below would wrap around - confirm. */
static void prepend_head(DtobValue *head_c, DtobValue *elem)
{
    dtob_array_push(head_c, elem);

    const size_t to_shift = head_c->num_elements - 1;
    memmove(head_c->elements + 1, head_c->elements,
            to_shift * sizeof(DtobValue *));

    head_c->elements[0] = elem;
}

/* Put <meta charset="utf-8"> and, when base_url is non-NULL,
 * <base href="base_url"> at the front of the <head> element's children.
 *
 * `root` is searched through its "c" array for the first key/value-set
 * child whose "t" entry is the 4-byte tag "head" (case-insensitive).
 * Nothing happens when root has no "c" array, no such child exists, or
 * that child has no "c" array of its own. */
static void inject_head_tags(DtobValue *root, const char *base_url)
{
    DtobValue *children = kv_get(root, "c");
    if (children == NULL || children->type != DTOB_ARRAY) {
        return;
    }

    /* Children array of the first <head> element found, if any. */
    DtobValue *head_children = NULL;
    size_t idx;
    for (idx = 0; idx < children->num_elements; idx++) {
        DtobValue *node = children->elements[idx];
        if (node == NULL || node->type != DTOB_KV_SET) {
            continue;
        }

        DtobValue *tag = kv_get(node, "t");
        if (tag == NULL || tag->data_len != 4) {
            continue;
        }
        if (strncasecmp((char *)tag->data, "head", 4) != 0) {
            continue;
        }

        head_children = kv_get(node, "c");
        break;
    }

    if (head_children == NULL || head_children->type != DTOB_ARRAY) {
        return;
    }

    /* Each call prepends, so the tag inserted last ends up first:
     * the final order is <meta charset="utf-8">, then <base href="...">. */
    if (base_url != NULL) {
        prepend_head(head_children, make_void_elem("base", "href", base_url));
    }
    prepend_head(head_children, make_void_elem("meta", "charset", "utf-8"));
}

/* Report whether `s` looks like a URL.
 * Returns 1 when the string contains "://" anywhere, 0 otherwise. */
static int is_url(const char *s)
{
    const char *scheme_sep = strstr(s, "://");
    if (scheme_sep == NULL) {
        return 0;
    }
    return 1;
}

/* Entry point for `dtob encode`.
 *
 * Usage: dtob encode <input> [output.dtob] [--types] [--infer-types=0|1]
 *        [--greedy[=0|1]] [--assume-correct-encoding] [--ft json|html|xml]
 *
 * <input> is treated as a URL when it contains "://", otherwise as a file
 * path. The file type comes from --ft, else from detect_filetype(); URLs
 * with no detectable type default to "html".
 *
 * HTML input is encoded as a raw string unless --assume-correct-encoding is
 * given, in which case it is parsed into a tree like XML and JSON input.
 *
 * When <output> is given the encoded bytes are written there. Otherwise
 * they are written to a temporary file and the process execs
 * `dtob-represent` on it (first next to argv[0], then via PATH).
 *
 * Returns 0 on success (or the result of write_file), 1 on any error. */
int main(int argc, char **argv)
{
    int typed = 0;
    int infer_types = 1;
    int greedy = -1; /* -1 = unset; default depends on input type */
    int assume_correct = 0;
    const char *input = NULL;
    const char *output = NULL;
    const char *ft = NULL;

    /* The first two non-option arguments are input and output;
     * any further positional arguments are ignored. */
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--types") == 0) {
            typed = 1;
        } else if (strcmp(argv[i], "--greedy") == 0 ||
                   strcmp(argv[i], "--greedy=1") == 0) {
            greedy = 1;
        } else if (strcmp(argv[i], "--greedy=0") == 0) {
            greedy = 0;
        } else if (strncmp(argv[i], "--infer-types=", 14) == 0) {
            infer_types = atoi(argv[i] + 14);
        } else if (strcmp(argv[i], "--assume-correct-encoding") == 0) {
            assume_correct = 1;
        } else if (strcmp(argv[i], "--ft") == 0) {
            if (++i >= argc) {
                fprintf(stderr, "error: --ft requires a value\n");
                return 1;
            }
            ft = argv[i];
        } else if (!input) {
            input = argv[i];
        } else if (!output) {
            output = argv[i];
        }
    }

    if (!input) {
        fprintf(stderr, "usage: dtob encode <input> [output.dtob] [--types] [--infer-types=0|1]\n");
        fprintf(stderr, "       [--greedy[=0|1]] [--assume-correct-encoding] [--ft json|html|xml]\n");
        fprintf(stderr, "  if output is omitted, represents the result to stdout\n");
        fprintf(stderr, "  --assume-correct-encoding  parse HTML into dtob element tree (legacy)\n");
        return 1;
    }

    int url_input = is_url(input);

    /* default greedy: on for URLs, off for files */
    if (greedy == -1)
        greedy = url_input ? 1 : 0;

    /* determine filetype */
    const char *filetype = ft ? ft : detect_filetype(input);
    if (!filetype) {
        if (url_input) {
            filetype = "html"; /* sensible default for URLs */
        } else {
            const char *dot = strrchr(input, '.');
            fprintf(stderr, "unrecognized filetype: %s\n", dot ? dot : "(no extension)");
            return 1;
        }
    }

    /* html is handled as a special case of the XML-like formats */
    int is_xml = 0, is_html = 0;
    if (strcmp(filetype, "json") == 0) {
        /* nothing */
    } else if (strcmp(filetype, "html") == 0) {
        is_xml = 1; is_html = 1;
    } else if (strcmp(filetype, "xml") == 0 || strcmp(filetype, "htmx") == 0) {
        is_xml = 1;
    } else {
        fprintf(stderr, "unrecognized filetype: %s\n", filetype);
        return 1;
    }

    /* fetch content */
    char *text;
    char *effective_url = NULL;
    if (url_input) {
        size_t raw_len;
        uint8_t *raw = fetch_url_bytes_ex(input, &raw_len, &effective_url);
        if (!raw) return 1;
        /* copy into a NUL-terminated buffer so it can be used as a C string */
        text = malloc(raw_len + 1);
        if (!text) {
            fprintf(stderr, "error: out of memory\n");
            free(raw);
            free(effective_url);
            return 1;
        }
        memcpy(text, raw, raw_len);
        text[raw_len] = '\0';
        free(raw);
    } else {
        text = read_text_file(input);
    }
    if (!text) return 1;

    DtobValue *root = NULL;
    uint8_t *encoded = NULL;
    size_t out_len = 0;

    if (is_html && !assume_correct) {
        /* ---- raw HTML path (default): keep HTML as a string ---- */
        ResourceList rl;
        resource_list_init(&rl);

        char *final_html = text;
        if (greedy) {
            if (url_input)
                final_html = html_raw_greedify_url(text, strlen(text),
                    effective_url ? effective_url : input, &rl);
            else
                final_html = html_raw_greedify(text, strlen(text),
                    input, &rl);

            /* NOTE(review): assumes the greedify helpers signal failure
             * by returning NULL - confirm. Without this check strlen()
             * below would dereference a NULL pointer. */
            if (!final_html) {
                fprintf(stderr, "error: failed to greedify HTML\n");
                resource_list_free(&rl);
                free(text);
                free(effective_url);
                return 1;
            }
        }

        if (html_raw_encode(final_html, strlen(final_html),
                            rl.count > 0 ? &rl : NULL,
                            &encoded, &out_len) != 0) {
            if (final_html != text) free(final_html);
            resource_list_free(&rl);
            free(text);
            free(effective_url);
            return 1;
        }

        /* final_html aliases text when greedify was skipped; free it once */
        if (final_html != text) free(final_html);
        resource_list_free(&rl);
        free(text);
    } else {
        /* ---- AST path: parse into dtob tree ---- */
        if (is_html) {
            root = dtob_from_html(text);
        } else if (is_xml) {
            root = dtob_from_xml(text);
        } else {
            root = dtob_from_json(text, NULL);
        }
        free(text);

        if (!root) {
            fprintf(stderr, "error: failed to parse %s\n",
                    is_html ? "HTML" : (is_xml ? "XML" : "JSON"));
            free(effective_url);
            return 1;
        }

        if (is_html)
            inject_head_tags(root, url_input ? (effective_url ? effective_url : input) : NULL);

        if (greedy && is_html) {
            if (url_input)
                html_greedify_url(root, effective_url ? effective_url : input);
            else
                html_greedify(root, input);
        }

        /* encoder choice: HTML tree > --types > inferred types > plain */
        if (is_html) {
            if (html_typed_encode(root, &encoded, &out_len) != 0) {
                dtob_free(root);
                free(effective_url);
                return 1;
            }
        } else if (typed) {
            if (typed_encode(root, &encoded, &out_len) != 0) {
                dtob_free(root);
                free(effective_url);
                return 1;
            }
        } else if (infer_types) {
            if (infer_typed_encode(root, &encoded, &out_len) != 0) {
                dtob_free(root);
                free(effective_url);
                return 1;
            }
        } else {
            encoded = dtob_encode(root, &out_len);
            if (!encoded) {
                dtob_free(root);
                free(effective_url);
                return 1;
            }
        }
        dtob_free(root);
    }

    free(effective_url);

    if (!output) {
        /* write to temp file, exec dtob-represent */
        char tmppath[] = "/tmp/dtob-XXXXXX";
        int fd = mkstemp(tmppath);
        if (fd < 0) { perror("mkstemp"); free(encoded); return 1; }

        /* write() may transfer fewer bytes than requested; loop until the
         * whole buffer is on disk so dtob-represent never sees a
         * truncated file. */
        size_t written = 0;
        while (written < out_len) {
            ssize_t n = write(fd, encoded + written, out_len - written);
            if (n <= 0) {
                if (n < 0)
                    perror("write");
                else
                    fprintf(stderr, "write: no progress\n");
                close(fd);
                unlink(tmppath); /* don't leave a partial temp file behind */
                free(encoded);
                return 1;
            }
            written += (size_t)n;
        }
        close(fd);
        free(encoded);

        /* Prefer a dtob-represent binary in the same directory as argv[0] */
        char repr[512];
        const char *slash = strrchr(argv[0], '/');
        if (slash) {
            size_t dlen = (size_t)(slash - argv[0] + 1);
            /* 14 = strlen("dtob-represent"); the copy of 15 includes the NUL */
            if (dlen + 14 < sizeof(repr)) {
                memcpy(repr, argv[0], dlen);
                memcpy(repr + dlen, "dtob-represent", 15);
                char *av[] = { repr, tmppath, NULL };
                execv(repr, av);
            }
        }
        /* fall back to a PATH lookup; exec only returns on failure */
        char *av[] = { "dtob-represent", tmppath, NULL };
        execvp("dtob-represent", av);
        fprintf(stderr, "error: dtob-represent not found\n");
        unlink(tmppath); /* nobody will consume the temp file now */
        return 1;
    }

    int rc = write_file(output, encoded, out_len);
    fprintf(stderr, "encoded %zu bytes\n", out_len);
    free(encoded);
    return rc;
}