/*
 * dtob-sys 0.1.1
 *
 * Raw FFI bindings to the dtob C library (encoder + decoder).
 * Documentation
 */
#include "html_rules.h"
#include <strings.h>
#include <string.h>
#include <ctype.h>

/* ------------------------------------------------------------------ */
/*  Void elements (no closing tag)                                     */
/* ------------------------------------------------------------------ */

/* Names of the void elements, terminated by a NULL sentinel. */
static const char *void_tags[] = {
    "area", "base", "br", "col", "embed", "hr", "img", "input",
    "link", "meta", "param", "source", "track", "wbr", NULL
};

/*
 * Report whether `tag` names a void element.
 *
 * tag: NUL-terminated element name; compared case-insensitively.
 *      Must not be NULL (it is passed straight to strcasecmp).
 *
 * Returns 1 if `tag` matches an entry of void_tags, 0 otherwise.
 */
int html_is_void(const char *tag)
{
    const char **entry = void_tags;

    /* Walk the table until the NULL sentinel. */
    while (*entry != NULL) {
        if (strcasecmp(tag, *entry) == 0) {
            return 1;
        }
        entry++;
    }
    return 0;
}

/* ------------------------------------------------------------------ */
/*  Raw text elements (no entity decoding, read until </tag>)          */
/* ------------------------------------------------------------------ */

/*
 * Report whether `tag` names a raw text element.
 *
 * tag: NUL-terminated element name; compared case-insensitively.
 *      Must not be NULL (it is passed straight to strcasecmp).
 *
 * Returns 1 for "style", "script" and "xmp", 0 for anything else.
 */
int html_is_raw_text(const char *tag)
{
    static const char *const raw_text_tags[] = { "style", "script", "xmp" };
    const size_t count = sizeof raw_text_tags / sizeof raw_text_tags[0];

    for (size_t idx = 0; idx < count; idx++) {
        if (strcasecmp(tag, raw_text_tags[idx]) == 0) {
            return 1;
        }
    }
    return 0;
}

/* ------------------------------------------------------------------ */
/*  Escapable raw text (entities decoded, but no child elements)       */
/* ------------------------------------------------------------------ */

/*
 * Report whether `tag` names an escapable raw text element.
 *
 * tag: NUL-terminated element name; compared case-insensitively.
 *      Must not be NULL (it is passed straight to strcasecmp).
 *
 * Returns 1 for "textarea" and "title", 0 for anything else.
 */
int html_is_escapable_raw_text(const char *tag)
{
    if (strcasecmp(tag, "textarea") == 0) {
        return 1;
    }
    return strcasecmp(tag, "title") == 0;
}

/* ------------------------------------------------------------------ */
/*  Implicit closing rules                                             */
/* ------------------------------------------------------------------ */

/*
 * helper: case-insensitive membership test.
 *
 * tag:  NUL-terminated name to look for (must not be NULL).
 * list: array of NUL-terminated names ending with a NULL sentinel.
 *
 * Returns 1 if `tag` equals any entry of `list` ignoring case, else 0.
 */
static int tag_in(const char *tag, const char **list)
{
    for (const char **entry = list; *entry != NULL; entry++) {
        if (strcasecmp(tag, *entry) == 0) {
            return 1;
        }
    }
    return 0;
}

/* Tags that implicitly close <p> */
static const char *p_closers[] = {
    "address", "article", "aside", "blockquote", "details", "dialog",
    "div", "dl", "fieldset", "figcaption", "figure", "footer", "form",
    "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr",
    "main", "menu", "nav", "ol", "p", "pre", "search", "section",
    "table", "ul", NULL
};

/*
 * Decide whether a `child` start tag implicitly closes an open `parent`.
 *
 * parent: NUL-terminated name of the currently open element.
 * child:  NUL-terminated name of the element about to be opened.
 * Both are compared case-insensitively and must not be NULL.
 *
 * Returns 1 if `child` closes `parent`, 0 otherwise. A `parent` with no
 * rule here always yields 0.
 *
 * Most parents have a fixed list of closing tags, kept in the rule table
 * below. <colgroup> is the exception: it is closed by every tag except
 * <col> and <template>, so it is handled separately.
 *
 * <hr> is listed as a closer for <option> and <optgroup>: the HTML
 * optional-tag rules allow either end tag to be omitted when an <hr>
 * follows inside a <select>.
 */
int html_implicitly_closes(const char *parent, const char *child)
{
    static const char *li_closers[]       = { "li", NULL };
    static const char *dt_dd_closers[]    = { "dt", "dd", NULL };
    static const char *cell_closers[]     = { "td", "th", "tr", NULL };
    static const char *tr_closers[]       = { "tr", NULL };
    static const char *tsection_closers[] = { "tbody", "tfoot", NULL };
    static const char *tfoot_closers[]    = { "tbody", NULL };
    static const char *option_closers[]   = { "option", "optgroup", "hr", NULL };
    static const char *optgroup_closers[] = { "optgroup", "hr", NULL };
    static const char *ruby_closers[]     = { "rt", "rp", NULL };
    static const char *cap_closers[]      = {
        "col", "colgroup", "thead", "tbody", "tfoot", "tr", NULL
    };
    static const char *head_closers[]     = { "body", NULL };

    /* Parent names are distinct, so at most one rule can match. */
    static const struct {
        const char  *parent;
        const char **closers;
    } rules[] = {
        { "p",        p_closers        },
        { "li",       li_closers       },
        { "dt",       dt_dd_closers    },
        { "dd",       dt_dd_closers    },
        { "td",       cell_closers     },
        { "th",       cell_closers     },
        { "tr",       tr_closers       },
        { "thead",    tsection_closers },
        { "tbody",    tsection_closers },
        { "tfoot",    tfoot_closers    },
        { "option",   option_closers   },
        { "optgroup", optgroup_closers },
        { "rt",       ruby_closers     },
        { "rp",       ruby_closers     },
        { "caption",  cap_closers      },
        { "head",     head_closers     },
    };
    const size_t rule_count = sizeof rules / sizeof rules[0];

    for (size_t i = 0; i < rule_count; i++) {
        if (strcasecmp(parent, rules[i].parent) == 0) {
            return tag_in(child, rules[i].closers);
        }
    }

    /* Inverted rule: everything but <col>/<template> ends a <colgroup>. */
    if (strcasecmp(parent, "colgroup") == 0) {
        return strcasecmp(child, "col") != 0 &&
               strcasecmp(child, "template") != 0;
    }

    return 0;
}

/* ------------------------------------------------------------------ */
/*  Tag name normalization (lowercase in-place)                        */
/* ------------------------------------------------------------------ */

/*
 * Lowercase the first `len` bytes of `tag` in place.
 *
 * tag: buffer holding the tag name; need not be NUL-terminated because
 *      exactly `len` bytes are visited. Must be non-NULL when len > 0.
 * len: number of bytes to process.
 *
 * Only the ASCII letters 'A'..'Z' are folded; every other byte is left
 * untouched. HTML tag names are ASCII case-insensitive, and a
 * locale-dependent tolower() can rewrite bytes >= 0x80 under a
 * single-byte locale, corrupting UTF-8 sequences in the name.
 * Assumes an ASCII-compatible execution character set, where 'A'..'Z'
 * are contiguous.
 */
void html_normalize_tag(char *tag, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (tag[i] >= 'A' && tag[i] <= 'Z') {
            tag[i] = (char)(tag[i] - 'A' + 'a');
        }
    }
}