#ifndef TREE_SITTER_HTML_TAG_H_
#define TREE_SITTER_HTML_TAG_H_
#include "tree_sitter/array.h"
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
/*
 * Every tag kind known to this header.
 *
 * Enumerator ORDER is significant: the tag_is_*() predicates classify a tag
 * by comparing its value against the END_OF_* sentinels, so an enumerator
 * must stay inside its group.
 */
typedef enum {
    /* Void elements: everything below END_OF_VOID_TAGS. */
    AREA, BASE, BR, COL, EMBED, HR, IMG, INPUT,
    KEYGEN, LINK, META, PARAM, SOURCE, TRACK, WBR,
    END_OF_VOID_TAGS,

    /* Raw-text elements: between END_OF_VOID_TAGS and END_OF_RAW_TEXT_TAGS. */
    SCRIPT, STYLE,
    END_OF_RAW_TEXT_TAGS,

    /* Escapable raw-text elements. */
    TEXTAREA, TITLE,
    END_OF_ESCAPABLE_RAW_TEXT_TAGS,

    /* All remaining built-in elements. */
    TEMPLATE,
    A, ABBR, ADDRESS, ARTICLE, ASIDE, AUDIO,
    B, BDI, BDO, BLOCKQUOTE, BODY, BUTTON,
    CANVAS, CAPTION, CITE, CODE, COLGROUP,
    DATA, DATALIST, DD, DEL, DETAILS, DFN, DIALOG, DIV, DL, DT,
    EM,
    FIELDSET, FIGCAPTION, FIGURE, FOOTER, FORM,
    H1, H2, H3, H4, H5, H6, HEAD, HEADER, HGROUP, HTML,
    I, IFRAME, INS,
    KBD,
    LABEL, LEGEND, LI,
    MAIN, MAP, MARK, MATH, MENU, METER,
    NAV, NOSCRIPT,
    OBJECT, OL, OPTGROUP, OPTION, OUTPUT,
    P, PICTURE, PRE, PROGRESS,
    Q,
    RB, RP, RT, RTC, RUBY,
    S, SAMP, SEARCH, SECTION, SELECT, SLOT, SMALL, SPAN, STRONG,
    SUB, SUMMARY, SUP, SVG,
    TABLE, TBODY, TD, TFOOT, TH, THEAD, TIME, TR,
    U, UL,
    VAR, VIDEO,

    /* Any name that is not one of the built-in tags above. */
    CUSTOM,

    /* Placeholder type assigned by tag_new() before a real type is set. */
    END_
} TagType;
/*
 * Character buffer built from the Array() macro of "tree_sitter/array.h".
 * The code in this header treats it as length-delimited: it reads `.size`
 * and `.contents` and never relies on a terminating NUL.
 */
typedef Array(char) String;
/*
 * One row of the built-in tag lookup table.
 *
 * Field order must not change: the table is filled with positional
 * initialisers.
 */
typedef struct {
    const char *tag_name; /* tag name as a string literal */
    uint8_t length;       /* byte length of tag_name, terminator excluded */
    TagType tag_type;     /* enumerator returned when the name matches */
} TagMapEntry;
/*
 * A tag instance: its kind plus, for CUSTOM tags, the name it was created
 * from.
 */
typedef struct {
    TagType type;
    /* Owned by the Tag when type == CUSTOM; tag_free() releases it only in
     * that case. For built-in types it stays the empty array from tag_new(). */
    String custom_tag_name;
} Tag;
/*
 * Built-in tag names paired with their TagType.
 *
 * Rows are grouped by first letter, and CHAR_INDEX addresses those groups by
 * row position. Adding, removing, or reordering rows therefore requires
 * updating CHAR_INDEX to match. The bracketed half-open ranges below are the
 * row positions each letter currently occupies.
 *
 * Every name is spelled exactly like its enumerator, so each row is generated
 * from the enumerator itself: the string comes from stringification and the
 * length from sizeof, which removes hand-counted lengths.
 */
#define TAG_ENTRY(name) {#name, (uint8_t)(sizeof(#name) - 1), name}
static const TagMapEntry TAG_TABLE[] = {
    /* A: [0, 7) */
    TAG_ENTRY(A), TAG_ENTRY(ABBR), TAG_ENTRY(ADDRESS), TAG_ENTRY(AREA),
    TAG_ENTRY(ARTICLE), TAG_ENTRY(ASIDE), TAG_ENTRY(AUDIO),
    /* B: [7, 15) */
    TAG_ENTRY(B), TAG_ENTRY(BASE), TAG_ENTRY(BDI), TAG_ENTRY(BDO),
    TAG_ENTRY(BLOCKQUOTE), TAG_ENTRY(BODY), TAG_ENTRY(BR), TAG_ENTRY(BUTTON),
    /* C: [15, 21) */
    TAG_ENTRY(CANVAS), TAG_ENTRY(CAPTION), TAG_ENTRY(CITE), TAG_ENTRY(CODE),
    TAG_ENTRY(COL), TAG_ENTRY(COLGROUP),
    /* D: [21, 31) */
    TAG_ENTRY(DATA), TAG_ENTRY(DATALIST), TAG_ENTRY(DD), TAG_ENTRY(DEL),
    TAG_ENTRY(DETAILS), TAG_ENTRY(DFN), TAG_ENTRY(DIALOG), TAG_ENTRY(DIV),
    TAG_ENTRY(DL), TAG_ENTRY(DT),
    /* E: [31, 33) */
    TAG_ENTRY(EM), TAG_ENTRY(EMBED),
    /* F: [33, 38) */
    TAG_ENTRY(FIELDSET), TAG_ENTRY(FIGCAPTION), TAG_ENTRY(FIGURE),
    TAG_ENTRY(FOOTER), TAG_ENTRY(FORM),
    /* H: [38, 49) */
    TAG_ENTRY(H1), TAG_ENTRY(H2), TAG_ENTRY(H3), TAG_ENTRY(H4),
    TAG_ENTRY(H5), TAG_ENTRY(H6), TAG_ENTRY(HEAD), TAG_ENTRY(HEADER),
    TAG_ENTRY(HGROUP), TAG_ENTRY(HR), TAG_ENTRY(HTML),
    /* I: [49, 54) */
    TAG_ENTRY(I), TAG_ENTRY(IFRAME), TAG_ENTRY(IMG), TAG_ENTRY(INPUT),
    TAG_ENTRY(INS),
    /* K: [54, 56) */
    TAG_ENTRY(KBD), TAG_ENTRY(KEYGEN),
    /* L: [56, 60) */
    TAG_ENTRY(LABEL), TAG_ENTRY(LEGEND), TAG_ENTRY(LI), TAG_ENTRY(LINK),
    /* M: [60, 67) */
    TAG_ENTRY(MAIN), TAG_ENTRY(MAP), TAG_ENTRY(MARK), TAG_ENTRY(MATH),
    TAG_ENTRY(MENU), TAG_ENTRY(META), TAG_ENTRY(METER),
    /* N: [67, 69) */
    TAG_ENTRY(NAV), TAG_ENTRY(NOSCRIPT),
    /* O: [69, 74) */
    TAG_ENTRY(OBJECT), TAG_ENTRY(OL), TAG_ENTRY(OPTGROUP), TAG_ENTRY(OPTION),
    TAG_ENTRY(OUTPUT),
    /* P: [74, 79) */
    TAG_ENTRY(P), TAG_ENTRY(PARAM), TAG_ENTRY(PICTURE), TAG_ENTRY(PRE),
    TAG_ENTRY(PROGRESS),
    /* Q: [79, 80) */
    TAG_ENTRY(Q),
    /* R: [80, 85) */
    TAG_ENTRY(RB), TAG_ENTRY(RP), TAG_ENTRY(RT), TAG_ENTRY(RTC),
    TAG_ENTRY(RUBY),
    /* S: [85, 101) */
    TAG_ENTRY(S), TAG_ENTRY(SAMP), TAG_ENTRY(SCRIPT), TAG_ENTRY(SEARCH),
    TAG_ENTRY(SECTION), TAG_ENTRY(SELECT), TAG_ENTRY(SLOT), TAG_ENTRY(SMALL),
    TAG_ENTRY(SOURCE), TAG_ENTRY(SPAN), TAG_ENTRY(STRONG), TAG_ENTRY(STYLE),
    TAG_ENTRY(SUB), TAG_ENTRY(SUMMARY), TAG_ENTRY(SUP), TAG_ENTRY(SVG),
    /* T: [101, 113) */
    TAG_ENTRY(TABLE), TAG_ENTRY(TBODY), TAG_ENTRY(TD), TAG_ENTRY(TEMPLATE),
    TAG_ENTRY(TEXTAREA), TAG_ENTRY(TFOOT), TAG_ENTRY(TH), TAG_ENTRY(THEAD),
    TAG_ENTRY(TIME), TAG_ENTRY(TITLE), TAG_ENTRY(TR), TAG_ENTRY(TRACK),
    /* U: [113, 115) */
    TAG_ENTRY(U), TAG_ENTRY(UL),
    /* V: [115, 117) */
    TAG_ENTRY(VAR), TAG_ENTRY(VIDEO),
    /* W: [117, 118) */
    TAG_ENTRY(WBR),
};
#undef TAG_ENTRY

/* Number of rows in TAG_TABLE. */
#define TAG_TABLE_SIZE (sizeof(TAG_TABLE) / sizeof(TAG_TABLE[0]))
/* Half-open range [start, end) of TAG_TABLE row positions. */
typedef struct {
    uint8_t start;
    uint8_t end;
} CharBucket;

/*
 * For each letter 'A'..'Z' (indexed by letter - 'A'), the TAG_TABLE rows
 * whose names begin with that letter. A letter with no built-in tags has an
 * empty range (start == end). Must be kept in step with TAG_TABLE.
 */
static const CharBucket CHAR_INDEX[26] = {
    {0, 7},     /* A */
    {7, 15},    /* B */
    {15, 21},   /* C */
    {21, 31},   /* D */
    {31, 33},   /* E */
    {33, 38},   /* F */
    {38, 38},   /* G (none) */
    {38, 49},   /* H */
    {49, 54},   /* I */
    {54, 54},   /* J (none) */
    {54, 56},   /* K */
    {56, 60},   /* L */
    {60, 67},   /* M */
    {67, 69},   /* N */
    {69, 74},   /* O */
    {74, 79},   /* P */
    {79, 80},   /* Q */
    {80, 85},   /* R */
    {85, 101},  /* S */
    {101, 113}, /* T */
    {113, 115}, /* U */
    {115, 117}, /* V */
    {117, 118}, /* W */
    {118, 118}, /* X (none) */
    {118, 118}, /* Y (none) */
    {118, 118}, /* Z (none) */
};
/**
 * Maps a tag name to its TagType.
 *
 * The comparison is byte-exact, and only names starting with an ASCII
 * upper-case letter can match, because every name in TAG_TABLE is upper-case.
 * NOTE(review): callers presumably upper-case the name first - confirm.
 *
 * @param tag_name  Name to classify; not modified.
 * @return The matching built-in TagType, or CUSTOM when the name is empty,
 *         longer than 10 bytes, contains '-', or is not in TAG_TABLE.
 */
static inline TagType tag_type_for_name(const String *tag_name) {
    const size_t len = tag_name->size;

    /* No built-in name is longer than 10 bytes (e.g. BLOCKQUOTE). */
    if (len == 0 || len > 10) {
        return CUSTOM;
    }

    const char *name = tag_name->contents;
    const char initial = name[0];
    if (initial < 'A' || initial > 'Z') {
        return CUSTOM;
    }

    /* No built-in name contains a dash, so such names skip the table scan. */
    if (memchr(name, '-', len) != NULL) {
        return CUSTOM;
    }

    /* Only the rows that share the name's first letter need checking. */
    const CharBucket *bucket = &CHAR_INDEX[initial - 'A'];
    const TagMapEntry *const stop = TAG_TABLE + bucket->end;
    for (const TagMapEntry *entry = TAG_TABLE + bucket->start; entry < stop;
         ++entry) {
        if (entry->length == len && memcmp(name, entry->tag_name, len) == 0) {
            return entry->tag_type;
        }
    }

    return CUSTOM;
}
/*
 * Child tag types for which tag_can_contain() returns false when the parent
 * is a P element.
 */
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
    ADDRESS,    ARTICLE, ASIDE,  BLOCKQUOTE,
    DETAILS,    DIV,     DL,     FIELDSET,
    FIGCAPTION, FIGURE,  FOOTER, FORM,
    H1,         H2,      H3,     H4,
    H5,         H6,      HEADER, HGROUP,
    HR,         MAIN,    MENU,   NAV,
    OL,         P,       PRE,    SEARCH,
    SECTION,    TABLE,   UL,
};

/* Number of entries in TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS. */
#define P_CLOSING_TAGS_SIZE                      \
    (sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) / \
     sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[0]))
static inline Tag tag_new(void) {
Tag tag;
tag.type = END_;
tag.custom_tag_name = (String)array_new();
return tag;
}
/**
 * Builds a Tag from a name, taking ownership of `name`.
 *
 * If the name resolves to a built-in type, the buffer is released here and
 * the tag keeps an empty custom name. Otherwise the tag is CUSTOM and keeps
 * the buffer, which tag_free() later releases.
 *
 * @param name  Tag name; the caller must not use or free it afterwards.
 * @return The new tag.
 */
static inline Tag tag_for_name(String name) {
    const TagType resolved = tag_type_for_name(&name);

    Tag result = tag_new();
    result.type = resolved;

    if (resolved != CUSTOM) {
        /* Built-in tags are identified by type alone; drop the name. */
        array_delete(&name);
        return result;
    }

    result.custom_tag_name = name;
    return result;
}
/**
 * Releases the custom name held by a CUSTOM tag. Tags of any other type are
 * left untouched.
 */
static inline void tag_free(Tag *tag) {
    if (tag->type != CUSTOM) {
        return;
    }
    array_delete(&tag->custom_tag_name);
}
static inline bool tag_is_void(const Tag *self) {
return self->type < END_OF_VOID_TAGS;
}
static inline bool tag_is_raw_text(const Tag *self) {
return self->type > END_OF_VOID_TAGS && self->type < END_OF_RAW_TEXT_TAGS;
}
static inline bool tag_is_escapable_raw_text(const Tag *self) {
return self->type > END_OF_RAW_TEXT_TAGS &&
self->type < END_OF_ESCAPABLE_RAW_TEXT_TAGS;
}
static inline bool tag_eq(const Tag *self, const Tag *other) {
if (self->type != other->type)
return false;
if (self->type == CUSTOM) {
if (self->custom_tag_name.size != other->custom_tag_name.size) {
return false;
}
if (memcmp(self->custom_tag_name.contents, other->custom_tag_name.contents,
self->custom_tag_name.size) != 0) {
return false;
}
}
return true;
}
static inline bool tag_can_contain(Tag *self, const Tag *other) {
TagType child = other->type;
switch (self->type) {
case LI:
return child != LI;
case DT:
case DD:
return child != DT && child != DD;
case P:
for (unsigned int i = 0; i < P_CLOSING_TAGS_SIZE; i++) {
if (child == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[i]) {
return false;
}
}
return true;
case COLGROUP:
return child == COL || child == TEMPLATE;
case RB:
case RT:
case RP:
case RTC:
return child != RB && child != RT && child != RP && child != RTC;
case OPTGROUP:
return child != OPTGROUP;
case OPTION:
return child != OPTION && child != OPTGROUP;
case TR:
return child != TR;
case TD:
case TH:
return child != TD && child != TH && child != TR;
case THEAD:
case TBODY:
case TFOOT:
return child != THEAD && child != TBODY && child != TFOOT;
case CAPTION:
return child != THEAD && child != TBODY && child != TFOOT && child != TR &&
child != COLGROUP && child != COL;
case HEAD:
return child != BODY;
default:
return true;
}
}
#endif