brik 0.10.0

HTML tree manipulation library - a building block for HTML parsing and manipulation
Documentation
// Pest grammar for minimal HTML parsing to locate and modify the <html> tag
// Used for namespace injection before html5ever parsing

// Entry rule: from the start of input, match the preamble, then the <html>
// start tag, then capture whatever follows as `rest`.
document = { SOI ~ preamble ~ html_tag ~ rest }

// Everything that precedes the <html> start tag. Processing instructions,
// DOCTYPE declarations and comments are consumed as whole units so that
// "<html" text inside them is not mistaken for the real tag; any other
// character is consumed one at a time until an html_tag can start.
//
// The fallback alternative is only tried after pi, doctype and comment have
// already failed at the current position, so it only needs to guard
// against html_tag.
preamble = {
    (
        pi
      | doctype
      | comment
      | (!html_tag ~ ANY)
    )*
}

// Processing instruction: "<?" followed by any characters up to and
// including the first "?>".
pi = { "<?" ~ (!"?>" ~ ANY)* ~ "?>" }

// DOCTYPE declaration. The keyword is matched case-insensitively; the body is
// consumed up to the closing ">" or up to the "[" that opens an internal
// subset. After the optional internal subset, whitespace is allowed before
// the closing ">".
doctype = { "<!" ~ ^"DOCTYPE" ~ (!(">" | "[") ~ ANY)* ~ internal_subset? ~ whitespace? ~ ">" }

// Internal DTD subset: "[" ... "]". The subset ends at the first "]" that is
// followed (after optional whitespace) by ">", so a bare "]" inside the
// subset does not terminate it. The closing ">" is deliberately NOT consumed
// here: it belongs to `doctype`, which would otherwise require a second ">"
// and never match a declaration that has an internal subset.
internal_subset = { "[" ~ (!("]" ~ whitespace? ~ ">") ~ ANY)* ~ "]" }

// Comment: "<!--" followed by any characters up to and including the
// first "-->".
comment = { "<!--" ~ (!"-->" ~ ANY)* ~ "-->" }

// The <html> start tag: "<", the tag name (case-insensitive), its attribute
// list, and the closing ">" or "/>". `attributes` already consumes any
// trailing whitespace, so none needs to be matched separately before
// tag_close.
html_tag = {
    "<" ~ ^"html"
    ~ attributes
    ~ tag_close
}

// Attribute list: zero or more attributes, each preceded by whitespace,
// followed by optional trailing whitespace. Matches the empty string when
// the tag has no attributes.
attributes = { (whitespace ~ attribute)* ~ whitespace? }

// A single attribute: a name, optionally followed by "=" and a value.
// Whitespace is permitted on either side of "=", and the value itself may be
// absent after "=" (e.g. `lang=`).
attribute = { attr_name ~ (whitespace? ~ "=" ~ whitespace? ~ attr_value?)? }

// Attribute name (atomic rule): starts with an ASCII letter, "_" or ":", then
// any run of ASCII alphanumerics, "_", ":", "-" or ".".
attr_name = @{ (ASCII_ALPHA | "_" | ":") ~ (ASCII_ALPHANUMERIC | "_" | ":" | "-" | ".")* }

// Attribute value (atomic rule). Alternatives are tried in order; the quoted
// forms include their surrounding quotes in the matched span and end at the
// first matching quote character (no escape handling).
attr_value = @{
    // Double-quoted value.
    ("\"" ~ (!"\"" ~ ANY)* ~ "\"") |
    // Single-quoted value.
    ("'" ~ (!"'" ~ ANY)* ~ "'") |
    // Unquoted value: one or more characters up to whitespace, ">", "/" or "=".
    (!(whitespace | ">" | "/" | "=") ~ ANY)+
}

// End of the start tag: either ">" or the self-closing "/>".
tag_close = { ">" | "/>" }
// One or more WHITE_SPACE characters. Silent rule: it produces no pair of its
// own in the parse result.
whitespace = _{ WHITE_SPACE+ }
// Everything after the <html> start tag, consumed verbatim to end of input.
rest = { ANY* }