use std::fmt::{self, Debug, Display, Formatter};
use ecow::{EcoString, EcoVec};
use typst_syntax::Span;
use typst_utils::{PicoStr, ResolvedPicoStr};
use crate::diag::{bail, HintedStrResult, StrResult};
use crate::foundations::{cast, Dict, Repr, Str};
use crate::introspection::{Introspector, Tag};
use crate::layout::Frame;
use crate::model::DocumentInfo;
#[derive(Debug, Clone)]
pub struct HtmlDocument {
pub root: HtmlElement,
pub info: DocumentInfo,
pub introspector: Introspector,
}
#[derive(Debug, Clone, Hash)]
pub enum HtmlNode {
Tag(Tag),
Text(EcoString, Span),
Element(HtmlElement),
Frame(Frame),
}
impl HtmlNode {
pub fn text(text: impl Into<EcoString>, span: Span) -> Self {
Self::Text(text.into(), span)
}
}
impl From<HtmlElement> for HtmlNode {
fn from(element: HtmlElement) -> Self {
Self::Element(element)
}
}
#[derive(Debug, Clone, Hash)]
pub struct HtmlElement {
pub tag: HtmlTag,
pub attrs: HtmlAttrs,
pub children: Vec<HtmlNode>,
pub span: Span,
}
impl HtmlElement {
pub fn new(tag: HtmlTag) -> Self {
Self {
tag,
attrs: HtmlAttrs::default(),
children: vec![],
span: Span::detached(),
}
}
pub fn with_children(mut self, children: Vec<HtmlNode>) -> Self {
self.children = children;
self
}
pub fn with_attr(mut self, key: HtmlAttr, value: impl Into<EcoString>) -> Self {
self.attrs.push(key, value);
self
}
pub fn spanned(mut self, span: Span) -> Self {
self.span = span;
self
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct HtmlTag(PicoStr);
impl HtmlTag {
pub fn intern(string: &str) -> StrResult<Self> {
if string.is_empty() {
bail!("tag name must not be empty");
}
if let Some(c) = string.chars().find(|&c| !charsets::is_valid_in_tag_name(c)) {
bail!("the character {} is not valid in a tag name", c.repr());
}
Ok(Self(PicoStr::intern(string)))
}
#[track_caller]
pub const fn constant(string: &'static str) -> Self {
if string.is_empty() {
panic!("tag name must not be empty");
}
let bytes = string.as_bytes();
let mut i = 0;
while i < bytes.len() {
if !bytes[i].is_ascii() || !charsets::is_valid_in_tag_name(bytes[i] as char) {
panic!("not all characters are valid in a tag name");
}
i += 1;
}
Self(PicoStr::constant(string))
}
pub fn resolve(self) -> ResolvedPicoStr {
self.0.resolve()
}
pub const fn into_inner(self) -> PicoStr {
self.0
}
}
impl Debug for HtmlTag {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
Display::fmt(self, f)
}
}
impl Display for HtmlTag {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "<{}>", self.resolve())
}
}
cast! {
HtmlTag,
self => self.0.resolve().as_str().into_value(),
v: Str => Self::intern(&v)?,
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
pub struct HtmlAttrs(pub EcoVec<(HtmlAttr, EcoString)>);
impl HtmlAttrs {
pub fn push(&mut self, attr: HtmlAttr, value: impl Into<EcoString>) {
self.0.push((attr, value.into()));
}
}
cast! {
HtmlAttrs,
self => self.0
.into_iter()
.map(|(key, value)| (key.resolve().as_str().into(), value.into_value()))
.collect::<Dict>()
.into_value(),
values: Dict => Self(values
.into_iter()
.map(|(k, v)| {
let attr = HtmlAttr::intern(&k)?;
let value = v.cast::<EcoString>()?;
Ok((attr, value))
})
.collect::<HintedStrResult<_>>()?),
}
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct HtmlAttr(PicoStr);
impl HtmlAttr {
pub fn intern(string: &str) -> StrResult<Self> {
if string.is_empty() {
bail!("attribute name must not be empty");
}
if let Some(c) =
string.chars().find(|&c| !charsets::is_valid_in_attribute_name(c))
{
bail!("the character {} is not valid in an attribute name", c.repr());
}
Ok(Self(PicoStr::intern(string)))
}
#[track_caller]
pub const fn constant(string: &'static str) -> Self {
if string.is_empty() {
panic!("attribute name must not be empty");
}
let bytes = string.as_bytes();
let mut i = 0;
while i < bytes.len() {
if !bytes[i].is_ascii()
|| !charsets::is_valid_in_attribute_name(bytes[i] as char)
{
panic!("not all characters are valid in an attribute name");
}
i += 1;
}
Self(PicoStr::constant(string))
}
pub fn resolve(self) -> ResolvedPicoStr {
self.0.resolve()
}
pub const fn into_inner(self) -> PicoStr {
self.0
}
}
impl Debug for HtmlAttr {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
Display::fmt(self, f)
}
}
impl Display for HtmlAttr {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.resolve())
}
}
cast! {
HtmlAttr,
self => self.0.resolve().as_str().into_value(),
v: Str => Self::intern(&v)?,
}
pub mod charsets {
pub const fn is_valid_in_tag_name(c: char) -> bool {
c.is_ascii_alphanumeric()
}
pub const fn is_valid_in_attribute_name(c: char) -> bool {
match c {
'\0' | ' ' | '"' | '\'' | '>' | '/' | '=' => false,
c if is_whatwg_control_char(c) => false,
c if is_whatwg_non_char(c) => false,
_ => true,
}
}
pub const fn is_valid_in_attribute_value(c: char) -> bool {
match c {
'&' => false,
'"' => false,
c => is_w3c_text_char(c),
}
}
pub const fn is_valid_in_normal_element_text(c: char) -> bool {
match c {
'&' => false,
'<' => false,
c => is_w3c_text_char(c),
}
}
pub const fn is_w3c_text_char(c: char) -> bool {
match c {
c if is_whatwg_non_char(c) => false,
c if is_whatwg_control_char(c) => c.is_ascii_whitespace(),
_ => true,
}
}
const fn is_whatwg_non_char(c: char) -> bool {
match c {
'\u{fdd0}'..='\u{fdef}' => true,
c if c as u32 & 0xfffe == 0xfffe && c as u32 <= 0x10ffff => true,
_ => false,
}
}
const fn is_whatwg_control_char(c: char) -> bool {
match c {
'\u{00}'..='\u{1f}' => true,
'\u{7f}'..='\u{9f}' => true,
_ => false,
}
}
}
pub mod tag {
use super::HtmlTag;
macro_rules! tags {
($($tag:ident)*) => {
$(#[allow(non_upper_case_globals)]
pub const $tag: HtmlTag = HtmlTag::constant(
stringify!($tag)
);)*
}
}
tags! {
a
abbr
address
area
article
aside
audio
b
base
bdi
bdo
blockquote
body
br
button
canvas
caption
cite
code
col
colgroup
data
datalist
dd
del
details
dfn
dialog
div
dl
dt
em
embed
fieldset
figcaption
figure
footer
form
h1
h2
h3
h4
h5
h6
head
header
hgroup
hr
html
i
iframe
img
input
ins
kbd
label
legend
li
link
main
map
mark
menu
meta
meter
nav
noscript
object
ol
optgroup
option
output
p
param
picture
pre
progress
q
rp
rt
ruby
s
samp
script
search
section
select
slot
small
source
span
strong
style
sub
summary
sup
table
tbody
td
template
textarea
tfoot
th
thead
time
title
tr
track
u
ul
var
video
wbr
}
pub fn is_void(tag: HtmlTag) -> bool {
matches!(
tag,
self::area
| self::base
| self::br
| self::col
| self::embed
| self::hr
| self::img
| self::input
| self::link
| self::meta
| self::param
| self::source
| self::track
| self::wbr
)
}
pub fn is_raw(tag: HtmlTag) -> bool {
matches!(tag, self::script | self::style)
}
pub fn is_escapable_raw(tag: HtmlTag) -> bool {
matches!(tag, self::textarea | self::title)
}
pub fn is_metadata(tag: HtmlTag) -> bool {
matches!(
tag,
self::base
| self::link
| self::meta
| self::noscript
| self::script
| self::style
| self::template
| self::title
)
}
pub fn is_block_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
self::html
| self::head
| self::body
| self::article
| self::aside
| self::h1
| self::h2
| self::h3
| self::h4
| self::h5
| self::h6
| self::hgroup
| self::nav
| self::section
| self::dd
| self::dl
| self::dt
| self::menu
| self::ol
| self::ul
| self::address
| self::blockquote
| self::dialog
| self::div
| self::fieldset
| self::figure
| self::figcaption
| self::footer
| self::form
| self::header
| self::hr
| self::legend
| self::main
| self::p
| self::pre
| self::search
)
}
pub fn is_inline_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
self::abbr
| self::a
| self::bdi
| self::b
| self::br
| self::bdo
| self::code
| self::cite
| self::dfn
| self::data
| self::i
| self::em
| self::mark
| self::kbd
| self::rp
| self::q
| self::ruby
| self::rt
| self::samp
| self::s
| self::span
| self::small
| self::sub
| self::strong
| self::time
| self::sup
| self::var
| self::u
)
}
pub fn is_tabular_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
self::table
| self::thead
| self::tbody
| self::tfoot
| self::tr
| self::th
| self::td
| self::caption
| self::col
| self::colgroup
)
}
}
#[allow(non_upper_case_globals)]
pub mod attr {
use super::HtmlAttr;
macro_rules! attrs {
($($attr:ident)*) => {
$(#[allow(non_upper_case_globals)]
pub const $attr: HtmlAttr = HtmlAttr::constant(
stringify!($attr)
);)*
}
}
attrs! {
charset
cite
colspan
content
href
name
reversed
role
rowspan
start
style
value
}
pub const aria_level: HtmlAttr = HtmlAttr::constant("aria-level");
}