typst_html/
dom.rs

1use std::fmt::{self, Debug, Display, Formatter};
2
3use ecow::{EcoString, EcoVec};
4use typst_library::diag::{HintedStrResult, StrResult, bail};
5use typst_library::foundations::{Dict, Repr, Str, StyleChain, cast};
6use typst_library::introspection::{Introspector, Location, Tag};
7use typst_library::layout::{Abs, Frame, Point};
8use typst_library::model::DocumentInfo;
9use typst_library::text::TextElem;
10use typst_syntax::Span;
11use typst_utils::{PicoStr, ResolvedPicoStr};
12
13use crate::{attr, charsets, css};
14
15/// An HTML document.
16#[derive(Debug, Clone)]
17pub struct HtmlDocument {
18    /// The document's root HTML element.
19    pub root: HtmlElement,
20    /// Details about the document.
21    pub info: DocumentInfo,
22    /// Provides the ability to execute queries on the document.
23    pub introspector: Introspector,
24}
25
26/// A child of an HTML element.
27#[derive(Debug, Clone, Hash)]
28pub enum HtmlNode {
29    /// An introspectable element that produced something within this node.
30    Tag(Tag),
31    /// Plain text.
32    Text(EcoString, Span),
33    /// Another element.
34    Element(HtmlElement),
35    /// Layouted content that will be embedded into HTML as an SVG.
36    Frame(HtmlFrame),
37}
38
39impl HtmlNode {
40    /// Create a plain text node.
41    pub fn text(text: impl Into<EcoString>, span: Span) -> Self {
42        Self::Text(text.into(), span)
43    }
44
45    /// Returns the span, if any.
46    pub fn span(&self) -> Span {
47        match self {
48            Self::Tag(_) => Span::detached(),
49            Self::Text(_, span) => *span,
50            Self::Element(element) => element.span,
51            Self::Frame(frame) => frame.span,
52        }
53    }
54}
55
56impl From<Tag> for HtmlNode {
57    fn from(tag: Tag) -> Self {
58        Self::Tag(tag)
59    }
60}
61
62impl From<HtmlElement> for HtmlNode {
63    fn from(element: HtmlElement) -> Self {
64        Self::Element(element)
65    }
66}
67
68impl From<HtmlFrame> for HtmlNode {
69    fn from(frame: HtmlFrame) -> Self {
70        Self::Frame(frame)
71    }
72}
73
74/// An HTML element.
75#[derive(Debug, Clone, Hash)]
76pub struct HtmlElement {
77    /// The HTML tag.
78    pub tag: HtmlTag,
79    /// The element's attributes.
80    pub attrs: HtmlAttrs,
81    /// The element's children.
82    pub children: EcoVec<HtmlNode>,
83    /// The element's logical parent. For introspection purposes, this element
84    /// is logically ordered immediately after the parent's start location.
85    pub parent: Option<Location>,
86    /// The span from which the element originated, if any.
87    pub span: Span,
88    /// Whether this is a span with `white-space: pre-wrap`  generated by the
89    /// compiler to prevent whitespace from being collapsed.
90    ///
91    /// For such spans, spaces and tabs in the element are emitted as escape
92    /// sequences. While this does not matter for browser engine rendering (as
93    /// the `white-space` CSS property is enough), it ensures that formatters
94    /// won't mess up the output.
95    pub pre_span: bool,
96}
97
98impl HtmlElement {
99    /// Create a new, blank element without attributes or children.
100    pub fn new(tag: HtmlTag) -> Self {
101        Self {
102            tag,
103            attrs: HtmlAttrs::default(),
104            children: EcoVec::new(),
105            parent: None,
106            span: Span::detached(),
107            pre_span: false,
108        }
109    }
110
111    /// Attach children to the element.
112    ///
113    /// Note: This overwrites potential previous children.
114    pub fn with_children(mut self, children: EcoVec<HtmlNode>) -> Self {
115        self.children = children;
116        self
117    }
118
119    /// Add an attribute to the element.
120    pub fn with_attr(mut self, key: HtmlAttr, value: impl Into<EcoString>) -> Self {
121        self.attrs.push(key, value);
122        self
123    }
124
125    /// Adds CSS styles to an element.
126    pub(crate) fn with_styles(self, properties: css::Properties) -> Self {
127        if let Some(value) = properties.into_inline_styles() {
128            self.with_attr(attr::style, value)
129        } else {
130            self
131        }
132    }
133
134    /// Attach a span to the element.
135    pub fn spanned(mut self, span: Span) -> Self {
136        self.span = span;
137        self
138    }
139}
140
141/// The tag of an HTML element.
142#[derive(Copy, Clone, Eq, PartialEq, Hash)]
143pub struct HtmlTag(PicoStr);
144
145impl HtmlTag {
146    /// Intern an HTML tag string at runtime.
147    pub fn intern(string: &str) -> StrResult<Self> {
148        if string.is_empty() {
149            bail!("tag name must not be empty");
150        }
151
152        let mut has_hyphen = false;
153        let mut has_uppercase = false;
154
155        for c in string.chars() {
156            if c == '-' {
157                has_hyphen = true;
158            } else if !charsets::is_valid_in_tag_name(c) {
159                bail!("the character {} is not valid in a tag name", c.repr());
160            } else {
161                has_uppercase |= c.is_ascii_uppercase();
162            }
163        }
164
165        // If we encounter a hyphen, we are dealing with a custom element rather
166        // than a standard HTML element.
167        //
168        // A valid custom element name must:
169        // - Contain at least one hyphen (U+002D)
170        // - Start with an ASCII lowercase letter (a-z)
171        // - Not contain any ASCII uppercase letters (A-Z)
172        // - Not be one of the reserved names
173        // - Only contain valid characters (ASCII alphanumeric and hyphens)
174        //
175        // See https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
176        if has_hyphen {
177            if !string.starts_with(|c: char| c.is_ascii_lowercase()) {
178                bail!("custom element name must start with a lowercase letter");
179            }
180            if has_uppercase {
181                bail!("custom element name must not contain uppercase letters");
182            }
183
184            // These names are used in SVG and MathML. Since `html.elem` only
185            // supports creation of _HTML_ elements, they are forbidden.
186            if matches!(
187                string,
188                "annotation-xml"
189                    | "color-profile"
190                    | "font-face"
191                    | "font-face-src"
192                    | "font-face-uri"
193                    | "font-face-format"
194                    | "font-face-name"
195                    | "missing-glyph"
196            ) {
197                bail!("name is reserved and not valid for a custom element");
198            }
199        }
200
201        Ok(Self(PicoStr::intern(string)))
202    }
203
204    /// Creates a compile-time constant `HtmlTag`.
205    ///
206    /// Should only be used in const contexts because it can panic.
207    #[track_caller]
208    pub const fn constant(string: &'static str) -> Self {
209        if string.is_empty() {
210            panic!("tag name must not be empty");
211        }
212
213        let bytes = string.as_bytes();
214        let mut i = 0;
215        while i < bytes.len() {
216            if !bytes[i].is_ascii() || !charsets::is_valid_in_tag_name(bytes[i] as char) {
217                panic!("not all characters are valid in a tag name");
218            }
219            i += 1;
220        }
221
222        Self(PicoStr::constant(string))
223    }
224
225    /// Resolves the tag to a string.
226    pub fn resolve(self) -> ResolvedPicoStr {
227        self.0.resolve()
228    }
229
230    /// Turns the tag into its inner interned string.
231    pub const fn into_inner(self) -> PicoStr {
232        self.0
233    }
234}
235
236impl Debug for HtmlTag {
237    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
238        Display::fmt(self, f)
239    }
240}
241
242impl Display for HtmlTag {
243    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
244        write!(f, "<{}>", self.resolve())
245    }
246}
247
248cast! {
249    HtmlTag,
250    self => self.0.resolve().as_str().into_value(),
251    v: Str => Self::intern(&v)?,
252}
253
254/// Attributes of an HTML element.
255#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
256pub struct HtmlAttrs(pub EcoVec<(HtmlAttr, EcoString)>);
257
258impl HtmlAttrs {
259    /// Creates an empty attribute list.
260    pub fn new() -> Self {
261        Self::default()
262    }
263
264    /// Adds an attribute.
265    pub fn push(&mut self, attr: HtmlAttr, value: impl Into<EcoString>) {
266        self.0.push((attr, value.into()));
267    }
268
269    /// Adds an attribute to the start of the list.
270    pub fn push_front(&mut self, attr: HtmlAttr, value: impl Into<EcoString>) {
271        self.0.insert(0, (attr, value.into()));
272    }
273
274    /// Finds an attribute value.
275    pub fn get(&self, attr: HtmlAttr) -> Option<&EcoString> {
276        self.0.iter().find(|&&(k, _)| k == attr).map(|(_, v)| v)
277    }
278}
279
280cast! {
281    HtmlAttrs,
282    self => self.0
283        .into_iter()
284        .map(|(key, value)| (key.resolve().as_str().into(), value.into_value()))
285        .collect::<Dict>()
286        .into_value(),
287    values: Dict => Self(values
288        .into_iter()
289        .map(|(k, v)| {
290            let attr = HtmlAttr::intern(&k)?;
291            let value = v.cast::<EcoString>()?;
292            Ok((attr, value))
293        })
294        .collect::<HintedStrResult<_>>()?),
295}
296
297/// An attribute of an HTML element.
298#[derive(Copy, Clone, Eq, PartialEq, Hash)]
299pub struct HtmlAttr(PicoStr);
300
301impl HtmlAttr {
302    /// Intern an HTML attribute string at runtime.
303    pub fn intern(string: &str) -> StrResult<Self> {
304        if string.is_empty() {
305            bail!("attribute name must not be empty");
306        }
307
308        if let Some(c) =
309            string.chars().find(|&c| !charsets::is_valid_in_attribute_name(c))
310        {
311            bail!("the character {} is not valid in an attribute name", c.repr());
312        }
313
314        Ok(Self(PicoStr::intern(string)))
315    }
316
317    /// Creates a compile-time constant `HtmlAttr`.
318    ///
319    /// Must only be used in const contexts (in a constant definition or
320    /// explicit `const { .. }` block) because otherwise a panic for a malformed
321    /// attribute or not auto-internible constant will only be caught at
322    /// runtime.
323    #[track_caller]
324    pub const fn constant(string: &'static str) -> Self {
325        if string.is_empty() {
326            panic!("attribute name must not be empty");
327        }
328
329        let bytes = string.as_bytes();
330        let mut i = 0;
331        while i < bytes.len() {
332            if !bytes[i].is_ascii()
333                || !charsets::is_valid_in_attribute_name(bytes[i] as char)
334            {
335                panic!("not all characters are valid in an attribute name");
336            }
337            i += 1;
338        }
339
340        Self(PicoStr::constant(string))
341    }
342
343    /// Resolves the attribute to a string.
344    pub fn resolve(self) -> ResolvedPicoStr {
345        self.0.resolve()
346    }
347
348    /// Turns the attribute into its inner interned string.
349    pub const fn into_inner(self) -> PicoStr {
350        self.0
351    }
352}
353
354impl Debug for HtmlAttr {
355    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
356        Display::fmt(self, f)
357    }
358}
359
360impl Display for HtmlAttr {
361    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
362        write!(f, "{}", self.resolve())
363    }
364}
365
366cast! {
367    HtmlAttr,
368    self => self.0.resolve().as_str().into_value(),
369    v: Str => Self::intern(&v)?,
370}
371
372/// Layouted content that will be embedded into HTML as an SVG.
373#[derive(Debug, Clone, Hash)]
374pub struct HtmlFrame {
375    /// The frame that will be displayed as an SVG.
376    pub inner: Frame,
377    /// The text size where the frame was defined. This is used to size the
378    /// frame with em units to make text in and outside of the frame sized
379    /// consistently.
380    pub text_size: Abs,
381    /// An ID to assign to the SVG itself.
382    pub id: Option<EcoString>,
383    /// IDs to assign to destination jump points within the SVG.
384    pub link_points: EcoVec<(Point, EcoString)>,
385    /// The span from which the frame originated.
386    pub span: Span,
387}
388
389impl HtmlFrame {
390    /// Wraps a laid-out frame.
391    pub fn new(inner: Frame, styles: StyleChain, span: Span) -> Self {
392        Self {
393            inner,
394            text_size: styles.resolve(TextElem::size),
395            id: None,
396            link_points: EcoVec::new(),
397            span,
398        }
399    }
400}