Skip to main content

typst_html/
dom.rs

1use std::fmt::{self, Debug, Display, Formatter};
2use std::sync::Arc;
3
4use ecow::{EcoString, EcoVec};
5use typst_library::diag::{HintedStrResult, SourceResult, StrResult, bail};
6use typst_library::engine::Engine;
7use typst_library::foundations::{
8    Content, Dict, Fold, Output, Repr, Str, StyleChain, Target, cast,
9};
10use typst_library::introspection::{Introspector, Location, Tag};
11use typst_library::layout::{Abs, Frame, Point};
12use typst_library::model::{Document, DocumentInfo};
13use typst_library::text::TextElem;
14use typst_syntax::Span;
15use typst_utils::{PicoStr, ResolvedPicoStr};
16
17use crate::document::HtmlOutput;
18use crate::{HtmlIntrospector, charsets, css};
19
/// An HTML document.
///
/// Unlike the `PagedDocument`, this does not implement `Hash` because the HTML
/// introspector is neither hashable nor guaranteed to be 100% derived from the
/// output (due to the presence of `root_mut` which is used for cross-linking).
#[derive(Debug, Clone)]
pub struct HtmlDocument {
    /// The produced HTML output, which owns the root element.
    output: HtmlOutput,
    /// Details about the document.
    info: DocumentInfo,
    /// The introspector built from the output's nodes when the document is
    /// created. Kept behind an `Arc`.
    introspector: Arc<HtmlIntrospector>,
}
31
32impl HtmlDocument {
33    /// Creates a new paged document from its parts.
34    ///
35    /// Internally builds the introspector.
36    pub fn new(output: HtmlOutput, info: DocumentInfo) -> Self {
37        let introspector = HtmlIntrospector::new(output.nodes());
38        Self { output, info, introspector: Arc::new(introspector) }
39    }
40
41    /// The document's root HTML element.
42    pub fn root(&self) -> &HtmlElement {
43        self.output.root()
44    }
45
46    /// The document's root HTML element, mutably.
47    ///
48    /// Technically, mutating the root can mess up the introspector. This should
49    /// be fixed at some point (<https://github.com/typst/typst/issues/7951>).
50    pub fn root_mut(&mut self) -> &mut HtmlElement {
51        self.output.root_mut()
52    }
53
54    /// The document's root HTML element, in its containing node wrapper.
55    pub fn root_node(&self) -> &HtmlNode {
56        self.output.root_node()
57    }
58
59    /// Details about the document, mutably.
60    pub fn info_mut(&mut self) -> &mut DocumentInfo {
61        &mut self.info
62    }
63
64    /// Provides the ability to execute queries on the document.
65    pub fn introspector(&self) -> &Arc<HtmlIntrospector> {
66        &self.introspector
67    }
68
69    /// Provides the ability to execute queries on the document.
70    pub fn introspector_mut(&mut self) -> &mut HtmlIntrospector {
71        Arc::make_mut(&mut self.introspector)
72    }
73}
74
/// Exposes the document details through the generic `Document` trait.
impl Document for HtmlDocument {
    /// Details about the document.
    fn info(&self) -> &DocumentInfo {
        &self.info
    }
}
80
81impl Output for HtmlDocument {
82    fn introspector(&self) -> &dyn Introspector {
83        self.introspector.as_ref()
84    }
85
86    fn target() -> Target {
87        Target::Html
88    }
89
90    fn create(
91        engine: &mut Engine,
92        content: &Content,
93        styles: StyleChain,
94    ) -> SourceResult<Self> {
95        crate::html_document(engine, content, styles)
96    }
97}
98
/// A child of an HTML element.
#[derive(Debug, Clone, Hash)]
pub enum HtmlNode {
    /// An introspectable element that produced something within this node.
    ///
    /// Tags carry no span of their own; `HtmlNode::span` reports a detached
    /// span for them.
    Tag(Tag),
    /// Plain text, together with the span it originated from.
    Text(EcoString, Span),
    /// Another element.
    Element(HtmlElement),
    /// Layouted content that will be embedded into HTML as an SVG.
    Frame(HtmlFrame),
}
111
112impl HtmlNode {
113    /// Create a plain text node.
114    pub fn text(text: impl Into<EcoString>, span: Span) -> Self {
115        Self::Text(text.into(), span)
116    }
117
118    /// Returns the span, if any.
119    pub fn span(&self) -> Span {
120        match self {
121            Self::Tag(_) => Span::detached(),
122            Self::Text(_, span) => *span,
123            Self::Element(element) => element.span,
124            Self::Frame(frame) => frame.span,
125        }
126    }
127}
128
/// Lets a `Tag` be used wherever an `HtmlNode` is expected.
impl From<Tag> for HtmlNode {
    /// Wraps the tag in `HtmlNode::Tag`.
    fn from(tag: Tag) -> Self {
        Self::Tag(tag)
    }
}
134
/// Lets an `HtmlElement` be used wherever an `HtmlNode` is expected.
impl From<HtmlElement> for HtmlNode {
    /// Wraps the element in `HtmlNode::Element`.
    fn from(element: HtmlElement) -> Self {
        Self::Element(element)
    }
}
140
/// Lets an `HtmlFrame` be used wherever an `HtmlNode` is expected.
impl From<HtmlFrame> for HtmlNode {
    /// Wraps the frame in `HtmlNode::Frame`.
    fn from(frame: HtmlFrame) -> Self {
        Self::Frame(frame)
    }
}
146
/// An extension trait for `[HtmlNode]`.
pub trait HtmlSliceExt {
    /// Iterates over nodes alongside the indices as they would be observed in
    /// the final DOM.
    ///
    /// - Tags don't occupy a DOM slot of their own and don't advance the
    ///   cursor. In the implementation for `[HtmlNode]`, a tag is reported
    ///   with the index a text node at the same position would receive: the
    ///   index of the text group it follows, or otherwise the slot right
    ///   after the preceding element or frame.
    ///
    /// - For indexing purposes, consecutive text nodes are considered as
    ///   groups. They receive the same index as they are not distinguishable on
    ///   the DOM level.
    fn iter_with_dom_indices(&self) -> impl Iterator<Item = (&HtmlNode, usize)>;
}
160
161impl HtmlSliceExt for [HtmlNode] {
162    fn iter_with_dom_indices(&self) -> impl Iterator<Item = (&HtmlNode, usize)> {
163        let mut cursor = 0;
164        let mut was_text = false;
165        self.iter().map(move |child| {
166            let mut i = cursor;
167            match child {
168                HtmlNode::Tag(_) => {}
169                HtmlNode::Text(..) => was_text = true,
170                _ => {
171                    cursor += usize::from(was_text);
172                    i = cursor;
173                    cursor += 1;
174                    was_text = false;
175                }
176            }
177            (child, i)
178        })
179    }
180}
181
/// An HTML element.
#[derive(Debug, Clone, Hash)]
pub struct HtmlElement {
    /// The HTML tag.
    pub tag: HtmlTag,
    /// The element's attributes.
    pub attrs: HtmlAttrs,
    /// The element's CSS properties. Currently only used for generated styles.
    pub css: css::Properties,
    /// The element's children.
    pub children: EcoVec<HtmlNode>,
    /// The element's logical parent. For introspection purposes, this element
    /// is logically ordered immediately after the parent's start location.
    pub parent: Option<Location>,
    /// The span from which the element originated, if any.
    pub span: Span,
    /// Whether this is a span with `white-space: pre-wrap` generated by the
    /// compiler to prevent whitespace from being collapsed.
    ///
    /// For such spans, spaces and tabs in the element are emitted as escape
    /// sequences. While this does not matter for browser engine rendering (as
    /// the `white-space` CSS property is enough), it ensures that formatters
    /// won't mess up the output.
    pub pre_span: bool,
}
207
208impl HtmlElement {
209    /// Create a new, blank element without attributes or children.
210    pub fn new(tag: HtmlTag) -> Self {
211        Self {
212            tag,
213            attrs: HtmlAttrs::default(),
214            css: css::Properties::default(),
215            children: EcoVec::new(),
216            parent: None,
217            span: Span::detached(),
218            pre_span: false,
219        }
220    }
221
222    /// Attach children to the element.
223    ///
224    /// Note: This overwrites potential previous children.
225    pub fn with_children(mut self, children: EcoVec<HtmlNode>) -> Self {
226        self.children = children;
227        self
228    }
229
230    /// Add an attribute to the element.
231    pub fn with_attr(mut self, key: HtmlAttr, value: impl Into<EcoString>) -> Self {
232        self.attrs.push(key, value);
233        self
234    }
235
236    /// Adds CSS styles to an element.
237    pub(crate) fn with_css(mut self, css: css::Properties) -> Self {
238        self.css = css;
239        self
240    }
241
242    /// Attach a span to the element.
243    pub fn spanned(mut self, span: Span) -> Self {
244        self.span = span;
245        self
246    }
247}
248
/// The tag of an HTML element.
///
/// Wraps an interned string (`PicoStr`). Both the `Debug` and `Display`
/// output show the tag name in angle brackets.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct HtmlTag(PicoStr);
252
253impl HtmlTag {
254    /// Intern an HTML tag string at runtime.
255    pub fn intern(string: &str) -> StrResult<Self> {
256        if string.is_empty() {
257            bail!("tag name must not be empty");
258        }
259
260        let mut has_hyphen = false;
261        let mut has_uppercase = false;
262
263        for c in string.chars() {
264            if c == '-' {
265                has_hyphen = true;
266            } else if !charsets::is_valid_in_tag_name(c) {
267                bail!("the character {} is not valid in a tag name", c.repr());
268            } else {
269                has_uppercase |= c.is_ascii_uppercase();
270            }
271        }
272
273        // If we encounter a hyphen, we are dealing with a custom element rather
274        // than a standard HTML element.
275        //
276        // A valid custom element name must:
277        // - Contain at least one hyphen (U+002D)
278        // - Start with an ASCII lowercase letter (a-z)
279        // - Not contain any ASCII uppercase letters (A-Z)
280        // - Not be one of the reserved names
281        // - Only contain valid characters (ASCII alphanumeric and hyphens)
282        //
283        // See https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
284        if has_hyphen {
285            if !string.starts_with(|c: char| c.is_ascii_lowercase()) {
286                bail!("custom element name must start with a lowercase letter");
287            }
288            if has_uppercase {
289                bail!("custom element name must not contain uppercase letters");
290            }
291
292            // These names are used in SVG and MathML. Since `html.elem` only
293            // supports creation of _HTML_ elements, they are forbidden.
294            if matches!(
295                string,
296                "annotation-xml"
297                    | "color-profile"
298                    | "font-face"
299                    | "font-face-src"
300                    | "font-face-uri"
301                    | "font-face-format"
302                    | "font-face-name"
303                    | "missing-glyph"
304            ) {
305                bail!("name is reserved and not valid for a custom element");
306            }
307        }
308
309        Ok(Self(PicoStr::intern(string)))
310    }
311
312    /// Creates a compile-time constant `HtmlTag`.
313    ///
314    /// Should only be used in const contexts because it can panic.
315    #[track_caller]
316    pub const fn constant(string: &'static str) -> Self {
317        if string.is_empty() {
318            panic!("tag name must not be empty");
319        }
320
321        let bytes = string.as_bytes();
322        let mut i = 0;
323        while i < bytes.len() {
324            if !bytes[i].is_ascii() || !charsets::is_valid_in_tag_name(bytes[i] as char) {
325                panic!("not all characters are valid in a tag name");
326            }
327            i += 1;
328        }
329
330        Self(PicoStr::constant(string))
331    }
332
333    /// Resolves the tag to a string.
334    pub fn resolve(self) -> ResolvedPicoStr {
335        self.0.resolve()
336    }
337
338    /// Turns the tag into its inner interned string.
339    pub const fn into_inner(self) -> PicoStr {
340        self.0
341    }
342}
343
/// Debug output of a tag is identical to its display output.
impl Debug for HtmlTag {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Forward to `Display` with the same formatter.
        Display::fmt(self, f)
    }
}
349
/// Displays the tag as its resolved name wrapped in angle brackets.
impl Display for HtmlTag {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "<{}>", self.resolve())
    }
}
355
// Conversions between `HtmlTag` and Typst values: a tag converts into its
// resolved name as a string value, and a `Str` is converted into a tag through
// `HtmlTag::intern`, propagating its validation error.
cast! {
    HtmlTag,
    self => self.0.resolve().as_str().into_value(),
    v: Str => Self::intern(&v)?,
}
361
/// Attributes of an HTML element.
///
/// Stored as an ordered list of attribute/value pairs.
#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
pub struct HtmlAttrs(pub EcoVec<(HtmlAttr, EcoString)>);
365
366impl HtmlAttrs {
367    /// Creates an empty attribute list.
368    pub fn new() -> Self {
369        Self::default()
370    }
371
372    /// Adds an attribute.
373    pub fn push(&mut self, attr: HtmlAttr, value: impl Into<EcoString>) {
374        self.0.push((attr, value.into()));
375    }
376
377    /// Adds an attribute to the start of the list.
378    pub fn push_front(&mut self, attr: HtmlAttr, value: impl Into<EcoString>) {
379        self.0.insert(0, (attr, value.into()));
380    }
381
382    /// Finds an attribute value.
383    pub fn get(&self, attr: HtmlAttr) -> Option<&EcoString> {
384        self.0.iter().find(|&&(k, _)| k == attr).map(|(_, v)| v)
385    }
386
387    /// Finds an attribute value.
388    pub fn get_mut(&mut self, attr: HtmlAttr) -> Option<&mut EcoString> {
389        self.0
390            .make_mut()
391            .iter_mut()
392            .find(|&&mut (k, _)| k == attr)
393            .map(|(_, v)| v)
394    }
395}
396
397impl Fold for HtmlAttrs {
398    fn fold(mut self, outer: Self) -> Self {
399        // TODO: We might want to use a data structure where this is more
400        // efficient (while keeping small attribute lists efficient, too), but
401        // for now, this is okay.
402        self.0.reserve(outer.0.len());
403        for pair in outer.0 {
404            if !self.0.iter().any(|&(attr, _)| attr == pair.0) {
405                self.0.push(pair);
406            }
407        }
408        self
409    }
410}
411
// Conversions between `HtmlAttrs` and Typst values: the attribute list becomes
// a dictionary keyed by the resolved attribute names, and a dictionary becomes
// an attribute list by interning every key as an `HtmlAttr` and casting every
// value to a string. The first key or value that fails aborts the conversion
// with that error.
cast! {
    HtmlAttrs,
    self => self.0
        .into_iter()
        .map(|(key, value)| (key.resolve().as_str().into(), value.into_value()))
        .collect::<Dict>()
        .into_value(),
    values: Dict => Self(values
        .into_iter()
        .map(|(k, v)| {
            let attr = HtmlAttr::intern(&k)?;
            let value = v.cast::<EcoString>()?;
            Ok((attr, value))
        })
        .collect::<HintedStrResult<_>>()?),
}
428
/// An attribute of an HTML element.
///
/// Wraps an interned string (`PicoStr`). Both the `Debug` and `Display`
/// output show the bare attribute name.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct HtmlAttr(PicoStr);
432
433impl HtmlAttr {
434    /// Intern an HTML attribute string at runtime.
435    pub fn intern(string: &str) -> StrResult<Self> {
436        if string.is_empty() {
437            bail!("attribute name must not be empty");
438        }
439
440        if let Some(c) =
441            string.chars().find(|&c| !charsets::is_valid_in_attribute_name(c))
442        {
443            bail!("the character {} is not valid in an attribute name", c.repr());
444        }
445
446        Ok(Self(PicoStr::intern(string)))
447    }
448
449    /// Creates a compile-time constant `HtmlAttr`.
450    ///
451    /// Must only be used in const contexts (in a constant definition or
452    /// explicit `const { .. }` block) because otherwise a panic for a malformed
453    /// attribute or not auto-internible constant will only be caught at
454    /// runtime.
455    #[track_caller]
456    pub const fn constant(string: &'static str) -> Self {
457        if string.is_empty() {
458            panic!("attribute name must not be empty");
459        }
460
461        let bytes = string.as_bytes();
462        let mut i = 0;
463        while i < bytes.len() {
464            if !bytes[i].is_ascii()
465                || !charsets::is_valid_in_attribute_name(bytes[i] as char)
466            {
467                panic!("not all characters are valid in an attribute name");
468            }
469            i += 1;
470        }
471
472        Self(PicoStr::constant(string))
473    }
474
475    /// Resolves the attribute to a string.
476    pub fn resolve(self) -> ResolvedPicoStr {
477        self.0.resolve()
478    }
479
480    /// Turns the attribute into its inner interned string.
481    pub const fn into_inner(self) -> PicoStr {
482        self.0
483    }
484}
485
/// Debug output of an attribute is identical to its display output.
impl Debug for HtmlAttr {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Forward to `Display` with the same formatter.
        Display::fmt(self, f)
    }
}
491
/// Displays the attribute as its resolved name, without any decoration.
impl Display for HtmlAttr {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.resolve())
    }
}
497
// Conversions between `HtmlAttr` and Typst values: an attribute converts into
// its resolved name as a string value, and a `Str` is converted into an
// attribute through `HtmlAttr::intern`, propagating its validation error.
cast! {
    HtmlAttr,
    self => self.0.resolve().as_str().into_value(),
    v: Str => Self::intern(&v)?,
}
503
/// Layouted content that will be embedded into HTML as an SVG.
#[derive(Debug, Clone, Hash)]
pub struct HtmlFrame {
    /// The frame that will be displayed as an SVG.
    pub inner: Frame,
    /// The text size where the frame was defined. This is used to size the
    /// frame with em units to make text in and outside of the frame sized
    /// consistently.
    pub text_size: Abs,
    /// An ID to assign to the SVG itself. `None` for a frame freshly created
    /// through `HtmlFrame::new`.
    pub id: Option<EcoString>,
    /// The element's CSS properties.
    pub css: css::Properties,
    /// IDs to assign to destination jump points within the SVG, each paired
    /// with the point it belongs to.
    pub anchors: EcoVec<(Point, EcoString)>,
    /// The span from which the frame originated.
    pub span: Span,
}
522
523impl HtmlFrame {
524    /// Wraps a laid-out frame.
525    pub fn new(inner: Frame, styles: StyleChain, span: Span) -> Self {
526        Self {
527            inner,
528            text_size: styles.resolve(TextElem::size),
529            id: None,
530            css: css::Properties::new(),
531            anchors: EcoVec::new(),
532            span,
533        }
534    }
535}
536
#[cfg(test)]
mod tests {
    use typst_library::foundations::Content;
    use typst_library::introspection::TagFlags;

    use super::*;
    use crate::tag;

    /// Checks the DOM indices for a mix of text runs, elements and a tag:
    /// text runs share one index, the tag inside a text run does not split
    /// it, and each element takes an index of its own.
    #[test]
    fn test_iter_with_dom_indices() {
        let text = |content| HtmlNode::text(content, Span::detached());
        let start_tag = HtmlNode::Tag(Tag::Start(
            Content::default(),
            TagFlags { introspectable: true, tagged: true },
        ));

        let nodes = [
            text("A"),
            HtmlElement::new(tag::span).into(),
            text("hi"),
            text(" you"),
            start_tag,
            text(" there"),
            HtmlElement::new(tag::span).into(),
            text(" my"),
            text(" friend!"),
        ];

        let indices: Vec<usize> =
            nodes.iter_with_dom_indices().map(|(_, index)| index).collect();
        assert_eq!(indices, [0, 1, 2, 2, 2, 2, 3, 4, 4]);
    }
}