typst_html/
encode.rs

1use std::fmt::Write;
2
3use typst_library::diag::{bail, At, SourceResult, StrResult};
4use typst_library::foundations::Repr;
5use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag};
6use typst_library::layout::Frame;
7use typst_syntax::Span;
8
9/// Encodes an HTML document into a string.
10pub fn html(document: &HtmlDocument) -> SourceResult<String> {
11    let mut w = Writer { pretty: true, ..Writer::default() };
12    w.buf.push_str("<!DOCTYPE html>");
13    write_indent(&mut w);
14    write_element(&mut w, &document.root)?;
15    if w.pretty {
16        w.buf.push('\n');
17    }
18    Ok(w.buf)
19}
20
/// Mutable state threaded through the encoding functions.
#[derive(Default)]
struct Writer {
    /// Accumulates the encoded output.
    buf: String,
    /// How many indentation steps deep the writer currently is.
    level: usize,
    /// Whether newlines and indentation are emitted (pretty printing).
    pretty: bool,
}
30
31/// Write a newline and indent, if pretty printing is enabled.
32fn write_indent(w: &mut Writer) {
33    if w.pretty {
34        w.buf.push('\n');
35        for _ in 0..w.level {
36            w.buf.push_str("  ");
37        }
38    }
39}
40
41/// Encode an HTML node into the writer.
42fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> {
43    match node {
44        HtmlNode::Tag(_) => {}
45        HtmlNode::Text(text, span) => write_text(w, text, *span)?,
46        HtmlNode::Element(element) => write_element(w, element)?,
47        HtmlNode::Frame(frame) => write_frame(w, frame),
48    }
49    Ok(())
50}
51
52/// Encode plain text into the writer.
53fn write_text(w: &mut Writer, text: &str, span: Span) -> SourceResult<()> {
54    for c in text.chars() {
55        if charsets::is_valid_in_normal_element_text(c) {
56            w.buf.push(c);
57        } else {
58            write_escape(w, c).at(span)?;
59        }
60    }
61    Ok(())
62}
63
64/// Encode one element into the write.
65fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
66    w.buf.push('<');
67    w.buf.push_str(&element.tag.resolve());
68
69    for (attr, value) in &element.attrs.0 {
70        w.buf.push(' ');
71        w.buf.push_str(&attr.resolve());
72        w.buf.push('=');
73        w.buf.push('"');
74        for c in value.chars() {
75            if charsets::is_valid_in_attribute_value(c) {
76                w.buf.push(c);
77            } else {
78                write_escape(w, c).at(element.span)?;
79            }
80        }
81        w.buf.push('"');
82    }
83
84    w.buf.push('>');
85
86    if tag::is_void(element.tag) {
87        return Ok(());
88    }
89
90    let pretty = w.pretty;
91    if !element.children.is_empty() {
92        let pretty_inside = allows_pretty_inside(element.tag)
93            && element.children.iter().any(|node| match node {
94                HtmlNode::Element(child) => wants_pretty_around(child.tag),
95                _ => false,
96            });
97
98        w.pretty &= pretty_inside;
99        let mut indent = w.pretty;
100
101        w.level += 1;
102        for c in &element.children {
103            let pretty_around = match c {
104                HtmlNode::Tag(_) => continue,
105                HtmlNode::Element(child) => w.pretty && wants_pretty_around(child.tag),
106                HtmlNode::Text(..) | HtmlNode::Frame(_) => false,
107            };
108
109            if core::mem::take(&mut indent) || pretty_around {
110                write_indent(w);
111            }
112            write_node(w, c)?;
113            indent = pretty_around;
114        }
115        w.level -= 1;
116
117        write_indent(w);
118    }
119    w.pretty = pretty;
120
121    w.buf.push_str("</");
122    w.buf.push_str(&element.tag.resolve());
123    w.buf.push('>');
124
125    Ok(())
126}
127
128/// Whether we are allowed to add an extra newline at the start and end of the
129/// element's contents.
130///
131/// Technically, users can change CSS `display` properties such that the
132/// insertion of whitespace may actually impact the visual output. For example,
133/// <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows how adding CSS
134/// rules to `<p>` can make it sensitive to whitespace. For this reason, we
135/// should also respect the `style` tag in the future.
136fn allows_pretty_inside(tag: HtmlTag) -> bool {
137    (tag::is_block_by_default(tag) && tag != tag::pre)
138        || tag::is_tabular_by_default(tag)
139        || tag == tag::li
140}
141
142/// Whether newlines should be added before and after the element if the parent
143/// allows it.
144///
145/// In contrast to `allows_pretty_inside`, which is purely spec-driven, this is
146/// more subjective and depends on preference.
147fn wants_pretty_around(tag: HtmlTag) -> bool {
148    allows_pretty_inside(tag) || tag::is_metadata(tag) || tag == tag::pre
149}
150
151/// Escape a character.
152fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
153    // See <https://html.spec.whatwg.org/multipage/syntax.html#syntax-charref>
154    match c {
155        '&' => w.buf.push_str("&amp;"),
156        '<' => w.buf.push_str("&lt;"),
157        '>' => w.buf.push_str("&gt;"),
158        '"' => w.buf.push_str("&quot;"),
159        '\'' => w.buf.push_str("&apos;"),
160        c if charsets::is_w3c_text_char(c) && c != '\r' => {
161            write!(w.buf, "&#x{:x};", c as u32).unwrap()
162        }
163        _ => bail!("the character {} cannot be encoded in HTML", c.repr()),
164    }
165    Ok(())
166}
167
168/// Encode a laid out frame into the writer.
169fn write_frame(w: &mut Writer, frame: &Frame) {
170    // FIXME: This string replacement is obviously a hack.
171    let svg = typst_svg::svg_frame(frame)
172        .replace("<svg class", "<svg style=\"overflow: visible;\" class");
173    w.buf.push_str(&svg);
174}