1use std::fmt::Write;
2
3use ecow::{EcoString, eco_format};
4use typst_library::diag::{At, SourceResult, StrResult, bail};
5use typst_library::foundations::Repr;
6use typst_library::introspection::Introspector;
7use typst_syntax::Span;
8
9use crate::{
10    HtmlDocument, HtmlElement, HtmlFrame, HtmlNode, HtmlTag, attr, charsets, tag,
11};
12
13pub fn html(document: &HtmlDocument) -> SourceResult<String> {
15    let mut w = Writer::new(&document.introspector, true);
16    w.buf.push_str("<!DOCTYPE html>");
17    write_indent(&mut w);
18    write_element(&mut w, &document.root)?;
19    if w.pretty {
20        w.buf.push('\n');
21    }
22    Ok(w.buf)
23}
24
25struct Writer<'a> {
27    buf: String,
29    level: usize,
31    introspector: &'a Introspector,
33    pretty: bool,
35}
36
37impl<'a> Writer<'a> {
38    fn new(introspector: &'a Introspector, pretty: bool) -> Self {
40        Self { buf: String::new(), level: 0, introspector, pretty }
41    }
42}
43
44fn write_indent(w: &mut Writer) {
46    if w.pretty {
47        w.buf.push('\n');
48        for _ in 0..w.level {
49            w.buf.push_str("  ");
50        }
51    }
52}
53
54fn write_node(w: &mut Writer, node: &HtmlNode, escape_text: bool) -> SourceResult<()> {
56    match node {
57        HtmlNode::Tag(_) => {}
58        HtmlNode::Text(text, span) => write_text(w, text, *span, escape_text)?,
59        HtmlNode::Element(element) => write_element(w, element)?,
60        HtmlNode::Frame(frame) => write_frame(w, frame),
61    }
62    Ok(())
63}
64
65fn write_text(w: &mut Writer, text: &str, span: Span, escape: bool) -> SourceResult<()> {
67    for c in text.chars() {
68        if escape || !charsets::is_valid_in_normal_element_text(c) {
69            write_escape(w, c).at(span)?;
70        } else {
71            w.buf.push(c);
72        }
73    }
74    Ok(())
75}
76
77fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
79    w.buf.push('<');
80    w.buf.push_str(&element.tag.resolve());
81
82    for (attr, value) in &element.attrs.0 {
83        w.buf.push(' ');
84        w.buf.push_str(&attr.resolve());
85
86        if !value.is_empty() {
89            w.buf.push('=');
90            w.buf.push('"');
91            for c in value.chars() {
92                if charsets::is_valid_in_attribute_value(c) {
93                    w.buf.push(c);
94                } else {
95                    write_escape(w, c).at(element.span)?;
96                }
97            }
98            w.buf.push('"');
99        }
100    }
101
102    w.buf.push('>');
103
104    if tag::is_void(element.tag) {
105        if !element.children.is_empty() {
106            bail!(element.span, "HTML void elements must not have children");
107        }
108        return Ok(());
109    }
110
111    if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
113        w.buf.push('\n');
114    }
115
116    if tag::is_raw(element.tag) {
117        write_raw(w, element)?;
118    } else if tag::is_escapable_raw(element.tag) {
119        write_escapable_raw(w, element)?;
120    } else if !element.children.is_empty() {
121        write_children(w, element)?;
122    }
123
124    w.buf.push_str("</");
125    w.buf.push_str(&element.tag.resolve());
126    w.buf.push('>');
127
128    Ok(())
129}
130
131fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
133    let pretty = w.pretty;
134    let pretty_inside = allows_pretty_inside(element.tag)
135        && element.children.iter().any(|node| match node {
136            HtmlNode::Element(child) => wants_pretty_around(child.tag),
137            HtmlNode::Frame(_) => true,
138            _ => false,
139        });
140
141    w.pretty &= pretty_inside;
142    let mut indent = w.pretty;
143
144    w.level += 1;
145    for c in &element.children {
146        let pretty_around = match c {
147            HtmlNode::Tag(_) => continue,
148            HtmlNode::Element(child) => w.pretty && wants_pretty_around(child.tag),
149            HtmlNode::Text(..) | HtmlNode::Frame(_) => false,
150        };
151
152        if core::mem::take(&mut indent) || pretty_around {
153            write_indent(w);
154        }
155        write_node(w, c, element.pre_span)?;
156        indent = pretty_around;
157    }
158    w.level -= 1;
159
160    write_indent(w);
161    w.pretty = pretty;
162
163    Ok(())
164}
165
166fn starts_with_newline(element: &HtmlElement) -> bool {
168    for child in &element.children {
169        match child {
170            HtmlNode::Tag(_) => {}
171            HtmlNode::Text(text, _) => return text.starts_with(['\n', '\r']),
172            _ => return false,
173        }
174    }
175    false
176}
177
178fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
180    let text = collect_raw_text(element)?;
181
182    if let Some(closing) = find_closing_tag(&text, element.tag) {
183        bail!(
184            element.span,
185            "HTML raw text element cannot contain its own closing tag";
186            hint: "the sequence `{closing}` appears in the raw text",
187        )
188    }
189
190    let mode = if w.pretty { RawMode::of(element, &text) } else { RawMode::Keep };
191    match mode {
192        RawMode::Keep => {
193            w.buf.push_str(&text);
194        }
195        RawMode::Wrap => {
196            w.buf.push('\n');
197            w.buf.push_str(&text);
198            write_indent(w);
199        }
200        RawMode::Indent => {
201            w.level += 1;
202            for line in text.lines() {
203                write_indent(w);
204                w.buf.push_str(line);
205            }
206            w.level -= 1;
207            write_indent(w);
208        }
209    }
210
211    Ok(())
212}
213
214fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
216    walk_raw_text(element, |piece, span| write_text(w, piece, span, false))
217}
218
219fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
221    let mut text = String::new();
222    walk_raw_text(element, |piece, span| {
223        if let Some(c) = piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) {
224            return Err(unencodable(c)).at(span);
225        }
226        text.push_str(piece);
227        Ok(())
228    })?;
229    Ok(text)
230}
231
232fn walk_raw_text(
234    element: &HtmlElement,
235    mut f: impl FnMut(&str, Span) -> SourceResult<()>,
236) -> SourceResult<()> {
237    for c in &element.children {
238        match c {
239            HtmlNode::Tag(_) => continue,
240            HtmlNode::Text(text, span) => f(text, *span)?,
241            HtmlNode::Element(HtmlElement { span, .. })
242            | HtmlNode::Frame(HtmlFrame { span, .. }) => {
243                bail!(*span, "HTML raw text element cannot have non-text children")
244            }
245        }
246    }
247    Ok(())
248}
249
250fn find_closing_tag(text: &str, tag: HtmlTag) -> Option<&str> {
254    let s = tag.resolve();
255    let len = s.len();
256    text.match_indices("</").find_map(|(i, _)| {
257        let rest = &text[i + 2..];
258        let disallowed = rest.len() >= len
259            && rest[..len].eq_ignore_ascii_case(&s)
260            && rest[len..].starts_with(['\t', '\n', '\u{c}', '\r', ' ', '>', '/']);
261        disallowed.then(|| &text[i..i + 2 + len])
262    })
263}
264
/// How the contents of a raw text element are laid out when pretty printing.
enum RawMode {
    /// Emit the text exactly as it is.
    Keep,
    /// Put a newline after the opening tag and a newline plus indent before
    /// the closing tag, leaving the text itself untouched.
    Wrap,
    /// Put every line of the text on its own line, indented one level deeper
    /// than the element, with the closing tag on a fresh line.
    Indent,
}
274
275impl RawMode {
276    fn of(element: &HtmlElement, text: &str) -> Self {
277        match element.tag {
278            tag::script
279                if !element.attrs.0.iter().any(|(attr, value)| {
280                    *attr == attr::r#type && value != "text/javascript"
281                }) =>
282            {
283                if text.contains('`') { Self::Wrap } else { Self::Indent }
286            }
287            tag::style => Self::Indent,
288            _ => Self::Keep,
289        }
290    }
291}
292
293fn allows_pretty_inside(tag: HtmlTag) -> bool {
302    (tag::is_block_by_default(tag) && tag != tag::pre)
303        || tag::is_tabular_by_default(tag)
304        || tag == tag::li
305}
306
307fn wants_pretty_around(tag: HtmlTag) -> bool {
313    allows_pretty_inside(tag) || tag::is_metadata(tag) || tag == tag::pre
314}
315
316fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
318    match c {
320        '&' => w.buf.push_str("&"),
321        '<' => w.buf.push_str("<"),
322        '>' => w.buf.push_str(">"),
323        '"' => w.buf.push_str("""),
324        '\'' => w.buf.push_str("'"),
325        c if charsets::is_w3c_text_char(c) && c != '\r' => {
326            write!(w.buf, "&#x{:x};", c as u32).unwrap()
327        }
328        _ => return Err(unencodable(c)),
329    }
330    Ok(())
331}
332
333#[cold]
335fn unencodable(c: char) -> EcoString {
336    eco_format!("the character `{}` cannot be encoded in HTML", c.repr())
337}
338
339fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
341    let svg = typst_svg::svg_html_frame(
342        &frame.inner,
343        frame.text_size,
344        frame.id.as_deref(),
345        &frame.link_points,
346        w.introspector,
347    );
348    w.buf.push_str(&svg);
349}