1use std::fmt::Write;
2
3use ecow::{EcoString, eco_format};
4use typst_library::diag::{At, SourceResult, StrResult, bail};
5use typst_library::foundations::Repr;
6use typst_library::introspection::Introspector;
7use typst_syntax::Span;
8
9use crate::{
10 HtmlDocument, HtmlElement, HtmlFrame, HtmlNode, HtmlTag, attr, charsets, tag,
11};
12
13pub fn html(document: &HtmlDocument) -> SourceResult<String> {
15 let mut w = Writer::new(&document.introspector, true);
16 w.buf.push_str("<!DOCTYPE html>");
17 write_indent(&mut w);
18 write_element(&mut w, &document.root)?;
19 if w.pretty {
20 w.buf.push('\n');
21 }
22 Ok(w.buf)
23}
24
25struct Writer<'a> {
27 buf: String,
29 level: usize,
31 introspector: &'a Introspector,
33 pretty: bool,
35}
36
37impl<'a> Writer<'a> {
38 fn new(introspector: &'a Introspector, pretty: bool) -> Self {
40 Self { buf: String::new(), level: 0, introspector, pretty }
41 }
42}
43
44fn write_indent(w: &mut Writer) {
46 if w.pretty {
47 w.buf.push('\n');
48 for _ in 0..w.level {
49 w.buf.push_str(" ");
50 }
51 }
52}
53
54fn write_node(w: &mut Writer, node: &HtmlNode, escape_text: bool) -> SourceResult<()> {
56 match node {
57 HtmlNode::Tag(_) => {}
58 HtmlNode::Text(text, span) => write_text(w, text, *span, escape_text)?,
59 HtmlNode::Element(element) => write_element(w, element)?,
60 HtmlNode::Frame(frame) => write_frame(w, frame),
61 }
62 Ok(())
63}
64
65fn write_text(w: &mut Writer, text: &str, span: Span, escape: bool) -> SourceResult<()> {
67 for c in text.chars() {
68 if escape || !charsets::is_valid_in_normal_element_text(c) {
69 write_escape(w, c).at(span)?;
70 } else {
71 w.buf.push(c);
72 }
73 }
74 Ok(())
75}
76
77fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
79 w.buf.push('<');
80 w.buf.push_str(&element.tag.resolve());
81
82 for (attr, value) in &element.attrs.0 {
83 w.buf.push(' ');
84 w.buf.push_str(&attr.resolve());
85
86 if !value.is_empty() {
89 w.buf.push('=');
90 w.buf.push('"');
91 for c in value.chars() {
92 if charsets::is_valid_in_attribute_value(c) {
93 w.buf.push(c);
94 } else {
95 write_escape(w, c).at(element.span)?;
96 }
97 }
98 w.buf.push('"');
99 }
100 }
101
102 w.buf.push('>');
103
104 if tag::is_void(element.tag) {
105 if !element.children.is_empty() {
106 bail!(element.span, "HTML void elements must not have children");
107 }
108 return Ok(());
109 }
110
111 if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
113 w.buf.push('\n');
114 }
115
116 if tag::is_raw(element.tag) {
117 write_raw(w, element)?;
118 } else if tag::is_escapable_raw(element.tag) {
119 write_escapable_raw(w, element)?;
120 } else if !element.children.is_empty() {
121 write_children(w, element)?;
122 }
123
124 w.buf.push_str("</");
125 w.buf.push_str(&element.tag.resolve());
126 w.buf.push('>');
127
128 Ok(())
129}
130
131fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
133 let pretty = w.pretty;
134 let pretty_inside = allows_pretty_inside(element.tag)
135 && element.children.iter().any(|node| match node {
136 HtmlNode::Element(child) => wants_pretty_around(child.tag),
137 HtmlNode::Frame(_) => true,
138 _ => false,
139 });
140
141 w.pretty &= pretty_inside;
142 let mut indent = w.pretty;
143
144 w.level += 1;
145 for c in &element.children {
146 let pretty_around = match c {
147 HtmlNode::Tag(_) => continue,
148 HtmlNode::Element(child) => w.pretty && wants_pretty_around(child.tag),
149 HtmlNode::Text(..) | HtmlNode::Frame(_) => false,
150 };
151
152 if core::mem::take(&mut indent) || pretty_around {
153 write_indent(w);
154 }
155 write_node(w, c, element.pre_span)?;
156 indent = pretty_around;
157 }
158 w.level -= 1;
159
160 write_indent(w);
161 w.pretty = pretty;
162
163 Ok(())
164}
165
166fn starts_with_newline(element: &HtmlElement) -> bool {
168 for child in &element.children {
169 match child {
170 HtmlNode::Tag(_) => {}
171 HtmlNode::Text(text, _) => return text.starts_with(['\n', '\r']),
172 _ => return false,
173 }
174 }
175 false
176}
177
178fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
180 let text = collect_raw_text(element)?;
181
182 if let Some(closing) = find_closing_tag(&text, element.tag) {
183 bail!(
184 element.span,
185 "HTML raw text element cannot contain its own closing tag";
186 hint: "the sequence `{closing}` appears in the raw text",
187 )
188 }
189
190 let mode = if w.pretty { RawMode::of(element, &text) } else { RawMode::Keep };
191 match mode {
192 RawMode::Keep => {
193 w.buf.push_str(&text);
194 }
195 RawMode::Wrap => {
196 w.buf.push('\n');
197 w.buf.push_str(&text);
198 write_indent(w);
199 }
200 RawMode::Indent => {
201 w.level += 1;
202 for line in text.lines() {
203 write_indent(w);
204 w.buf.push_str(line);
205 }
206 w.level -= 1;
207 write_indent(w);
208 }
209 }
210
211 Ok(())
212}
213
214fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
216 walk_raw_text(element, |piece, span| write_text(w, piece, span, false))
217}
218
219fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
221 let mut text = String::new();
222 walk_raw_text(element, |piece, span| {
223 if let Some(c) = piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) {
224 return Err(unencodable(c)).at(span);
225 }
226 text.push_str(piece);
227 Ok(())
228 })?;
229 Ok(text)
230}
231
232fn walk_raw_text(
234 element: &HtmlElement,
235 mut f: impl FnMut(&str, Span) -> SourceResult<()>,
236) -> SourceResult<()> {
237 for c in &element.children {
238 match c {
239 HtmlNode::Tag(_) => continue,
240 HtmlNode::Text(text, span) => f(text, *span)?,
241 HtmlNode::Element(HtmlElement { span, .. })
242 | HtmlNode::Frame(HtmlFrame { span, .. }) => {
243 bail!(*span, "HTML raw text element cannot have non-text children")
244 }
245 }
246 }
247 Ok(())
248}
249
250fn find_closing_tag(text: &str, tag: HtmlTag) -> Option<&str> {
254 let s = tag.resolve();
255 let len = s.len();
256 text.match_indices("</").find_map(|(i, _)| {
257 let rest = &text[i + 2..];
258 let disallowed = rest.len() >= len
259 && rest[..len].eq_ignore_ascii_case(&s)
260 && rest[len..].starts_with(['\t', '\n', '\u{c}', '\r', ' ', '>', '/']);
261 disallowed.then(|| &text[i..i + 2 + len])
262 })
263}
264
265enum RawMode {
267 Keep,
269 Wrap,
271 Indent,
273}
274
275impl RawMode {
276 fn of(element: &HtmlElement, text: &str) -> Self {
277 match element.tag {
278 tag::script
279 if !element.attrs.0.iter().any(|(attr, value)| {
280 *attr == attr::r#type && value != "text/javascript"
281 }) =>
282 {
283 if text.contains('`') { Self::Wrap } else { Self::Indent }
286 }
287 tag::style => Self::Indent,
288 _ => Self::Keep,
289 }
290 }
291}
292
293fn allows_pretty_inside(tag: HtmlTag) -> bool {
302 (tag::is_block_by_default(tag) && tag != tag::pre)
303 || tag::is_tabular_by_default(tag)
304 || tag == tag::li
305}
306
307fn wants_pretty_around(tag: HtmlTag) -> bool {
313 allows_pretty_inside(tag) || tag::is_metadata(tag) || tag == tag::pre
314}
315
316fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
318 match c {
320 '&' => w.buf.push_str("&"),
321 '<' => w.buf.push_str("<"),
322 '>' => w.buf.push_str(">"),
323 '"' => w.buf.push_str("""),
324 '\'' => w.buf.push_str("'"),
325 c if charsets::is_w3c_text_char(c) && c != '\r' => {
326 write!(w.buf, "&#x{:x};", c as u32).unwrap()
327 }
328 _ => return Err(unencodable(c)),
329 }
330 Ok(())
331}
332
333#[cold]
335fn unencodable(c: char) -> EcoString {
336 eco_format!("the character `{}` cannot be encoded in HTML", c.repr())
337}
338
339fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
341 let svg = typst_svg::svg_html_frame(
342 &frame.inner,
343 frame.text_size,
344 frame.id.as_deref(),
345 &frame.link_points,
346 w.introspector,
347 );
348 w.buf.push_str(&svg);
349}