1use std::fmt::Write;
2
3use comemo::{Track, Tracked};
4use ecow::{EcoString, eco_format};
5use typst_library::diag::{At, SourceResult, StrResult, bail};
6use typst_library::foundations::Repr;
7use typst_library::model::LateLinkResolver;
8use typst_syntax::Span;
9
10use crate::{
11 HtmlDocument, HtmlElement, HtmlFrame, HtmlNode, HtmlTag, attr, charsets, property,
12 tag,
13};
14
/// Settings that control how HTML is serialized into a string.
#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
pub struct HtmlOptions {
    /// Whether to insert newlines and indentation between elements to make
    /// the output easier to read.
    pub pretty: bool,
}
21
22pub fn html(document: &HtmlDocument, options: &HtmlOptions) -> SourceResult<String> {
24 let link_resolver = LateLinkResolver::new(None, document.introspector().as_ref());
25 let w = Writer::new(link_resolver.track(), options.pretty);
26 html_impl(w, document.root())
27}
28
29pub fn html_in_bundle(
34 root: &HtmlElement,
35 options: &HtmlOptions,
36 link_resolver: Tracked<LateLinkResolver>,
37) -> SourceResult<String> {
38 let w = Writer::new(link_resolver, options.pretty);
39 html_impl(w, root)
40}
41
42fn html_impl(mut w: Writer, root: &HtmlElement) -> SourceResult<String> {
44 w.buf.push_str("<!DOCTYPE html>");
45 write_indent(&mut w);
46 write_element(&mut w, root)?;
47 if w.pretty {
48 w.buf.push('\n');
49 }
50 Ok(w.buf)
51}
52
/// Mutable state threaded through all encoding functions.
struct Writer<'a> {
    /// The output produced so far.
    buf: String,
    /// The current indentation level.
    level: usize,
    /// Link resolver that is handed to the SVG exporter when a frame is
    /// encoded.
    link_resolver: Tracked<'a, LateLinkResolver<'a>>,
    /// Whether newlines and indentation are currently being emitted. This is
    /// temporarily switched off while writing children of elements that do
    /// not allow pretty-printing inside them.
    pretty: bool,
}
65
66impl<'a> Writer<'a> {
67 fn new(link_resolver: Tracked<'a, LateLinkResolver<'a>>, pretty: bool) -> Self {
69 Self {
70 buf: String::new(),
71 level: 0,
72 link_resolver,
73 pretty,
74 }
75 }
76}
77
78fn write_indent(w: &mut Writer) {
80 if w.pretty {
81 w.buf.push('\n');
82 for _ in 0..w.level {
83 w.buf.push_str(" ");
84 }
85 }
86}
87
88fn write_node(w: &mut Writer, node: &HtmlNode, escape_text: bool) -> SourceResult<()> {
90 match node {
91 HtmlNode::Tag(_) => {}
92 HtmlNode::Text(text, span) => write_text(w, text, *span, escape_text)?,
93 HtmlNode::Element(element) => write_element(w, element)?,
94 HtmlNode::Frame(frame) => write_frame(w, frame),
95 }
96 Ok(())
97}
98
99fn write_text(w: &mut Writer, text: &str, span: Span, escape: bool) -> SourceResult<()> {
101 for c in text.chars() {
102 if escape || !charsets::is_valid_in_normal_element_text(c) {
103 write_escape(w, c).at(span)?;
104 } else {
105 w.buf.push(c);
106 }
107 }
108 Ok(())
109}
110
111fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
113 w.buf.push('<');
114 w.buf.push_str(&element.tag.resolve());
115
116 for (attr, value) in &element.attrs.0 {
117 w.buf.push(' ');
118 w.buf.push_str(&attr.resolve());
119
120 if !value.is_empty() {
123 w.buf.push('=');
124 w.buf.push('"');
125 for c in value.chars() {
126 if charsets::is_valid_in_attribute_value(c) {
127 w.buf.push(c);
128 } else {
129 write_escape(w, c).at(element.span)?;
130 }
131 }
132 w.buf.push('"');
133 }
134 }
135
136 if tag::is_foreign_self_closing(element.tag) {
137 w.buf.push('/');
138 }
139
140 w.buf.push('>');
141
142 if tag::is_void(element.tag) || tag::is_foreign_self_closing(element.tag) {
143 if !element.children.is_empty() {
144 bail!(element.span, "HTML void elements must not have children");
145 }
146 return Ok(());
147 }
148
149 if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
151 w.buf.push('\n');
152 }
153
154 if tag::is_raw(element.tag) {
155 write_raw(w, element)?;
156 } else if tag::is_escapable_raw(element.tag) {
157 write_escapable_raw(w, element)?;
158 } else if !element.children.is_empty() {
159 write_children(w, element)?;
160 }
161
162 w.buf.push_str("</");
163 w.buf.push_str(&element.tag.resolve());
164 w.buf.push('>');
165
166 Ok(())
167}
168
/// Encodes the children of a normal (non-raw) element.
///
/// Pretty-printing is only kept enabled for the children if the element
/// allows it inside and at least one child is a frame or an element that
/// wants to be surrounded by newlines. The writer's previous `pretty` setting
/// is restored before returning successfully.
fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
    // Remember the caller's setting so that it can be restored at the end.
    let pretty = w.pretty;
    let pretty_inside = allows_pretty_inside(element.tag)
        && element.children.iter().any(|node| match node {
            HtmlNode::Element(child) => wants_pretty_around(child),
            HtmlNode::Frame(_) => true,
            _ => false,
        });

    // Pretty-printing can only be turned off here, never on.
    w.pretty &= pretty_inside;
    // Whether a line break is pending before the next written child.
    // Initially true (if pretty) so that the first child starts on a new line.
    let mut indent = w.pretty;

    w.level += 1;
    for c in &element.children {
        let pretty_around = match c {
            // Tags produce no output and must not consume a pending indent.
            HtmlNode::Tag(_) => continue,
            HtmlNode::Element(child) => w.pretty && wants_pretty_around(child),
            HtmlNode::Text(..) | HtmlNode::Frame(_) => false,
        };

        // Break the line if the previous child asked for one after itself
        // (`indent`, reset to false by `take`) or this child wants one before
        // itself.
        if core::mem::take(&mut indent) || pretty_around {
            write_indent(w);
        }
        // `pre_span` is forwarded as the `escape_text` flag of `write_node`.
        write_node(w, c, element.pre_span)?;
        indent = pretty_around;
    }
    w.level -= 1;

    // Line break before the closing tag (a no-op if `w.pretty` is off), then
    // restore the caller's setting.
    write_indent(w);
    w.pretty = pretty;

    Ok(())
}
203
204fn starts_with_newline(element: &HtmlElement) -> bool {
206 for child in &element.children {
207 match child {
208 HtmlNode::Tag(_) => {}
209 HtmlNode::Text(text, _) => return text.starts_with(['\n', '\r']),
210 _ => return false,
211 }
212 }
213 false
214}
215
216fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
218 let text = collect_raw_text(element)?;
219
220 if let Some(closing) = find_closing_tag(&text, element.tag) {
221 bail!(
222 element.span,
223 "HTML raw text element cannot contain its own closing tag";
224 hint: "the sequence `{closing}` appears in the raw text";
225 )
226 }
227
228 let mode = if w.pretty { RawMode::of(element, &text) } else { RawMode::Keep };
229 match mode {
230 RawMode::Keep => {
231 w.buf.push_str(&text);
232 }
233 RawMode::Wrap => {
234 w.buf.push('\n');
235 w.buf.push_str(&text);
236 write_indent(w);
237 }
238 RawMode::Indent => {
239 w.level += 1;
240 for line in text.lines() {
241 write_indent(w);
242 w.buf.push_str(line);
243 }
244 w.level -= 1;
245 write_indent(w);
246 }
247 }
248
249 Ok(())
250}
251
252fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
254 walk_raw_text(element, |piece, span| write_text(w, piece, span, false))
255}
256
257fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
259 let mut text = String::new();
260 walk_raw_text(element, |piece, span| {
261 if let Some(c) = piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) {
262 return Err(unencodable(c)).at(span);
263 }
264 text.push_str(piece);
265 Ok(())
266 })?;
267 Ok(text)
268}
269
270fn walk_raw_text(
272 element: &HtmlElement,
273 mut f: impl FnMut(&str, Span) -> SourceResult<()>,
274) -> SourceResult<()> {
275 for c in &element.children {
276 match c {
277 HtmlNode::Tag(_) => continue,
278 HtmlNode::Text(text, span) => f(text, *span)?,
279 HtmlNode::Element(HtmlElement { span, .. })
280 | HtmlNode::Frame(HtmlFrame { span, .. }) => {
281 bail!(*span, "HTML raw text element cannot have non-text children")
282 }
283 }
284 }
285 Ok(())
286}
287
288fn find_closing_tag(text: &str, tag: HtmlTag) -> Option<&str> {
292 let s = tag.resolve();
293 let len = s.len();
294 text.match_indices("</").find_map(|(i, _)| {
295 let rest = &text[i + 2..];
296 let disallowed = rest.len() >= len
297 && rest[..len].eq_ignore_ascii_case(&s)
298 && rest[len..].starts_with(['\t', '\n', '\u{c}', '\r', ' ', '>', '/']);
299 disallowed.then(|| &text[i..i + 2 + len])
300 })
301}
302
/// How the contents of a raw text element are laid out when pretty-printing.
enum RawMode {
    /// Write the text exactly as it is.
    Keep,
    /// Write the text unchanged, but with a newline after the opening tag and
    /// a line break before the closing tag.
    Wrap,
    /// Write each line of the text on its own line, indented one level deeper
    /// than the element, with a line break before the closing tag.
    Indent,
}
312
313impl RawMode {
314 fn of(element: &HtmlElement, text: &str) -> Self {
315 match element.tag {
316 tag::script
317 if !element.attrs.0.iter().any(|(attr, value)| {
318 *attr == attr::r#type && value != "text/javascript"
319 }) =>
320 {
321 if text.contains('`') { Self::Wrap } else { Self::Indent }
324 }
325 tag::style => Self::Indent,
326 _ => Self::Keep,
327 }
328 }
329}
330
331fn allows_pretty_inside(tag: HtmlTag) -> bool {
340 if tag::mathml::is_mathml(tag) && !tag::mathml::is_token(tag) {
341 return true;
342 }
343 let Some(display) = property::Display::default_for(tag) else { return false };
344 (display == property::Display::Block && tag != tag::pre)
345 || display.is_tabular()
346 || display == property::Display::ListItem
347 || tag == tag::head
348}
349
350fn wants_pretty_around(element: &HtmlElement) -> bool {
356 match element.tag {
357 tag::mathml::math => {
358 element.attrs.get(attr::mathml::display).is_some_and(|v| v == "block")
359 }
360 t if tag::mathml::is_mathml(t) => true,
361 tag::pre => true,
362 t if tag::is_metadata_content(t) => true,
363 t => allows_pretty_inside(t),
364 }
365}
366
367fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
369 match c {
371 '&' => w.buf.push_str("&"),
372 '<' => w.buf.push_str("<"),
373 '>' => w.buf.push_str(">"),
374 '"' => w.buf.push_str("""),
375 '\'' => w.buf.push_str("'"),
376 c if charsets::is_w3c_text_char(c) && c != '\r' => {
377 write!(w.buf, "&#x{:x};", c as u32).unwrap()
378 }
379 _ => return Err(unencodable(c)),
380 }
381 Ok(())
382}
383
384#[cold]
386fn unencodable(c: char) -> EcoString {
387 eco_format!("the character `{}` cannot be encoded in HTML", c.repr())
388}
389
390fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
392 let svg = typst_svg::svg_in_html(
393 &frame.inner,
394 frame.text_size,
395 w.pretty,
396 frame.id.as_deref(),
397 &eco_format!("{}", frame.css.to_inline()),
398 &frame.anchors,
399 w.link_resolver,
400 );
401 w.buf.push_str(&svg);
402}