typst_html/
document.rs

1use std::num::NonZeroUsize;
2
3use comemo::{Tracked, TrackedMut};
4use ecow::{EcoVec, eco_vec};
5use rustc_hash::FxHashSet;
6use typst_library::World;
7use typst_library::diag::{SourceResult, bail};
8use typst_library::engine::{Engine, Route, Sink, Traced};
9use typst_library::foundations::{Content, StyleChain, Styles};
10use typst_library::introspection::{
11    Introspector, IntrospectorBuilder, Location, Locator,
12};
13use typst_library::layout::{Point, Position, Transform};
14use typst_library::model::DocumentInfo;
15use typst_library::routines::{Arenas, RealizationKind, Routines};
16use typst_syntax::Span;
17use typst_utils::NonZeroExt;
18
19use crate::convert::{ConversionLevel, Whitespace};
20use crate::rules::FootnoteContainer;
21use crate::{HtmlDocument, HtmlElem, HtmlElement, HtmlNode, attr, tag};
22
23/// Produce an HTML document from content.
24///
25/// This first performs root-level realization and then turns the resulting
26/// elements into HTML.
27#[typst_macros::time(name = "html document")]
28pub fn html_document(
29    engine: &mut Engine,
30    content: &Content,
31    styles: StyleChain,
32) -> SourceResult<HtmlDocument> {
33    html_document_impl(
34        engine.routines,
35        engine.world,
36        engine.introspector,
37        engine.traced,
38        TrackedMut::reborrow_mut(&mut engine.sink),
39        engine.route.track(),
40        content,
41        styles,
42    )
43}
44
45/// The internal implementation of `html_document`.
46#[comemo::memoize]
47#[allow(clippy::too_many_arguments)]
48fn html_document_impl(
49    routines: &Routines,
50    world: Tracked<dyn World + '_>,
51    introspector: Tracked<Introspector>,
52    traced: Tracked<Traced>,
53    sink: TrackedMut<Sink>,
54    route: Tracked<Route>,
55    content: &Content,
56    styles: StyleChain,
57) -> SourceResult<HtmlDocument> {
58    let mut locator = Locator::root().split();
59    let mut engine = Engine {
60        routines,
61        world,
62        introspector,
63        traced,
64        sink,
65        route: Route::extend(route).unnested(),
66    };
67
68    // Create this upfront to make it as stable as possible.
69    let footnote_locator = locator.next(&());
70
71    // Mark the external styles as "outside" so that they are valid at the
72    // document level.
73    let styles = styles.to_map().outside();
74    let styles = StyleChain::new(&styles);
75
76    let arenas = Arenas::default();
77    let mut info = DocumentInfo::default();
78    let children = (engine.routines.realize)(
79        RealizationKind::HtmlDocument { info: &mut info, is_inline: HtmlElem::is_inline },
80        &mut engine,
81        &mut locator,
82        &arenas,
83        content,
84        styles,
85    )?;
86
87    let nodes = crate::convert::convert_to_nodes(
88        &mut engine,
89        &mut locator,
90        children.iter().copied(),
91        ConversionLevel::Block,
92        Whitespace::Normal,
93    )?;
94
95    let mut output = classify_output(nodes.clone())?;
96    let introspectibles = if let OutputKind::Leaves(leaves) = &mut output {
97        // Add a footnote container at the end, but only if the user did not
98        // provide their own `<html>` or `<body>` element.
99        let notes = crate::fragment::html_block_fragment(
100            &mut engine,
101            FootnoteContainer::shared(),
102            footnote_locator,
103            StyleChain::new(&Styles::root(&children, styles)),
104            Whitespace::Normal,
105        )?;
106        leaves.extend(notes);
107        leaves
108    } else {
109        FootnoteContainer::unsupported_with_custom_dom(&engine)?;
110        &nodes
111    };
112
113    let mut link_targets = FxHashSet::default();
114    let mut introspector = introspect_html(introspectibles, &mut link_targets);
115    let mut root = root_element(output, &info);
116    crate::link::identify_link_targets(&mut root, &mut introspector, link_targets);
117
118    Ok(HtmlDocument { info, root, introspector })
119}
120
121/// Introspects HTML nodes.
122#[typst_macros::time(name = "introspect html")]
123fn introspect_html(
124    output: &[HtmlNode],
125    link_targets: &mut FxHashSet<Location>,
126) -> Introspector {
127    fn discover(
128        builder: &mut IntrospectorBuilder,
129        sink: &mut Vec<(Content, Position)>,
130        link_targets: &mut FxHashSet<Location>,
131        nodes: &[HtmlNode],
132    ) {
133        for node in nodes {
134            match node {
135                HtmlNode::Tag(tag) => {
136                    builder.discover_in_tag(
137                        sink,
138                        tag,
139                        Position { page: NonZeroUsize::ONE, point: Point::zero() },
140                    );
141                }
142                HtmlNode::Text(_, _) => {}
143                HtmlNode::Element(elem) => {
144                    if let Some(parent) = elem.parent {
145                        let mut nested = vec![];
146                        discover(builder, &mut nested, link_targets, &elem.children);
147                        builder.register_insertion(parent, nested);
148                    } else {
149                        discover(builder, sink, link_targets, &elem.children)
150                    }
151                }
152                HtmlNode::Frame(frame) => {
153                    builder.discover_in_frame(
154                        sink,
155                        &frame.inner,
156                        NonZeroUsize::ONE,
157                        Transform::identity(),
158                    );
159                    crate::link::introspect_frame_links(&frame.inner, link_targets);
160                }
161            }
162        }
163    }
164
165    let mut elems = Vec::new();
166    let mut builder = IntrospectorBuilder::new();
167    discover(&mut builder, &mut elems, link_targets, output);
168    builder.finalize(elems)
169}
170
171/// Wrap the nodes in `<html>` and `<body>` if they are not yet rooted,
172/// supplying a suitable `<head>`.
173fn root_element(output: OutputKind, info: &DocumentInfo) -> HtmlElement {
174    let head = head_element(info);
175    let body = match output {
176        OutputKind::Html(element) => return element,
177        OutputKind::Body(body) => body,
178        OutputKind::Leaves(leaves) => HtmlElement::new(tag::body).with_children(leaves),
179    };
180    HtmlElement::new(tag::html).with_children(eco_vec![head.into(), body.into()])
181}
182
183/// Generate a `<head>` element.
184fn head_element(info: &DocumentInfo) -> HtmlElement {
185    let mut children = EcoVec::new();
186
187    children.push(HtmlElement::new(tag::meta).with_attr(attr::charset, "utf-8").into());
188
189    children.push(
190        HtmlElement::new(tag::meta)
191            .with_attr(attr::name, "viewport")
192            .with_attr(attr::content, "width=device-width, initial-scale=1")
193            .into(),
194    );
195
196    if let Some(title) = &info.title {
197        children.push(
198            HtmlElement::new(tag::title)
199                .with_children(eco_vec![HtmlNode::Text(title.clone(), Span::detached())])
200                .into(),
201        );
202    }
203
204    if let Some(description) = &info.description {
205        children.push(
206            HtmlElement::new(tag::meta)
207                .with_attr(attr::name, "description")
208                .with_attr(attr::content, description.clone())
209                .into(),
210        );
211    }
212
213    if !info.author.is_empty() {
214        children.push(
215            HtmlElement::new(tag::meta)
216                .with_attr(attr::name, "authors")
217                .with_attr(attr::content, info.author.join(", "))
218                .into(),
219        )
220    }
221
222    if !info.keywords.is_empty() {
223        children.push(
224            HtmlElement::new(tag::meta)
225                .with_attr(attr::name, "keywords")
226                .with_attr(attr::content, info.keywords.join(", "))
227                .into(),
228        )
229    }
230
231    HtmlElement::new(tag::head).with_children(children)
232}
233
234/// Determine which kind of output the user generated.
235fn classify_output(output: EcoVec<HtmlNode>) -> SourceResult<OutputKind> {
236    let count = output.iter().filter(|node| !matches!(node, HtmlNode::Tag(_))).count();
237    for node in &output {
238        let HtmlNode::Element(elem) = node else { continue };
239        let tag = elem.tag;
240        match (tag, count) {
241            (tag::html, 1) => return Ok(OutputKind::Html(elem.clone())),
242            (tag::body, 1) => return Ok(OutputKind::Body(elem.clone())),
243            (tag::html | tag::body, _) => bail!(
244                elem.span,
245                "`{}` element must be the only element in the document",
246                elem.tag,
247            ),
248            _ => {}
249        }
250    }
251    Ok(OutputKind::Leaves(output))
252}
253
/// What kind of output the user generated.
enum OutputKind {
    /// The user generated their own `<html>` element. We do not need to supply
    /// one.
    Html(HtmlElement),
    /// The user generated their own `<body>` element. We do not need to supply
    /// one, but need to supply the `<html>` element.
    Body(HtmlElement),
    /// The user generated leaves which we wrap in a `<body>` and `<html>`.
    Leaves(EcoVec<HtmlNode>),
}