Skip to main content

typst_html/
introspect.rs

1use std::fmt::{self, Debug, Formatter};
2use std::num::NonZeroUsize;
3
4use ecow::{EcoString, EcoVec};
5use rustc_hash::{FxHashMap, FxHashSet};
6use typst_library::diag::StrResult;
7use typst_library::foundations::{Content, Label, Selector};
8use typst_library::introspection::{
9    DocumentPosition, ElementIntrospector, ElementIntrospectorBuilder, HtmlPosition,
10    Introspector, Location,
11};
12use typst_library::layout::{Frame, FrameItem, Point, Transform};
13use typst_library::model::{Destination, LinkElem, Numbering};
14use typst_syntax::VirtualPath;
15
16use crate::{HtmlNode, HtmlSliceExt, tag};
17
18/// An introspector implementation for HTML documents.
19#[derive(Clone)]
20pub struct HtmlIntrospector {
21    /// The underlying target-agnostic introspector used for most queries.
22    elements: ElementIntrospector<HtmlPosition>,
23    /// Locations that are linked to via `FrameItem::Link`.
24    frame_link_targets: FxHashSet<Location>,
25    /// Maps from element locations to assigned HTML link anchors. This is used
26    /// to support intra-doc links.
27    anchors: FxHashMap<Location, EcoString>,
28}
29
30impl HtmlIntrospector {
31    /// Creates an introspector for an HTML document.
32    #[typst_macros::time(name = "introspect html")]
33    pub fn new(output: &[HtmlNode]) -> HtmlIntrospector {
34        let mut builder = HtmlIntrospectorBuilder::default();
35        builder.discover_nodes(output, &mut EcoVec::new());
36        builder.finish()
37    }
38
39    /// The underlying element introspector.
40    pub fn elements(&self) -> &ElementIntrospector<HtmlPosition> {
41        &self.elements
42    }
43
44    /// Resolves the position in the DOM of an element.
45    pub fn position(&self, location: Location) -> Option<HtmlPosition> {
46        self.elements.position(location).cloned()
47    }
48
49    /// Computes all locations that are referenced by intra-doc links of any
50    /// kind.
51    pub fn link_targets(&self) -> FxHashSet<Location> {
52        LinkElem::find_destinations(self)
53            .chain(self.frame_link_targets.iter().copied())
54            .collect()
55    }
56
57    /// Returns the locations that the HTML document links to via
58    /// `FrameItem::Link`.
59    pub fn frame_link_targets(&self) -> &FxHashSet<Location> {
60        &self.frame_link_targets
61    }
62
63    /// Enriches an existing introspector with HTML link anchors, which were
64    /// assigned to the DOM in a post-processing step.
65    pub fn set_anchors(&mut self, anchors: FxHashMap<Location, EcoString>) {
66        self.anchors = anchors;
67    }
68}
69
70impl Introspector for HtmlIntrospector {
71    fn query(&self, selector: &Selector) -> EcoVec<Content> {
72        self.elements.query(selector)
73    }
74
75    fn query_first(&self, selector: &Selector) -> Option<Content> {
76        self.elements.query_first(selector)
77    }
78
79    fn query_unique(&self, selector: &Selector) -> StrResult<Content> {
80        self.elements.query_unique(selector)
81    }
82
83    fn query_label(&self, label: Label) -> StrResult<&Content> {
84        self.elements.query_label(label)
85    }
86
87    fn query_labelled(&self) -> EcoVec<Content> {
88        self.elements.query_labelled()
89    }
90
91    fn query_count_before(&self, selector: &Selector, end: Location) -> usize {
92        self.elements.query_count_before(selector, end)
93    }
94
95    fn label_count(&self, label: Label) -> usize {
96        self.elements.label_count(label)
97    }
98
99    fn locator(&self, key: u128, base: Location) -> Option<Location> {
100        self.elements.locator(key, base)
101    }
102
103    fn pages(&self, _: Location) -> Option<NonZeroUsize> {
104        None
105    }
106
107    fn page(&self, _: Location) -> Option<NonZeroUsize> {
108        None
109    }
110
111    fn position(&self, location: Location) -> Option<DocumentPosition> {
112        self.position(location).map(DocumentPosition::Html)
113    }
114
115    fn page_numbering(&self, _: Location) -> Option<&Numbering> {
116        None
117    }
118
119    fn page_supplement(&self, _: Location) -> Option<&Content> {
120        None
121    }
122
123    fn anchor(&self, location: Location) -> Option<&EcoString> {
124        self.anchors.get(&location)
125    }
126
127    fn document(&self, _: Location) -> Option<Location> {
128        None
129    }
130
131    fn path(&self, _: Location) -> Option<&VirtualPath> {
132        None
133    }
134}
135
136impl Debug for HtmlIntrospector {
137    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
138        f.pad("HtmlIntrospector(..)")
139    }
140}
141
142/// Constructs the [`HtmlIntrospector`].
143#[derive(Default)]
144struct HtmlIntrospectorBuilder {
145    elements: ElementIntrospectorBuilder<HtmlPosition>,
146    frame_link_targets: FxHashSet<Location>,
147}
148
149impl HtmlIntrospectorBuilder {
150    /// Returns the resulting introspector.
151    fn finish(self) -> HtmlIntrospector {
152        HtmlIntrospector {
153            elements: self.elements.finalize(),
154            frame_link_targets: self.frame_link_targets,
155            anchors: FxHashMap::default(),
156        }
157    }
158
159    /// Discovers introspectibles in a collection of HTML nodes.
160    fn discover_nodes(
161        &mut self,
162        nodes: &[HtmlNode],
163        current_position: &mut EcoVec<usize>,
164    ) {
165        for (node, dom_index) in nodes.iter_with_dom_indices() {
166            match node {
167                HtmlNode::Tag(tag) => {
168                    current_position.push(dom_index);
169                    self.elements
170                        .discover_tag(tag, HtmlPosition::new(current_position.clone()));
171                    current_position.pop();
172                }
173                HtmlNode::Text(_, _) => {}
174                HtmlNode::Element(elem) => {
175                    let is_root = elem.tag == tag::html;
176                    if !is_root {
177                        current_position.push(dom_index);
178                    }
179
180                    if let Some(parent) = elem.parent {
181                        self.elements.start_insertion();
182                        self.discover_nodes(&elem.children, current_position);
183                        self.elements.end_insertion(parent);
184                    } else {
185                        self.discover_nodes(&elem.children, current_position);
186                    }
187
188                    if !is_root {
189                        current_position.pop();
190                    }
191                }
192                HtmlNode::Frame(frame) => {
193                    current_position.push(dom_index);
194                    self.discover_frame(
195                        &frame.inner,
196                        Transform::identity(),
197                        &mut |point| {
198                            HtmlPosition::new(current_position.clone()).in_frame(point)
199                        },
200                    );
201                    current_position.pop();
202                }
203            }
204        }
205    }
206
207    /// Discovers introspectibles in a frame.
208    fn discover_frame<F>(&mut self, frame: &Frame, ts: Transform, to_pos: &mut F)
209    where
210        F: FnMut(Point) -> HtmlPosition,
211    {
212        for (pos, item) in frame.items() {
213            match item {
214                FrameItem::Tag(tag) => {
215                    self.elements.discover_tag(tag, to_pos(pos.transform(ts)));
216                }
217                FrameItem::Group(group) => {
218                    let ts = ts
219                        .pre_concat(Transform::translate(pos.x, pos.y))
220                        .pre_concat(group.transform);
221
222                    if let Some(parent) = group.parent {
223                        self.elements.start_insertion();
224                        self.discover_frame(&group.frame, ts, to_pos);
225                        self.elements.end_insertion(parent.location);
226                    } else {
227                        self.discover_frame(&group.frame, ts, to_pos);
228                    }
229                }
230                FrameItem::Link(dest, _) => {
231                    if let Destination::Location(loc) = dest {
232                        self.frame_link_targets.insert(*loc);
233                    }
234                }
235                FrameItem::Text(..) | FrameItem::Shape(..) | FrameItem::Image(..) => {}
236            }
237        }
238    }
239}