1use std::fmt::{self, Debug, Formatter};
2use std::num::NonZeroUsize;
3
4use ecow::{EcoString, EcoVec};
5use rustc_hash::{FxHashMap, FxHashSet};
6use typst_library::diag::StrResult;
7use typst_library::foundations::{Content, Label, Selector};
8use typst_library::introspection::{
9 DocumentPosition, ElementIntrospector, ElementIntrospectorBuilder, HtmlPosition,
10 Introspector, Location,
11};
12use typst_library::layout::{Frame, FrameItem, Point, Transform};
13use typst_library::model::{Destination, LinkElem, Numbering};
14use typst_syntax::VirtualPath;
15
16use crate::{HtmlNode, HtmlSliceExt, tag};
17
18#[derive(Clone)]
20pub struct HtmlIntrospector {
21 elements: ElementIntrospector<HtmlPosition>,
23 frame_link_targets: FxHashSet<Location>,
25 anchors: FxHashMap<Location, EcoString>,
28}
29
30impl HtmlIntrospector {
31 #[typst_macros::time(name = "introspect html")]
33 pub fn new(output: &[HtmlNode]) -> HtmlIntrospector {
34 let mut builder = HtmlIntrospectorBuilder::default();
35 builder.discover_nodes(output, &mut EcoVec::new());
36 builder.finish()
37 }
38
39 pub fn elements(&self) -> &ElementIntrospector<HtmlPosition> {
41 &self.elements
42 }
43
44 pub fn position(&self, location: Location) -> Option<HtmlPosition> {
46 self.elements.position(location).cloned()
47 }
48
49 pub fn link_targets(&self) -> FxHashSet<Location> {
52 LinkElem::find_destinations(self)
53 .chain(self.frame_link_targets.iter().copied())
54 .collect()
55 }
56
57 pub fn frame_link_targets(&self) -> &FxHashSet<Location> {
60 &self.frame_link_targets
61 }
62
63 pub fn set_anchors(&mut self, anchors: FxHashMap<Location, EcoString>) {
66 self.anchors = anchors;
67 }
68}
69
70impl Introspector for HtmlIntrospector {
71 fn query(&self, selector: &Selector) -> EcoVec<Content> {
72 self.elements.query(selector)
73 }
74
75 fn query_first(&self, selector: &Selector) -> Option<Content> {
76 self.elements.query_first(selector)
77 }
78
79 fn query_unique(&self, selector: &Selector) -> StrResult<Content> {
80 self.elements.query_unique(selector)
81 }
82
83 fn query_label(&self, label: Label) -> StrResult<&Content> {
84 self.elements.query_label(label)
85 }
86
87 fn query_labelled(&self) -> EcoVec<Content> {
88 self.elements.query_labelled()
89 }
90
91 fn query_count_before(&self, selector: &Selector, end: Location) -> usize {
92 self.elements.query_count_before(selector, end)
93 }
94
95 fn label_count(&self, label: Label) -> usize {
96 self.elements.label_count(label)
97 }
98
99 fn locator(&self, key: u128, base: Location) -> Option<Location> {
100 self.elements.locator(key, base)
101 }
102
103 fn pages(&self, _: Location) -> Option<NonZeroUsize> {
104 None
105 }
106
107 fn page(&self, _: Location) -> Option<NonZeroUsize> {
108 None
109 }
110
111 fn position(&self, location: Location) -> Option<DocumentPosition> {
112 self.position(location).map(DocumentPosition::Html)
113 }
114
115 fn page_numbering(&self, _: Location) -> Option<&Numbering> {
116 None
117 }
118
119 fn page_supplement(&self, _: Location) -> Option<&Content> {
120 None
121 }
122
123 fn anchor(&self, location: Location) -> Option<&EcoString> {
124 self.anchors.get(&location)
125 }
126
127 fn document(&self, _: Location) -> Option<Location> {
128 None
129 }
130
131 fn path(&self, _: Location) -> Option<&VirtualPath> {
132 None
133 }
134}
135
136impl Debug for HtmlIntrospector {
137 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
138 f.pad("HtmlIntrospector(..)")
139 }
140}
141
142#[derive(Default)]
144struct HtmlIntrospectorBuilder {
145 elements: ElementIntrospectorBuilder<HtmlPosition>,
146 frame_link_targets: FxHashSet<Location>,
147}
148
149impl HtmlIntrospectorBuilder {
150 fn finish(self) -> HtmlIntrospector {
152 HtmlIntrospector {
153 elements: self.elements.finalize(),
154 frame_link_targets: self.frame_link_targets,
155 anchors: FxHashMap::default(),
156 }
157 }
158
159 fn discover_nodes(
161 &mut self,
162 nodes: &[HtmlNode],
163 current_position: &mut EcoVec<usize>,
164 ) {
165 for (node, dom_index) in nodes.iter_with_dom_indices() {
166 match node {
167 HtmlNode::Tag(tag) => {
168 current_position.push(dom_index);
169 self.elements
170 .discover_tag(tag, HtmlPosition::new(current_position.clone()));
171 current_position.pop();
172 }
173 HtmlNode::Text(_, _) => {}
174 HtmlNode::Element(elem) => {
175 let is_root = elem.tag == tag::html;
176 if !is_root {
177 current_position.push(dom_index);
178 }
179
180 if let Some(parent) = elem.parent {
181 self.elements.start_insertion();
182 self.discover_nodes(&elem.children, current_position);
183 self.elements.end_insertion(parent);
184 } else {
185 self.discover_nodes(&elem.children, current_position);
186 }
187
188 if !is_root {
189 current_position.pop();
190 }
191 }
192 HtmlNode::Frame(frame) => {
193 current_position.push(dom_index);
194 self.discover_frame(
195 &frame.inner,
196 Transform::identity(),
197 &mut |point| {
198 HtmlPosition::new(current_position.clone()).in_frame(point)
199 },
200 );
201 current_position.pop();
202 }
203 }
204 }
205 }
206
207 fn discover_frame<F>(&mut self, frame: &Frame, ts: Transform, to_pos: &mut F)
209 where
210 F: FnMut(Point) -> HtmlPosition,
211 {
212 for (pos, item) in frame.items() {
213 match item {
214 FrameItem::Tag(tag) => {
215 self.elements.discover_tag(tag, to_pos(pos.transform(ts)));
216 }
217 FrameItem::Group(group) => {
218 let ts = ts
219 .pre_concat(Transform::translate(pos.x, pos.y))
220 .pre_concat(group.transform);
221
222 if let Some(parent) = group.parent {
223 self.elements.start_insertion();
224 self.discover_frame(&group.frame, ts, to_pos);
225 self.elements.end_insertion(parent.location);
226 } else {
227 self.discover_frame(&group.frame, ts, to_pos);
228 }
229 }
230 FrameItem::Link(dest, _) => {
231 if let Destination::Location(loc) = dest {
232 self.frame_link_targets.insert(*loc);
233 }
234 }
235 FrameItem::Text(..) | FrameItem::Shape(..) | FrameItem::Image(..) => {}
236 }
237 }
238 }
239}