1use std::collections::{BTreeSet, HashMap, HashSet};
2use std::fmt::{self, Debug, Formatter};
3use std::hash::Hash;
4use std::num::NonZeroUsize;
5use std::sync::RwLock;
6
7use ecow::EcoVec;
8use smallvec::SmallVec;
9use typst_utils::NonZeroExt;
10
11use crate::diag::{bail, StrResult};
12use crate::foundations::{Content, Label, Repr, Selector};
13use crate::html::{HtmlElement, HtmlNode};
14use crate::introspection::{Location, Tag};
15use crate::layout::{Frame, FrameItem, Page, Point, Position, Transform};
16use crate::model::Numbering;
17
18#[derive(Default, Clone)]
20pub struct Introspector {
21 pages: usize,
23 page_numberings: Vec<Option<Numbering>>,
25 page_supplements: Vec<Content>,
27
28 elems: Vec<Pair>,
30 keys: MultiMap<u128, Location>,
33
34 locations: HashMap<Location, usize>,
36 labels: MultiMap<Label, usize>,
38
39 queries: QueryCache,
44}
45
46type Pair = (Content, Position);
48
49impl Introspector {
50 #[typst_macros::time(name = "introspect pages")]
52 pub fn paged(pages: &[Page]) -> Self {
53 IntrospectorBuilder::new().build_paged(pages)
54 }
55
56 #[typst_macros::time(name = "introspect html")]
58 pub fn html(root: &HtmlElement) -> Self {
59 IntrospectorBuilder::new().build_html(root)
60 }
61
62 pub fn all(&self) -> impl Iterator<Item = &Content> + '_ {
64 self.elems.iter().map(|(c, _)| c)
65 }
66
67 #[track_caller]
69 fn get_by_idx(&self, idx: usize) -> &Content {
70 &self.elems[idx].0
71 }
72
73 #[track_caller]
75 fn get_pos_by_idx(&self, idx: usize) -> Position {
76 self.elems[idx].1
77 }
78
79 fn get_by_loc(&self, location: &Location) -> Option<&Content> {
81 self.locations.get(location).map(|&idx| self.get_by_idx(idx))
82 }
83
84 fn get_pos_by_loc(&self, location: &Location) -> Option<Position> {
86 self.locations.get(location).map(|&idx| self.get_pos_by_idx(idx))
87 }
88
89 fn binary_search(&self, list: &[Content], elem: &Content) -> Result<usize, usize> {
91 list.binary_search_by_key(&self.elem_index(elem), |elem| self.elem_index(elem))
92 }
93
94 fn elem_index(&self, elem: &Content) -> usize {
96 self.loc_index(&elem.location().unwrap())
97 }
98
99 fn loc_index(&self, location: &Location) -> usize {
101 self.locations.get(location).copied().unwrap_or(usize::MAX)
102 }
103}
104
105#[comemo::track]
106impl Introspector {
107 pub fn query(&self, selector: &Selector) -> EcoVec<Content> {
109 let hash = typst_utils::hash128(selector);
110 if let Some(output) = self.queries.get(hash) {
111 return output;
112 }
113
114 let output = match selector {
115 Selector::Elem(..) => self
116 .all()
117 .filter(|elem| selector.matches(elem, None))
118 .cloned()
119 .collect(),
120 Selector::Location(location) => {
121 self.get_by_loc(location).cloned().into_iter().collect()
122 }
123 Selector::Label(label) => self
124 .labels
125 .get(label)
126 .iter()
127 .map(|&idx| self.get_by_idx(idx).clone())
128 .collect(),
129 Selector::Or(selectors) => selectors
130 .iter()
131 .flat_map(|sel| self.query(sel))
132 .map(|elem| self.elem_index(&elem))
133 .collect::<BTreeSet<usize>>()
134 .into_iter()
135 .map(|idx| self.get_by_idx(idx).clone())
136 .collect(),
137 Selector::And(selectors) => {
138 let mut results: Vec<_> =
139 selectors.iter().map(|sel| self.query(sel)).collect();
140
141 results
145 .iter()
146 .enumerate()
147 .min_by_key(|(_, vec)| vec.len())
148 .map(|(i, _)| i)
149 .map(|i| results.swap_remove(i))
150 .iter()
151 .flatten()
152 .filter(|candidate| {
153 results
154 .iter()
155 .all(|other| self.binary_search(other, candidate).is_ok())
156 })
157 .cloned()
158 .collect()
159 }
160 Selector::Before { selector, end, inclusive } => {
161 let mut list = self.query(selector);
162 if let Some(end) = self.query_first(end) {
163 let split = match self.binary_search(&list, &end) {
165 Ok(i) => i + *inclusive as usize,
167 Err(i) => i,
169 };
170 list = list[..split].into();
171 }
172 list
173 }
174 Selector::After { selector, start, inclusive } => {
175 let mut list = self.query(selector);
176 if let Some(start) = self.query_first(start) {
177 let split = match self.binary_search(&list, &start) {
179 Ok(i) => i + !*inclusive as usize,
181 Err(i) => i,
183 };
184 list = list[split..].into();
185 }
186 list
187 }
188 Selector::Can(_) | Selector::Regex(_) => EcoVec::new(),
190 };
191
192 self.queries.insert(hash, output.clone());
193 output
194 }
195
196 pub fn query_first(&self, selector: &Selector) -> Option<Content> {
198 match selector {
199 Selector::Location(location) => self.get_by_loc(location).cloned(),
200 Selector::Label(label) => self
201 .labels
202 .get(label)
203 .first()
204 .map(|&idx| self.get_by_idx(idx).clone()),
205 _ => self.query(selector).first().cloned(),
206 }
207 }
208
209 pub fn query_unique(&self, selector: &Selector) -> StrResult<Content> {
211 match selector {
212 Selector::Location(location) => self
213 .get_by_loc(location)
214 .cloned()
215 .ok_or_else(|| "element does not exist in the document".into()),
216 Selector::Label(label) => self.query_label(*label).cloned(),
217 _ => {
218 let elems = self.query(selector);
219 if elems.len() > 1 {
220 bail!("selector matches multiple elements",);
221 }
222 elems
223 .into_iter()
224 .next()
225 .ok_or_else(|| "selector does not match any element".into())
226 }
227 }
228 }
229
230 pub fn query_label(&self, label: Label) -> StrResult<&Content> {
232 match *self.labels.get(&label) {
233 [idx] => Ok(self.get_by_idx(idx)),
234 [] => bail!("label `{}` does not exist in the document", label.repr()),
235 _ => bail!("label `{}` occurs multiple times in the document", label.repr()),
236 }
237 }
238
239 pub fn query_count_before(&self, selector: &Selector, end: Location) -> usize {
242 let list = self.query(selector);
244 if let Some(end) = self.get_by_loc(&end) {
245 match self.binary_search(&list, end) {
246 Ok(i) => i + 1,
247 Err(i) => i,
248 }
249 } else {
250 list.len()
251 }
252 }
253
254 pub fn pages(&self) -> NonZeroUsize {
256 NonZeroUsize::new(self.pages).unwrap_or(NonZeroUsize::ONE)
257 }
258
259 pub fn page(&self, location: Location) -> NonZeroUsize {
261 self.position(location).page
262 }
263
264 pub fn position(&self, location: Location) -> Position {
266 self.get_pos_by_loc(&location)
267 .unwrap_or(Position { page: NonZeroUsize::ONE, point: Point::zero() })
268 }
269
270 pub fn page_numbering(&self, location: Location) -> Option<&Numbering> {
272 let page = self.page(location);
273 self.page_numberings
274 .get(page.get() - 1)
275 .and_then(|slot| slot.as_ref())
276 }
277
278 pub fn page_supplement(&self, location: Location) -> Content {
280 let page = self.page(location);
281 self.page_supplements.get(page.get() - 1).cloned().unwrap_or_default()
282 }
283
284 pub fn locator(&self, key: u128, anchor: Location) -> Option<Location> {
291 let anchor = self.loc_index(&anchor);
292 self.keys
293 .get(&key)
294 .iter()
295 .copied()
296 .min_by_key(|loc| self.loc_index(loc).wrapping_sub(anchor))
297 }
298}
299
300impl Debug for Introspector {
301 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
302 f.pad("Introspector(..)")
303 }
304}
305
306#[derive(Clone)]
308struct MultiMap<K, V>(HashMap<K, SmallVec<[V; 1]>>);
309
310impl<K, V> MultiMap<K, V>
311where
312 K: Hash + Eq,
313{
314 fn get(&self, key: &K) -> &[V] {
315 self.0.get(key).map_or(&[], |vec| vec.as_slice())
316 }
317
318 fn insert(&mut self, key: K, value: V) {
319 self.0.entry(key).or_default().push(value);
320 }
321
322 fn take(&mut self, key: &K) -> Option<impl Iterator<Item = V>> {
323 self.0.remove(key).map(|vec| vec.into_iter())
324 }
325}
326
327impl<K, V> Default for MultiMap<K, V> {
328 fn default() -> Self {
329 Self(HashMap::new())
330 }
331}
332
333#[derive(Default)]
335struct QueryCache(RwLock<HashMap<u128, EcoVec<Content>>>);
336
337impl QueryCache {
338 fn get(&self, hash: u128) -> Option<EcoVec<Content>> {
339 self.0.read().unwrap().get(&hash).cloned()
340 }
341
342 fn insert(&self, hash: u128, output: EcoVec<Content>) {
343 self.0.write().unwrap().insert(hash, output);
344 }
345}
346
347impl Clone for QueryCache {
348 fn clone(&self) -> Self {
349 Self(RwLock::new(self.0.read().unwrap().clone()))
350 }
351}
352
353#[derive(Default)]
355struct IntrospectorBuilder {
356 pages: usize,
357 page_numberings: Vec<Option<Numbering>>,
358 page_supplements: Vec<Content>,
359 seen: HashSet<Location>,
360 insertions: MultiMap<Location, Vec<Pair>>,
361 keys: MultiMap<u128, Location>,
362 locations: HashMap<Location, usize>,
363 labels: MultiMap<Label, usize>,
364}
365
366impl IntrospectorBuilder {
367 fn new() -> Self {
369 Self::default()
370 }
371
372 fn build_paged(mut self, pages: &[Page]) -> Introspector {
374 self.pages = pages.len();
375 self.page_numberings.reserve(pages.len());
376 self.page_supplements.reserve(pages.len());
377
378 let mut elems = Vec::new();
380 for (i, page) in pages.iter().enumerate() {
381 self.page_numberings.push(page.numbering.clone());
382 self.page_supplements.push(page.supplement.clone());
383 self.discover_in_frame(
384 &mut elems,
385 &page.frame,
386 NonZeroUsize::new(1 + i).unwrap(),
387 Transform::identity(),
388 );
389 }
390
391 self.finalize(elems)
392 }
393
394 fn build_html(mut self, root: &HtmlElement) -> Introspector {
396 let mut elems = Vec::new();
397 self.discover_in_html(&mut elems, root);
398 self.finalize(elems)
399 }
400
401 fn discover_in_frame(
403 &mut self,
404 sink: &mut Vec<Pair>,
405 frame: &Frame,
406 page: NonZeroUsize,
407 ts: Transform,
408 ) {
409 for (pos, item) in frame.items() {
410 match item {
411 FrameItem::Group(group) => {
412 let ts = ts
413 .pre_concat(Transform::translate(pos.x, pos.y))
414 .pre_concat(group.transform);
415
416 if let Some(parent) = group.parent {
417 let mut nested = vec![];
418 self.discover_in_frame(&mut nested, &group.frame, page, ts);
419 self.insertions.insert(parent, nested);
420 } else {
421 self.discover_in_frame(sink, &group.frame, page, ts);
422 }
423 }
424 FrameItem::Tag(tag) => {
425 self.discover_in_tag(
426 sink,
427 tag,
428 Position { page, point: pos.transform(ts) },
429 );
430 }
431 _ => {}
432 }
433 }
434 }
435
436 fn discover_in_html(&mut self, sink: &mut Vec<Pair>, elem: &HtmlElement) {
438 for child in &elem.children {
439 match child {
440 HtmlNode::Tag(tag) => self.discover_in_tag(
441 sink,
442 tag,
443 Position { page: NonZeroUsize::ONE, point: Point::zero() },
444 ),
445 HtmlNode::Text(_, _) => {}
446 HtmlNode::Element(elem) => self.discover_in_html(sink, elem),
447 HtmlNode::Frame(frame) => self.discover_in_frame(
448 sink,
449 frame,
450 NonZeroUsize::ONE,
451 Transform::identity(),
452 ),
453 }
454 }
455 }
456
457 fn discover_in_tag(&mut self, sink: &mut Vec<Pair>, tag: &Tag, position: Position) {
459 match tag {
460 Tag::Start(elem) => {
461 let loc = elem.location().unwrap();
462 if self.seen.insert(loc) {
463 sink.push((elem.clone(), position));
464 }
465 }
466 Tag::End(loc, key) => {
467 self.keys.insert(*key, *loc);
468 }
469 }
470 }
471
472 fn finalize(mut self, root: Vec<Pair>) -> Introspector {
475 self.locations.reserve(self.seen.len());
476
477 let mut elems = Vec::with_capacity(self.seen.len());
479 for pair in root {
480 self.visit(&mut elems, pair);
481 }
482
483 Introspector {
484 pages: self.pages,
485 page_numberings: self.page_numberings,
486 page_supplements: self.page_supplements,
487 elems,
488 keys: self.keys,
489 locations: self.locations,
490 labels: self.labels,
491 queries: QueryCache::default(),
492 }
493 }
494
495 fn visit(&mut self, elems: &mut Vec<Pair>, pair: Pair) {
498 let elem = &pair.0;
499 let loc = elem.location().unwrap();
500 let idx = elems.len();
501
502 self.locations.insert(loc, idx);
504
505 if let Some(label) = elem.label() {
507 self.labels.insert(label, idx);
508 }
509
510 elems.push(pair);
512
513 if let Some(insertions) = self.insertions.take(&loc) {
515 for pair in insertions.flatten() {
516 self.visit(elems, pair);
517 }
518 }
519 }
520}