1use std::collections::BTreeSet;
2use std::hash::Hash;
3use std::num::NonZeroUsize;
4use std::ops::Range;
5use std::sync::RwLock;
6
7use comemo::{Track, Tracked};
8use ecow::{EcoString, EcoVec};
9use rustc_hash::{FxHashMap, FxHashSet};
10use smallvec::SmallVec;
11use typst_syntax::VirtualPath;
12
13use crate::diag::{StrResult, bail};
14use crate::foundations::{Content, Label, Repr, Selector};
15use crate::introspection::{DocumentPosition, Location, Tag};
16use crate::model::Numbering;
17
18#[comemo::track]
29pub trait Introspector: Send + Sync {
30 fn query(&self, selector: &Selector) -> EcoVec<Content>;
32
33 fn query_first(&self, selector: &Selector) -> Option<Content>;
35
36 fn query_unique(&self, selector: &Selector) -> StrResult<Content>;
38
39 fn query_label(&self, label: Label) -> StrResult<&Content>;
41
42 fn query_labelled(&self) -> EcoVec<Content>;
44
45 fn query_count_before(&self, selector: &Selector, end: Location) -> usize;
48
49 fn label_count(&self, label: Label) -> usize;
51
52 fn locator(&self, key: u128, base: Location) -> Option<Location>;
59
60 fn pages(&self, location: Location) -> Option<NonZeroUsize>;
63
64 fn page(&self, location: Location) -> Option<NonZeroUsize>;
66
67 fn position(&self, location: Location) -> Option<DocumentPosition>;
69
70 fn page_numbering(&self, location: Location) -> Option<&Numbering>;
72
73 fn page_supplement(&self, location: Location) -> Option<&Content>;
75
76 fn anchor(&self, location: Location) -> Option<&EcoString>;
78
79 fn document(&self, location: Location) -> Option<Location>;
82
83 fn path(&self, location: Location) -> Option<&VirtualPath>;
89}
90
91pub struct EmptyIntrospector;
93
94impl EmptyIntrospector {
95 pub fn track(&self) -> Tracked<'_, dyn Introspector + '_> {
96 (self as &dyn Introspector).track()
97 }
98}
99
100impl Introspector for EmptyIntrospector {
101 fn query(&self, _: &Selector) -> EcoVec<Content> {
102 EcoVec::new()
103 }
104
105 fn query_first(&self, _: &Selector) -> Option<Content> {
106 None
107 }
108
109 fn query_unique(&self, _: &Selector) -> StrResult<Content> {
110 bail!("selector does not match any element");
111 }
112
113 fn query_label(&self, label: Label) -> StrResult<&Content> {
114 bail!("label `{}` does not exist in the document", label.repr());
115 }
116
117 fn query_labelled(&self) -> EcoVec<Content> {
118 EcoVec::new()
119 }
120
121 fn query_count_before(&self, _: &Selector, _: Location) -> usize {
122 0
123 }
124
125 fn label_count(&self, _: Label) -> usize {
126 0
127 }
128
129 fn locator(&self, _: u128, _: Location) -> Option<Location> {
130 None
131 }
132
133 fn pages(&self, _: Location) -> Option<NonZeroUsize> {
134 None
135 }
136
137 fn page(&self, _: Location) -> Option<NonZeroUsize> {
138 None
139 }
140
141 fn position(&self, _: Location) -> Option<DocumentPosition> {
142 None
143 }
144
145 fn page_numbering(&self, _: Location) -> Option<&Numbering> {
146 None
147 }
148
149 fn page_supplement(&self, _: Location) -> Option<&Content> {
150 None
151 }
152
153 fn anchor(&self, _: Location) -> Option<&EcoString> {
154 None
155 }
156
157 fn document(&self, _: Location) -> Option<Location> {
158 None
159 }
160
161 fn path(&self, _: Location) -> Option<&VirtualPath> {
162 None
163 }
164}
165
166#[derive(Clone)]
170pub struct ElementIntrospector<P> {
171 elems: Vec<(Content, P)>,
173 keys: MultiMap<u128, Location>,
176 locations: FxHashMap<Location, Range<usize>>,
183 labels: MultiMap<Label, usize>,
185 queries: QueryCache,
190}
191
192impl<P> ElementIntrospector<P> {
193 pub fn query(&self, selector: &Selector) -> EcoVec<Content> {
195 let hash = typst_utils::hash128(selector);
196 if let Some(output) = self.queries.get(hash) {
197 return output;
198 }
199
200 let output = match selector {
201 Selector::Elem(..) => self
202 .all()
203 .filter(|elem| selector.matches(elem, None))
204 .cloned()
205 .collect(),
206 Selector::Location(location) => {
207 self.get_by_loc(location).cloned().into_iter().collect()
208 }
209 Selector::Label(label) => self
210 .labels
211 .get(label)
212 .iter()
213 .map(|&idx| self.get_by_idx(idx).clone())
214 .collect(),
215 Selector::Or(selectors) => selectors
216 .iter()
217 .flat_map(|sel| self.query(sel))
218 .map(|elem| self.elem_index(&elem))
219 .collect::<BTreeSet<usize>>()
220 .into_iter()
221 .map(|idx| self.get_by_idx(idx).clone())
222 .collect(),
223 Selector::And(selectors) => {
224 let mut results: Vec<_> =
225 selectors.iter().map(|sel| self.query(sel)).collect();
226
227 results
231 .iter()
232 .enumerate()
233 .min_by_key(|(_, vec)| vec.len())
234 .map(|(i, _)| i)
235 .map(|i| results.swap_remove(i))
236 .iter()
237 .flatten()
238 .filter(|candidate| {
239 results
240 .iter()
241 .all(|other| self.binary_search(other, candidate).is_ok())
242 })
243 .cloned()
244 .collect()
245 }
246 Selector::Before { selector, end, inclusive } => {
247 let mut list = self.query(selector);
248 if let Some(end) = self.query_first(end) {
249 let split = match self.binary_search(&list, &end) {
251 Ok(i) => i + *inclusive as usize,
253 Err(i) => i,
255 };
256 list = list[..split].into();
257 }
258 list
259 }
260 Selector::After { selector, start, inclusive } => {
261 let mut list = self.query(selector);
262 if let Some(start) = self.query_first(start) {
263 let split = match self.binary_search(&list, &start) {
265 Ok(i) => i + !*inclusive as usize,
267 Err(i) => i,
269 };
270 list = list[split..].into();
271 }
272 list
273 }
274 Selector::Within { selector, ancestor } => {
275 let list = self.query(selector);
276 let ancestors = self.query(ancestor);
277
278 let mut out = EcoVec::new();
279 let mut visited = 0;
280
281 for ancestor in &ancestors {
286 let loc = ancestor.location().unwrap();
287 let Range { start, end } = self.loc_range(&loc);
288
289 let start_in_list = match list
290 .binary_search_by_key(&start, |elem| self.elem_index(elem))
291 {
292 Ok(i) => i + 1,
296 Err(i) => i,
300 };
301
302 let end_in_list = match list.binary_search_by_key(&end, |elem| {
303 self.loc_range(&elem.location().unwrap()).end
304 }) {
305 Ok(i) => i + 1,
312 Err(i) => i,
316 };
317
318 if end_in_list < start_in_list {
322 debug_assert_eq!(end_in_list + 1, start_in_list);
323 continue;
324 }
325
326 let start_in_list = start_in_list.max(visited);
329 let end_in_list = end_in_list.max(visited);
330 out.extend(list[start_in_list..end_in_list].iter().cloned());
331 visited = end_in_list;
332 }
333
334 out
335 }
336 Selector::Can(_) | Selector::Regex(_) => EcoVec::new(),
338 };
339
340 self.queries.insert(hash, output.clone());
341 output
342 }
343
344 pub fn query_first(&self, selector: &Selector) -> Option<Content> {
346 match selector {
347 Selector::Location(location) => self.get_by_loc(location).cloned(),
348 Selector::Label(label) => self
349 .labels
350 .get(label)
351 .first()
352 .map(|&idx| self.get_by_idx(idx).clone()),
353 _ => self.query(selector).first().cloned(),
354 }
355 }
356
357 pub fn query_unique(&self, selector: &Selector) -> StrResult<Content> {
359 match selector {
360 Selector::Location(location) => self
361 .get_by_loc(location)
362 .cloned()
363 .ok_or_else(|| "element does not exist in the document".into()),
364 Selector::Label(label) => self.query_label(*label).cloned(),
365 _ => {
366 let elems = self.query(selector);
367 if elems.len() > 1 {
368 bail!("selector matches multiple elements",);
369 }
370 elems
371 .into_iter()
372 .next()
373 .ok_or_else(|| "selector does not match any element".into())
374 }
375 }
376 }
377
378 pub fn query_label(&self, label: Label) -> StrResult<&Content> {
380 match *self.labels.get(&label) {
381 [idx] => Ok(self.get_by_idx(idx)),
382 [] => bail!("label `{}` does not exist in the document", label.repr()),
383 _ => bail!("label `{}` occurs multiple times in the document", label.repr()),
384 }
385 }
386
387 pub fn query_labelled(&self) -> EcoVec<Content> {
389 self.all().filter(|c| c.label().is_some()).cloned().collect()
390 }
391
392 pub fn query_count_before(&self, selector: &Selector, end: Location) -> usize {
395 let list = self.query(selector);
397 if let Some(end) = self.get_by_loc(&end) {
398 match self.binary_search(&list, end) {
399 Ok(i) => i + 1,
400 Err(i) => i,
401 }
402 } else {
403 list.len()
404 }
405 }
406
407 pub fn label_count(&self, label: Label) -> usize {
409 self.labels.get(&label).len()
410 }
411
412 pub fn locator(&self, key: u128, base: Location) -> Option<Location> {
415 let base = self.loc_index(&base);
416 self.keys
417 .get(&key)
418 .iter()
419 .copied()
420 .min_by_key(|loc| self.loc_index(loc).wrapping_sub(base))
421 }
422
423 pub fn position(&self, location: Location) -> Option<&P> {
426 self.locations.get(&location).map(|r| self.get_pos_by_idx(r.start))
427 }
428
429 pub fn all(&self) -> impl Iterator<Item = &Content> + '_ {
431 self.elems.iter().map(|(c, _)| c)
432 }
433
434 #[track_caller]
436 pub fn get_by_idx(&self, idx: usize) -> &Content {
437 &self.elems[idx].0
438 }
439
440 #[track_caller]
442 pub fn get_pos_by_idx(&self, idx: usize) -> &P {
443 &self.elems[idx].1
444 }
445
446 pub fn get_by_loc(&self, location: &Location) -> Option<&Content> {
448 self.locations.get(location).map(|r| self.get_by_idx(r.start))
449 }
450
451 pub fn binary_search(
453 &self,
454 list: &[Content],
455 elem: &Content,
456 ) -> Result<usize, usize> {
457 list.binary_search_by_key(&self.elem_index(elem), |elem| self.elem_index(elem))
458 }
459
460 pub fn elem_index(&self, elem: &Content) -> usize {
462 self.loc_index(&elem.location().unwrap())
463 }
464
465 pub fn loc_index(&self, location: &Location) -> usize {
467 self.locations.get(location).map(|r| r.start).unwrap_or(usize::MAX)
468 }
469
470 pub fn loc_range(&self, location: &Location) -> Range<usize> {
472 self.locations
473 .get(location)
474 .cloned()
475 .unwrap_or(usize::MAX..usize::MAX)
476 }
477}
478
479pub struct ElementIntrospectorBuilder<P> {
481 stack: Vec<Vec<BuilderItem<P>>>,
482 sink: Vec<BuilderItem<P>>,
483 seen: FxHashSet<Location>,
484 insertions: MultiMap<Location, Vec<BuilderItem<P>>>,
485 keys: MultiMap<u128, Location>,
486 locations: FxHashMap<Location, Range<usize>>,
487 labels: MultiMap<Label, usize>,
488}
489
490enum BuilderItem<P> {
492 Start(Content, P),
494 End(Location),
496}
497
498impl<P> ElementIntrospectorBuilder<P> {
499 pub fn new() -> Self {
501 Self {
502 stack: Vec::new(),
503 sink: Vec::new(),
504 seen: FxHashSet::default(),
505 insertions: MultiMap::default(),
506 keys: MultiMap::default(),
507 locations: FxHashMap::default(),
508 labels: MultiMap::default(),
509 }
510 }
511
512 pub fn discover_tag(&mut self, tag: &Tag, position: P) {
514 match tag {
515 Tag::Start(elem, flags) => {
516 if flags.introspectable {
517 let loc = elem.location().unwrap();
518 if self.seen.insert(loc) {
519 self.sink.push(BuilderItem::Start(elem.clone(), position));
520 }
521 }
522 }
523 Tag::End(loc, key, flags) => {
524 if flags.introspectable {
525 self.keys.insert(*key, *loc);
526 self.sink.push(BuilderItem::End(*loc));
527 }
528 }
529 }
530 }
531
532 pub fn discover_elements<Q>(
534 &mut self,
535 elements: &ElementIntrospector<Q>,
536 map_position: impl Fn(&Q) -> P,
537 ) {
538 self.sink.reserve(2 * elements.elems.len());
543 let mut queued = MultiMap::default();
544 for (i, (elem, q)) in elements.elems.iter().enumerate() {
545 let loc = elem.location().unwrap();
546 if self.seen.insert(loc) {
547 let range = elements.locations.get(&loc).unwrap();
548 let position = map_position(q);
549 self.sink.push(BuilderItem::Start(elem.clone(), position));
550 debug_assert_eq!(range.start, i);
551 queued.insert(range.end, loc);
552 }
553 for &end in queued.get(&(i + 1)).iter().rev() {
554 self.sink.push(BuilderItem::End(end));
555 }
556 }
557 self.keys.extend(&elements.keys);
558 }
559
560 pub fn start_insertion(&mut self) {
563 self.stack.push(std::mem::take(&mut self.sink));
564 }
565
566 #[track_caller]
568 pub fn end_insertion(&mut self, parent: Location) {
569 let elems = std::mem::replace(
570 &mut self.sink,
571 self.stack.pop().expect("insertion to have been started"),
572 );
573 self.insertions.insert(parent, elems);
574 }
575
576 pub fn finalize(mut self) -> ElementIntrospector<P> {
579 self.locations.reserve(self.seen.len());
580
581 let mut elems = Vec::with_capacity(self.seen.len());
583 for item in std::mem::take(&mut self.sink) {
584 self.visit(&mut elems, item);
585 }
586
587 ElementIntrospector {
588 elems,
589 keys: self.keys,
590 locations: self.locations,
591 labels: self.labels,
592 queries: QueryCache::default(),
593 }
594 }
595
596 fn visit(&mut self, elems: &mut Vec<(Content, P)>, item: BuilderItem<P>) {
599 match item {
600 BuilderItem::Start(elem, pos) => {
601 let loc = elem.location().unwrap();
602 let idx = elems.len();
603
604 self.locations.insert(loc, idx..idx + 1);
608
609 if let Some(label) = elem.label() {
611 self.labels.insert(label, idx);
612 }
613
614 elems.push((elem, pos));
616
617 if let Some(insertions) = self.insertions.take(&loc) {
619 for pair in insertions.flatten() {
620 self.visit(elems, pair);
621 }
622 }
623 }
624 BuilderItem::End(loc) => {
625 if let Some(entry) = self.locations.get_mut(&loc) {
627 entry.end = elems.len();
628 }
629 }
630 }
631 }
632}
633
634impl<P> Default for ElementIntrospectorBuilder<P> {
635 fn default() -> Self {
636 Self::new()
637 }
638}
639
640#[derive(Clone)]
642struct MultiMap<K, V>(FxHashMap<K, SmallVec<[V; 1]>>);
643
644impl<K, V> MultiMap<K, V>
645where
646 K: Hash + Eq,
647{
648 fn get(&self, key: &K) -> &[V] {
649 self.0.get(key).map_or(&[], |vec| vec.as_slice())
650 }
651
652 fn iter<'a>(&'a self) -> impl Iterator<Item = (&'a K, &'a [V])> + use<'a, K, V> {
653 self.0.iter().map(|(k, v)| (k, v.as_slice()))
654 }
655
656 fn insert(&mut self, key: K, value: V) {
657 self.0.entry(key).or_default().push(value);
658 }
659
660 fn insert_iter(&mut self, key: K, values: impl IntoIterator<Item = V>) {
661 self.0.entry(key).or_default().extend(values);
662 }
663
664 fn take(&mut self, key: &K) -> Option<impl Iterator<Item = V> + use<K, V>> {
665 self.0.remove(key).map(|vec| vec.into_iter())
666 }
667
668 fn extend(&mut self, other: &Self)
669 where
670 K: Clone,
671 V: Clone,
672 {
673 for (key, locs) in other.iter() {
674 self.insert_iter(key.clone(), locs.iter().cloned());
675 }
676 }
677}
678
679impl<K, V> Default for MultiMap<K, V> {
680 fn default() -> Self {
681 Self(FxHashMap::default())
682 }
683}
684
685#[derive(Default)]
687struct QueryCache(RwLock<FxHashMap<u128, EcoVec<Content>>>);
688
689impl QueryCache {
690 fn get(&self, hash: u128) -> Option<EcoVec<Content>> {
691 self.0.read().unwrap().get(&hash).cloned()
692 }
693
694 fn insert(&self, hash: u128, output: EcoVec<Content>) {
695 self.0.write().unwrap().insert(hash, output);
696 }
697}
698
699impl Clone for QueryCache {
700 fn clone(&self) -> Self {
701 Self(RwLock::new(self.0.read().unwrap().clone()))
702 }
703}