Skip to main content

typst_library/introspection/
introspector.rs

1use std::collections::BTreeSet;
2use std::hash::Hash;
3use std::num::NonZeroUsize;
4use std::ops::Range;
5use std::sync::RwLock;
6
7use comemo::{Track, Tracked};
8use ecow::{EcoString, EcoVec};
9use rustc_hash::{FxHashMap, FxHashSet};
10use smallvec::SmallVec;
11use typst_syntax::VirtualPath;
12
13use crate::diag::{StrResult, bail};
14use crate::foundations::{Content, Label, Repr, Selector};
15use crate::introspection::{DocumentPosition, Location, Tag};
16use crate::model::Numbering;
17
18/// Serves inquiries for pieces of information from the compilation output.
19///
20/// See [`Introspect`](crate::introspection::Introspect) for general information
21/// about introspection.
22///
23/// This trait is implemented by [target-specific](crate::foundations::Target)
24/// introspectors. These must implement all methods to provide a unified
25/// interface to the Typst standard library, but may return `None` or error in
26/// some methods, depending on the specifics of the target. The HTML target, for
27/// instance, will return `None` for [`page`](Self::page) requests.
28#[comemo::track]
29pub trait Introspector: Send + Sync {
30    /// Queries for all matching elements.
31    fn query(&self, selector: &Selector) -> EcoVec<Content>;
32
33    /// Queries for the first element that matches the selector.
34    fn query_first(&self, selector: &Selector) -> Option<Content>;
35
36    /// Queries for the first element that matches the selector.
37    fn query_unique(&self, selector: &Selector) -> StrResult<Content>;
38
39    /// Queries for a unique element with the label.
40    fn query_label(&self, label: Label) -> StrResult<&Content>;
41
42    /// Queries for all elements with a label.
43    fn query_labelled(&self) -> EcoVec<Content>;
44
45    /// An optimized version of `query(selector.before(end, true).len()` used by
46    /// counters and state.
47    fn query_count_before(&self, selector: &Selector, end: Location) -> usize;
48
49    /// Checks how many times a label exists.
50    fn label_count(&self, label: Label) -> usize;
51
52    /// Tries to find a location for an element with the given `key` hash
53    /// that is closest after the `base`.
54    ///
55    /// This is used for introspector-assisted location assignment during
56    /// measurement. See the "Dealing with Measurement" section of the
57    /// [`Locator`](crate::introspection::Locator) docs for more details.
58    fn locator(&self, key: u128, base: Location) -> Option<Location>;
59
60    /// Returns the total number of pages in the document that contains the
61    /// given location.
62    fn pages(&self, location: Location) -> Option<NonZeroUsize>;
63
64    /// Returns the page number for the given location.
65    fn page(&self, location: Location) -> Option<NonZeroUsize>;
66
67    /// Returns the position for the given location.
68    fn position(&self, location: Location) -> Option<DocumentPosition>;
69
70    /// Returns the page numbering for the given location, if any.
71    fn page_numbering(&self, location: Location) -> Option<&Numbering>;
72
73    /// Returns the page supplement for the given location, if any.
74    fn page_supplement(&self, location: Location) -> Option<&Content>;
75
76    /// Retrieves the anchor to link to for this location in HTML export.
77    fn anchor(&self, location: Location) -> Option<&EcoString>;
78
79    /// Returns the location of the document which has/contains the given
80    /// location.
81    fn document(&self, location: Location) -> Option<Location>;
82
83    /// Returns the file path of the document/asset which has or contains the
84    /// given location.
85    ///
86    /// Returns `None` in a single document (not a bundle) or if the location is
87    /// not associated with a document or asset (top-level in a bundle).
88    fn path(&self, location: Location) -> Option<&VirtualPath>;
89}
90
91/// An introspector that returns empty results for all inquiries.
92pub struct EmptyIntrospector;
93
94impl EmptyIntrospector {
95    pub fn track(&self) -> Tracked<'_, dyn Introspector + '_> {
96        (self as &dyn Introspector).track()
97    }
98}
99
100impl Introspector for EmptyIntrospector {
101    fn query(&self, _: &Selector) -> EcoVec<Content> {
102        EcoVec::new()
103    }
104
105    fn query_first(&self, _: &Selector) -> Option<Content> {
106        None
107    }
108
109    fn query_unique(&self, _: &Selector) -> StrResult<Content> {
110        bail!("selector does not match any element");
111    }
112
113    fn query_label(&self, label: Label) -> StrResult<&Content> {
114        bail!("label `{}` does not exist in the document", label.repr());
115    }
116
117    fn query_labelled(&self) -> EcoVec<Content> {
118        EcoVec::new()
119    }
120
121    fn query_count_before(&self, _: &Selector, _: Location) -> usize {
122        0
123    }
124
125    fn label_count(&self, _: Label) -> usize {
126        0
127    }
128
129    fn locator(&self, _: u128, _: Location) -> Option<Location> {
130        None
131    }
132
133    fn pages(&self, _: Location) -> Option<NonZeroUsize> {
134        None
135    }
136
137    fn page(&self, _: Location) -> Option<NonZeroUsize> {
138        None
139    }
140
141    fn position(&self, _: Location) -> Option<DocumentPosition> {
142        None
143    }
144
145    fn page_numbering(&self, _: Location) -> Option<&Numbering> {
146        None
147    }
148
149    fn page_supplement(&self, _: Location) -> Option<&Content> {
150        None
151    }
152
153    fn anchor(&self, _: Location) -> Option<&EcoString> {
154        None
155    }
156
157    fn document(&self, _: Location) -> Option<Location> {
158        None
159    }
160
161    fn path(&self, _: Location) -> Option<&VirtualPath> {
162        None
163    }
164}
165
166/// An underlying target-agnostic introspector used for most queries.
167///
168/// The parameter `P` represents a position type for the relevant target.
169#[derive(Clone)]
170pub struct ElementIntrospector<P> {
171    /// All introspectable elements.
172    elems: Vec<(Content, P)>,
173    /// Lists all elements with a specific hash key. This is used for
174    /// introspector-assisted location assignment during measurement.
175    keys: MultiMap<u128, Location>,
176    /// Accelerates lookup of elements by location.
177    ///
178    /// Holds a range pointing into `elements` that covers the element and all
179    /// its conceptual descendants. The first element in the range (i.e.
180    /// `elems[range.start]` is the element with the location itself while the
181    /// last element is its right-most descendants).
182    locations: FxHashMap<Location, Range<usize>>,
183    /// Accelerates lookup of elements by label.
184    labels: MultiMap<Label, usize>,
185    /// Caches queries done on the introspector. This is important because
186    /// even if all top-level queries are distinct, they often have shared
187    /// subqueries. Example: Individual counter queries with `before` that
188    /// all depend on a global counter query.
189    queries: QueryCache,
190}
191
192impl<P> ElementIntrospector<P> {
193    /// Queries for all matching elements.
194    pub fn query(&self, selector: &Selector) -> EcoVec<Content> {
195        let hash = typst_utils::hash128(selector);
196        if let Some(output) = self.queries.get(hash) {
197            return output;
198        }
199
200        let output = match selector {
201            Selector::Elem(..) => self
202                .all()
203                .filter(|elem| selector.matches(elem, None))
204                .cloned()
205                .collect(),
206            Selector::Location(location) => {
207                self.get_by_loc(location).cloned().into_iter().collect()
208            }
209            Selector::Label(label) => self
210                .labels
211                .get(label)
212                .iter()
213                .map(|&idx| self.get_by_idx(idx).clone())
214                .collect(),
215            Selector::Or(selectors) => selectors
216                .iter()
217                .flat_map(|sel| self.query(sel))
218                .map(|elem| self.elem_index(&elem))
219                .collect::<BTreeSet<usize>>()
220                .into_iter()
221                .map(|idx| self.get_by_idx(idx).clone())
222                .collect(),
223            Selector::And(selectors) => {
224                let mut results: Vec<_> =
225                    selectors.iter().map(|sel| self.query(sel)).collect();
226
227                // Extract the smallest result list and then keep only those
228                // elements in the smallest list that are also in all other
229                // lists.
230                results
231                    .iter()
232                    .enumerate()
233                    .min_by_key(|(_, vec)| vec.len())
234                    .map(|(i, _)| i)
235                    .map(|i| results.swap_remove(i))
236                    .iter()
237                    .flatten()
238                    .filter(|candidate| {
239                        results
240                            .iter()
241                            .all(|other| self.binary_search(other, candidate).is_ok())
242                    })
243                    .cloned()
244                    .collect()
245            }
246            Selector::Before { selector, end, inclusive } => {
247                let mut list = self.query(selector);
248                if let Some(end) = self.query_first(end) {
249                    // Determine which elements are before `end`.
250                    let split = match self.binary_search(&list, &end) {
251                        // Element itself is contained.
252                        Ok(i) => i + *inclusive as usize,
253                        // Element itself is not contained.
254                        Err(i) => i,
255                    };
256                    list = list[..split].into();
257                }
258                list
259            }
260            Selector::After { selector, start, inclusive } => {
261                let mut list = self.query(selector);
262                if let Some(start) = self.query_first(start) {
263                    // Determine which elements are after `start`.
264                    let split = match self.binary_search(&list, &start) {
265                        // Element itself is contained.
266                        Ok(i) => i + !*inclusive as usize,
267                        // Element itself is not contained.
268                        Err(i) => i,
269                    };
270                    list = list[split..].into();
271                }
272                list
273            }
274            Selector::Within { selector, ancestor } => {
275                let list = self.query(selector);
276                let ancestors = self.query(ancestor);
277
278                let mut out = EcoVec::new();
279                let mut visited = 0;
280
281                // Walk the ancestors in order, collecting all elements in
282                // `list` that are descendants of them. Elements in the list
283                // that are descendants of multiple ancestors are yielded only
284                // once by virtue of `visited`.
285                for ancestor in &ancestors {
286                    let loc = ancestor.location().unwrap();
287                    let Range { start, end } = self.loc_range(&loc);
288
289                    let start_in_list = match list
290                        .binary_search_by_key(&start, |elem| self.elem_index(elem))
291                    {
292                        // The element and the ancestor start at the same index.
293                        // This means they are one and the same. The within
294                        // selector is not inclusive, so we exclude it.
295                        Ok(i) => i + 1,
296                        // The ancestor's insertion index would be at `i`, so
297                        // the element currently at `i` is later than it and
298                        // should be included.
299                        Err(i) => i,
300                    };
301
302                    let end_in_list = match list.binary_search_by_key(&end, |elem| {
303                        self.loc_range(&elem.location().unwrap()).end
304                    }) {
305                        // The element and the ancestor end in the same place.
306                        // They might be the same, but it's equally possible for
307                        // the element to be a rightmost leaf of the ancestor.
308                        // If it's the same, we already exclude it via `start`
309                        // above and if it's a rightmost leaf, we want to
310                        // include it.
311                        Ok(i) => i + 1,
312                        // The ancestor's end index would be at `i`, so the
313                        // element right before `i` is earlier than it and
314                        // should be included.
315                        Err(i) => i,
316                    };
317
318                    // If the ancestor is fully contained in one of the list
319                    // elements, we exclude the list element from both the start
320                    // and end, leading to `end < start``.
321                    if end_in_list < start_in_list {
322                        debug_assert_eq!(end_in_list + 1, start_in_list);
323                        continue;
324                    }
325
326                    // Clamp at `visited` to ensure we don't yield elements
327                    // twice.
328                    let start_in_list = start_in_list.max(visited);
329                    let end_in_list = end_in_list.max(visited);
330                    out.extend(list[start_in_list..end_in_list].iter().cloned());
331                    visited = end_in_list;
332                }
333
334                out
335            }
336            // Not supported here.
337            Selector::Can(_) | Selector::Regex(_) => EcoVec::new(),
338        };
339
340        self.queries.insert(hash, output.clone());
341        output
342    }
343
344    /// Queries for the first element that matches the selector.
345    pub fn query_first(&self, selector: &Selector) -> Option<Content> {
346        match selector {
347            Selector::Location(location) => self.get_by_loc(location).cloned(),
348            Selector::Label(label) => self
349                .labels
350                .get(label)
351                .first()
352                .map(|&idx| self.get_by_idx(idx).clone()),
353            _ => self.query(selector).first().cloned(),
354        }
355    }
356
357    /// Queries for the first element that matches the selector.
358    pub fn query_unique(&self, selector: &Selector) -> StrResult<Content> {
359        match selector {
360            Selector::Location(location) => self
361                .get_by_loc(location)
362                .cloned()
363                .ok_or_else(|| "element does not exist in the document".into()),
364            Selector::Label(label) => self.query_label(*label).cloned(),
365            _ => {
366                let elems = self.query(selector);
367                if elems.len() > 1 {
368                    bail!("selector matches multiple elements",);
369                }
370                elems
371                    .into_iter()
372                    .next()
373                    .ok_or_else(|| "selector does not match any element".into())
374            }
375        }
376    }
377
378    /// Queries for a unique element with the label.
379    pub fn query_label(&self, label: Label) -> StrResult<&Content> {
380        match *self.labels.get(&label) {
381            [idx] => Ok(self.get_by_idx(idx)),
382            [] => bail!("label `{}` does not exist in the document", label.repr()),
383            _ => bail!("label `{}` occurs multiple times in the document", label.repr()),
384        }
385    }
386
387    /// Queries for all elements with a label.
388    pub fn query_labelled(&self) -> EcoVec<Content> {
389        self.all().filter(|c| c.label().is_some()).cloned().collect()
390    }
391
392    /// An optimized version of `query(selector.before(end, true).len()` used by
393    /// counters and state.
394    pub fn query_count_before(&self, selector: &Selector, end: Location) -> usize {
395        // See `query()` for details.
396        let list = self.query(selector);
397        if let Some(end) = self.get_by_loc(&end) {
398            match self.binary_search(&list, end) {
399                Ok(i) => i + 1,
400                Err(i) => i,
401            }
402        } else {
403            list.len()
404        }
405    }
406
407    /// Checks how many times a label exists.
408    pub fn label_count(&self, label: Label) -> usize {
409        self.labels.get(&label).len()
410    }
411
412    /// Tries to find a location for an element with the given `key` hash
413    /// that is closest after the `base`.
414    pub fn locator(&self, key: u128, base: Location) -> Option<Location> {
415        let base = self.loc_index(&base);
416        self.keys
417            .get(&key)
418            .iter()
419            .copied()
420            .min_by_key(|loc| self.loc_index(loc).wrapping_sub(base))
421    }
422
423    /// Returns the target-specific position of the element at the given
424    /// location.
425    pub fn position(&self, location: Location) -> Option<&P> {
426        self.locations.get(&location).map(|r| self.get_pos_by_idx(r.start))
427    }
428
429    /// Iterates over all locatable elements.
430    pub fn all(&self) -> impl Iterator<Item = &Content> + '_ {
431        self.elems.iter().map(|(c, _)| c)
432    }
433
434    /// Retrieves the element with the given index.
435    #[track_caller]
436    pub fn get_by_idx(&self, idx: usize) -> &Content {
437        &self.elems[idx].0
438    }
439
440    /// Retrieves the position of the element with the given index.
441    #[track_caller]
442    pub fn get_pos_by_idx(&self, idx: usize) -> &P {
443        &self.elems[idx].1
444    }
445
446    /// Retrieves an element by its location.
447    pub fn get_by_loc(&self, location: &Location) -> Option<&Content> {
448        self.locations.get(location).map(|r| self.get_by_idx(r.start))
449    }
450
451    /// Performs a binary search for `elem` among the `list`.
452    pub fn binary_search(
453        &self,
454        list: &[Content],
455        elem: &Content,
456    ) -> Result<usize, usize> {
457        list.binary_search_by_key(&self.elem_index(elem), |elem| self.elem_index(elem))
458    }
459
460    /// Gets the index of this element.
461    pub fn elem_index(&self, elem: &Content) -> usize {
462        self.loc_index(&elem.location().unwrap())
463    }
464
465    /// Gets the index of the element with this location among all.
466    pub fn loc_index(&self, location: &Location) -> usize {
467        self.locations.get(location).map(|r| r.start).unwrap_or(usize::MAX)
468    }
469
470    /// Gets the range of the element with this location among all.
471    pub fn loc_range(&self, location: &Location) -> Range<usize> {
472        self.locations
473            .get(location)
474            .cloned()
475            .unwrap_or(usize::MAX..usize::MAX)
476    }
477}
478
479/// Constructs the [`ElementIntrospector`].
480pub struct ElementIntrospectorBuilder<P> {
481    stack: Vec<Vec<BuilderItem<P>>>,
482    sink: Vec<BuilderItem<P>>,
483    seen: FxHashSet<Location>,
484    insertions: MultiMap<Location, Vec<BuilderItem<P>>>,
485    keys: MultiMap<u128, Location>,
486    locations: FxHashMap<Location, Range<usize>>,
487    labels: MultiMap<Label, usize>,
488}
489
490/// An item in the builder's sink.
491enum BuilderItem<P> {
492    /// Indicates the start of the given element. Also holds its position.
493    Start(Content, P),
494    /// Indicates the end of the element with the given location.
495    End(Location),
496}
497
498impl<P> ElementIntrospectorBuilder<P> {
499    /// Creates an empty builder.
500    pub fn new() -> Self {
501        Self {
502            stack: Vec::new(),
503            sink: Vec::new(),
504            seen: FxHashSet::default(),
505            insertions: MultiMap::default(),
506            keys: MultiMap::default(),
507            locations: FxHashMap::default(),
508            labels: MultiMap::default(),
509        }
510    }
511
512    /// Discovers an introspectible in a tag.
513    pub fn discover_tag(&mut self, tag: &Tag, position: P) {
514        match tag {
515            Tag::Start(elem, flags) => {
516                if flags.introspectable {
517                    let loc = elem.location().unwrap();
518                    if self.seen.insert(loc) {
519                        self.sink.push(BuilderItem::Start(elem.clone(), position));
520                    }
521                }
522            }
523            Tag::End(loc, key, flags) => {
524                if flags.introspectable {
525                    self.keys.insert(*key, *loc);
526                    self.sink.push(BuilderItem::End(*loc));
527                }
528            }
529        }
530    }
531
532    /// Discovers elements from another already built introspector.
533    pub fn discover_elements<Q>(
534        &mut self,
535        elements: &ElementIntrospector<Q>,
536        map_position: impl Fn(&Q) -> P,
537    ) {
538        // Because `elements` is already fully built, we need to basically
539        // reverse the already built location ranges back to start/end events.
540        // We do this by queueing end events for positions as we visit elements
541        // and dequeueing them at the end of the relevant element.
542        self.sink.reserve(2 * elements.elems.len());
543        let mut queued = MultiMap::default();
544        for (i, (elem, q)) in elements.elems.iter().enumerate() {
545            let loc = elem.location().unwrap();
546            if self.seen.insert(loc) {
547                let range = elements.locations.get(&loc).unwrap();
548                let position = map_position(q);
549                self.sink.push(BuilderItem::Start(elem.clone(), position));
550                debug_assert_eq!(range.start, i);
551                queued.insert(range.end, loc);
552            }
553            for &end in queued.get(&(i + 1)).iter().rev() {
554                self.sink.push(BuilderItem::End(end));
555            }
556        }
557        self.keys.extend(&elements.keys);
558    }
559
560    /// Future content until a matching `end_insertion` will ordering-wise be
561    /// treated as belonging to the `parent` passed to `end_insertion`.
562    pub fn start_insertion(&mut self) {
563        self.stack.push(std::mem::take(&mut self.sink));
564    }
565
566    /// Closes an insertion group started by a matching `start_insertion`.
567    #[track_caller]
568    pub fn end_insertion(&mut self, parent: Location) {
569        let elems = std::mem::replace(
570            &mut self.sink,
571            self.stack.pop().expect("insertion to have been started"),
572        );
573        self.insertions.insert(parent, elems);
574    }
575
576    /// Builds a complete introspector with all acceleration structures from a
577    /// list of top-level pairs.
578    pub fn finalize(mut self) -> ElementIntrospector<P> {
579        self.locations.reserve(self.seen.len());
580
581        // Save all pairs and their descendants in the correct order.
582        let mut elems = Vec::with_capacity(self.seen.len());
583        for item in std::mem::take(&mut self.sink) {
584            self.visit(&mut elems, item);
585        }
586
587        ElementIntrospector {
588            elems,
589            keys: self.keys,
590            locations: self.locations,
591            labels: self.labels,
592            queries: QueryCache::default(),
593        }
594    }
595
596    /// Saves a pair and all its descendants into `elems` and populates the
597    /// acceleration structures.
598    fn visit(&mut self, elems: &mut Vec<(Content, P)>, item: BuilderItem<P>) {
599        match item {
600            BuilderItem::Start(elem, pos) => {
601                let loc = elem.location().unwrap();
602                let idx = elems.len();
603
604                // Populate the location acceleration map. Initially, we insert
605                // with a range covering just the element itself. Once we visit
606                // the end tag, we update this information.
607                self.locations.insert(loc, idx..idx + 1);
608
609                // Populate the label acceleration map.
610                if let Some(label) = elem.label() {
611                    self.labels.insert(label, idx);
612                }
613
614                // Save the element.
615                elems.push((elem, pos));
616
617                // Process potential descendants.
618                if let Some(insertions) = self.insertions.take(&loc) {
619                    for pair in insertions.flatten() {
620                        self.visit(elems, pair);
621                    }
622                }
623            }
624            BuilderItem::End(loc) => {
625                // Update the end of the element's range.
626                if let Some(entry) = self.locations.get_mut(&loc) {
627                    entry.end = elems.len();
628                }
629            }
630        }
631    }
632}
633
634impl<P> Default for ElementIntrospectorBuilder<P> {
635    fn default() -> Self {
636        Self::new()
637    }
638}
639
640/// A map from one keys to multiple elements.
641#[derive(Clone)]
642struct MultiMap<K, V>(FxHashMap<K, SmallVec<[V; 1]>>);
643
644impl<K, V> MultiMap<K, V>
645where
646    K: Hash + Eq,
647{
648    fn get(&self, key: &K) -> &[V] {
649        self.0.get(key).map_or(&[], |vec| vec.as_slice())
650    }
651
652    fn iter<'a>(&'a self) -> impl Iterator<Item = (&'a K, &'a [V])> + use<'a, K, V> {
653        self.0.iter().map(|(k, v)| (k, v.as_slice()))
654    }
655
656    fn insert(&mut self, key: K, value: V) {
657        self.0.entry(key).or_default().push(value);
658    }
659
660    fn insert_iter(&mut self, key: K, values: impl IntoIterator<Item = V>) {
661        self.0.entry(key).or_default().extend(values);
662    }
663
664    fn take(&mut self, key: &K) -> Option<impl Iterator<Item = V> + use<K, V>> {
665        self.0.remove(key).map(|vec| vec.into_iter())
666    }
667
668    fn extend(&mut self, other: &Self)
669    where
670        K: Clone,
671        V: Clone,
672    {
673        for (key, locs) in other.iter() {
674            self.insert_iter(key.clone(), locs.iter().cloned());
675        }
676    }
677}
678
679impl<K, V> Default for MultiMap<K, V> {
680    fn default() -> Self {
681        Self(FxHashMap::default())
682    }
683}
684
685/// Caches queries.
686#[derive(Default)]
687struct QueryCache(RwLock<FxHashMap<u128, EcoVec<Content>>>);
688
689impl QueryCache {
690    fn get(&self, hash: u128) -> Option<EcoVec<Content>> {
691        self.0.read().unwrap().get(&hash).cloned()
692    }
693
694    fn insert(&self, hash: u128, output: EcoVec<Content>) {
695        self.0.write().unwrap().insert(hash, output);
696    }
697}
698
699impl Clone for QueryCache {
700    fn clone(&self) -> Self {
701        Self(RwLock::new(self.0.read().unwrap().clone()))
702    }
703}