1use std::collections::BTreeSet;
2use std::fmt::{self, Debug, Formatter};
3use std::hash::Hash;
4use std::num::NonZeroUsize;
5use std::sync::RwLock;
6
7use ecow::{EcoString, EcoVec};
8use rustc_hash::{FxHashMap, FxHashSet};
9use smallvec::SmallVec;
10use typst_utils::NonZeroExt;
11
12use crate::diag::{StrResult, bail};
13use crate::foundations::{Content, Label, Repr, Selector};
14use crate::introspection::{Location, Tag};
15use crate::layout::{Frame, FrameItem, Point, Position, Transform};
16use crate::model::Numbering;
17
18#[derive(Default, Clone)]
20pub struct Introspector {
21 pages: usize,
23 page_numberings: Vec<Option<Numbering>>,
25 page_supplements: Vec<Content>,
27
28 elems: Vec<Pair>,
30 keys: MultiMap<u128, Location>,
33
34 locations: FxHashMap<Location, usize>,
36 labels: MultiMap<Label, usize>,
38
39 html_ids: FxHashMap<Location, EcoString>,
43
44 queries: QueryCache,
49}
50
51type Pair = (Content, Position);
53
54impl Introspector {
55 pub fn all(&self) -> impl Iterator<Item = &Content> + '_ {
57 self.elems.iter().map(|(c, _)| c)
58 }
59
60 pub fn label_count(&self, label: Label) -> usize {
62 self.labels.get(&label).len()
63 }
64
65 pub fn set_html_ids(&mut self, html_ids: FxHashMap<Location, EcoString>) {
68 self.html_ids = html_ids;
69 }
70
71 #[track_caller]
73 fn get_by_idx(&self, idx: usize) -> &Content {
74 &self.elems[idx].0
75 }
76
77 #[track_caller]
79 fn get_pos_by_idx(&self, idx: usize) -> Position {
80 self.elems[idx].1
81 }
82
83 fn get_by_loc(&self, location: &Location) -> Option<&Content> {
85 self.locations.get(location).map(|&idx| self.get_by_idx(idx))
86 }
87
88 fn get_pos_by_loc(&self, location: &Location) -> Option<Position> {
90 self.locations.get(location).map(|&idx| self.get_pos_by_idx(idx))
91 }
92
93 fn binary_search(&self, list: &[Content], elem: &Content) -> Result<usize, usize> {
95 list.binary_search_by_key(&self.elem_index(elem), |elem| self.elem_index(elem))
96 }
97
98 fn elem_index(&self, elem: &Content) -> usize {
100 self.loc_index(&elem.location().unwrap())
101 }
102
103 fn loc_index(&self, location: &Location) -> usize {
105 self.locations.get(location).copied().unwrap_or(usize::MAX)
106 }
107}
108
109#[comemo::track]
110impl Introspector {
111 pub fn query(&self, selector: &Selector) -> EcoVec<Content> {
113 let hash = typst_utils::hash128(selector);
114 if let Some(output) = self.queries.get(hash) {
115 return output;
116 }
117
118 let output = match selector {
119 Selector::Elem(..) => self
120 .all()
121 .filter(|elem| selector.matches(elem, None))
122 .cloned()
123 .collect(),
124 Selector::Location(location) => {
125 self.get_by_loc(location).cloned().into_iter().collect()
126 }
127 Selector::Label(label) => self
128 .labels
129 .get(label)
130 .iter()
131 .map(|&idx| self.get_by_idx(idx).clone())
132 .collect(),
133 Selector::Or(selectors) => selectors
134 .iter()
135 .flat_map(|sel| self.query(sel))
136 .map(|elem| self.elem_index(&elem))
137 .collect::<BTreeSet<usize>>()
138 .into_iter()
139 .map(|idx| self.get_by_idx(idx).clone())
140 .collect(),
141 Selector::And(selectors) => {
142 let mut results: Vec<_> =
143 selectors.iter().map(|sel| self.query(sel)).collect();
144
145 results
149 .iter()
150 .enumerate()
151 .min_by_key(|(_, vec)| vec.len())
152 .map(|(i, _)| i)
153 .map(|i| results.swap_remove(i))
154 .iter()
155 .flatten()
156 .filter(|candidate| {
157 results
158 .iter()
159 .all(|other| self.binary_search(other, candidate).is_ok())
160 })
161 .cloned()
162 .collect()
163 }
164 Selector::Before { selector, end, inclusive } => {
165 let mut list = self.query(selector);
166 if let Some(end) = self.query_first(end) {
167 let split = match self.binary_search(&list, &end) {
169 Ok(i) => i + *inclusive as usize,
171 Err(i) => i,
173 };
174 list = list[..split].into();
175 }
176 list
177 }
178 Selector::After { selector, start, inclusive } => {
179 let mut list = self.query(selector);
180 if let Some(start) = self.query_first(start) {
181 let split = match self.binary_search(&list, &start) {
183 Ok(i) => i + !*inclusive as usize,
185 Err(i) => i,
187 };
188 list = list[split..].into();
189 }
190 list
191 }
192 Selector::Can(_) | Selector::Regex(_) => EcoVec::new(),
194 };
195
196 self.queries.insert(hash, output.clone());
197 output
198 }
199
200 pub fn query_first(&self, selector: &Selector) -> Option<Content> {
202 match selector {
203 Selector::Location(location) => self.get_by_loc(location).cloned(),
204 Selector::Label(label) => self
205 .labels
206 .get(label)
207 .first()
208 .map(|&idx| self.get_by_idx(idx).clone()),
209 _ => self.query(selector).first().cloned(),
210 }
211 }
212
213 pub fn query_unique(&self, selector: &Selector) -> StrResult<Content> {
215 match selector {
216 Selector::Location(location) => self
217 .get_by_loc(location)
218 .cloned()
219 .ok_or_else(|| "element does not exist in the document".into()),
220 Selector::Label(label) => self.query_label(*label).cloned(),
221 _ => {
222 let elems = self.query(selector);
223 if elems.len() > 1 {
224 bail!("selector matches multiple elements",);
225 }
226 elems
227 .into_iter()
228 .next()
229 .ok_or_else(|| "selector does not match any element".into())
230 }
231 }
232 }
233
234 pub fn query_label(&self, label: Label) -> StrResult<&Content> {
236 match *self.labels.get(&label) {
237 [idx] => Ok(self.get_by_idx(idx)),
238 [] => bail!("label `{}` does not exist in the document", label.repr()),
239 _ => bail!("label `{}` occurs multiple times in the document", label.repr()),
240 }
241 }
242
243 pub fn query_count_before(&self, selector: &Selector, end: Location) -> usize {
246 let list = self.query(selector);
248 if let Some(end) = self.get_by_loc(&end) {
249 match self.binary_search(&list, end) {
250 Ok(i) => i + 1,
251 Err(i) => i,
252 }
253 } else {
254 list.len()
255 }
256 }
257
258 pub fn pages(&self) -> NonZeroUsize {
260 NonZeroUsize::new(self.pages).unwrap_or(NonZeroUsize::ONE)
261 }
262
263 pub fn page(&self, location: Location) -> NonZeroUsize {
265 self.position(location).page
266 }
267
268 pub fn position(&self, location: Location) -> Position {
270 self.get_pos_by_loc(&location)
271 .unwrap_or(Position { page: NonZeroUsize::ONE, point: Point::zero() })
272 }
273
274 pub fn page_numbering(&self, location: Location) -> Option<&Numbering> {
276 let page = self.page(location);
277 self.page_numberings
278 .get(page.get() - 1)
279 .and_then(|slot| slot.as_ref())
280 }
281
282 pub fn page_supplement(&self, location: Location) -> Content {
284 let page = self.page(location);
285 self.page_supplements.get(page.get() - 1).cloned().unwrap_or_default()
286 }
287
288 pub fn html_id(&self, location: Location) -> Option<&EcoString> {
290 self.html_ids.get(&location)
291 }
292
293 pub fn locator(&self, key: u128, anchor: Location) -> Option<Location> {
300 let anchor = self.loc_index(&anchor);
301 self.keys
302 .get(&key)
303 .iter()
304 .copied()
305 .min_by_key(|loc| self.loc_index(loc).wrapping_sub(anchor))
306 }
307}
308
309impl Debug for Introspector {
310 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
311 f.pad("Introspector(..)")
312 }
313}
314
315#[derive(Clone)]
317struct MultiMap<K, V>(FxHashMap<K, SmallVec<[V; 1]>>);
318
319impl<K, V> MultiMap<K, V>
320where
321 K: Hash + Eq,
322{
323 fn get(&self, key: &K) -> &[V] {
324 self.0.get(key).map_or(&[], |vec| vec.as_slice())
325 }
326
327 fn insert(&mut self, key: K, value: V) {
328 self.0.entry(key).or_default().push(value);
329 }
330
331 fn take(&mut self, key: &K) -> Option<impl Iterator<Item = V> + use<K, V>> {
332 self.0.remove(key).map(|vec| vec.into_iter())
333 }
334}
335
336impl<K, V> Default for MultiMap<K, V> {
337 fn default() -> Self {
338 Self(FxHashMap::default())
339 }
340}
341
342#[derive(Default)]
344struct QueryCache(RwLock<FxHashMap<u128, EcoVec<Content>>>);
345
346impl QueryCache {
347 fn get(&self, hash: u128) -> Option<EcoVec<Content>> {
348 self.0.read().unwrap().get(&hash).cloned()
349 }
350
351 fn insert(&self, hash: u128, output: EcoVec<Content>) {
352 self.0.write().unwrap().insert(hash, output);
353 }
354}
355
356impl Clone for QueryCache {
357 fn clone(&self) -> Self {
358 Self(RwLock::new(self.0.read().unwrap().clone()))
359 }
360}
361
362#[derive(Default)]
364pub struct IntrospectorBuilder {
365 pub pages: usize,
366 pub page_numberings: Vec<Option<Numbering>>,
367 pub page_supplements: Vec<Content>,
368 pub html_ids: FxHashMap<Location, EcoString>,
369 seen: FxHashSet<Location>,
370 insertions: MultiMap<Location, Vec<Pair>>,
371 keys: MultiMap<u128, Location>,
372 locations: FxHashMap<Location, usize>,
373 labels: MultiMap<Label, usize>,
374}
375
376impl IntrospectorBuilder {
377 pub fn new() -> Self {
379 Self::default()
380 }
381
382 pub fn discover_in_frame(
384 &mut self,
385 sink: &mut Vec<Pair>,
386 frame: &Frame,
387 page: NonZeroUsize,
388 ts: Transform,
389 ) {
390 for (pos, item) in frame.items() {
391 match item {
392 FrameItem::Group(group) => {
393 let ts = ts
394 .pre_concat(Transform::translate(pos.x, pos.y))
395 .pre_concat(group.transform);
396
397 if let Some(parent) = group.parent {
398 let mut nested = vec![];
399 self.discover_in_frame(&mut nested, &group.frame, page, ts);
400 self.register_insertion(parent.location, nested);
401 } else {
402 self.discover_in_frame(sink, &group.frame, page, ts);
403 }
404 }
405 FrameItem::Tag(tag) => {
406 self.discover_in_tag(
407 sink,
408 tag,
409 Position { page, point: pos.transform(ts) },
410 );
411 }
412 _ => {}
413 }
414 }
415 }
416
417 pub fn discover_in_tag(
419 &mut self,
420 sink: &mut Vec<Pair>,
421 tag: &Tag,
422 position: Position,
423 ) {
424 match tag {
425 Tag::Start(elem, flags) => {
426 if flags.introspectable {
427 let loc = elem.location().unwrap();
428 if self.seen.insert(loc) {
429 sink.push((elem.clone(), position));
430 }
431 }
432 }
433 Tag::End(loc, key, flags) => {
434 if flags.introspectable {
435 self.keys.insert(*key, *loc);
436 }
437 }
438 }
439 }
440
441 pub fn register_insertion(&mut self, parent: Location, nested: Vec<Pair>) {
443 self.insertions.insert(parent, nested);
444 }
445
446 pub fn finalize(mut self, root: Vec<Pair>) -> Introspector {
449 self.locations.reserve(self.seen.len());
450
451 let mut elems = Vec::with_capacity(self.seen.len());
453 for pair in root {
454 self.visit(&mut elems, pair);
455 }
456
457 Introspector {
458 pages: self.pages,
459 page_numberings: self.page_numberings,
460 page_supplements: self.page_supplements,
461 html_ids: self.html_ids,
462 elems,
463 keys: self.keys,
464 locations: self.locations,
465 labels: self.labels,
466 queries: QueryCache::default(),
467 }
468 }
469
470 fn visit(&mut self, elems: &mut Vec<Pair>, pair: Pair) {
473 let elem = &pair.0;
474 let loc = elem.location().unwrap();
475 let idx = elems.len();
476
477 self.locations.insert(loc, idx);
479
480 if let Some(label) = elem.label() {
482 self.labels.insert(label, idx);
483 }
484
485 elems.push(pair);
487
488 if let Some(insertions) = self.insertions.take(&loc) {
490 for pair in insertions.flatten() {
491 self.visit(elems, pair);
492 }
493 }
494 }
495}