Skip to main content

rpdfium_doc/
struct_tree.rs

1// Derived from PDFium's cpdf_structtree.h/cpp
2// Original: Copyright 2014 The PDFium Authors
3// Licensed under BSD-3-Clause / Apache-2.0
4// See pdfium-upstream/LICENSE for the original license.
5
6//! Tagged PDF structure tree — `CPDF_StructTree`.
7//!
8//! Provides parsing of the PDF logical structure tree (ISO 32000-2 section 14.7),
9//! which encodes accessibility (tagged PDF) information. The structure tree maps
10//! marked content sequences in page content streams to semantic tags such as
11//! paragraphs, headings, tables, and figures.
12//!
13//! All tree traversal is **iterative** (explicit `Vec` stack) for WASM safety.
14//! Structure element types are in [`crate::struct_element`] (corresponds to
15//! `CPDF_StructElement`).
16
17use std::collections::HashMap;
18
19use rpdfium_core::{Name, PdfSource};
20use rpdfium_parser::{Object, ObjectId, ObjectStore};
21
22use crate::error::{DocError, DocResult};
23use crate::struct_element::{StructElement, parse_struct_element};
24
25pub use crate::struct_element::{AttributeValue, StructAttribute};
26
/// Maximum number of structure elements to parse (safety limit).
///
/// `parse_k_children` stops visiting further nodes once this many elements
/// have been collected, so an oversized tree is truncated rather than rejected.
const MAX_ELEMENTS: usize = 100_000;

/// Maximum depth for structure tree traversal.
///
/// Top-level `/K` items are at depth 0; `parse_k_children` returns
/// `DocError::DepthExceeded` for any node nested deeper than this value.
const MAX_TREE_DEPTH: usize = 64;
32
/// Names of the standard PDF structure types (ISO 32000-2 Table 368 ff.).
///
/// Each constant holds the exact tag string as it appears in a structure
/// element's `/S` entry, so it can be compared directly against a parsed
/// structure type or a role-map target.
pub mod structure_types {
    // Grouping elements.
    pub const DOCUMENT: &str = "Document";
    pub const PART: &str = "Part";
    pub const SECT: &str = "Sect";

    // Paragraph-like elements and headings.
    pub const P: &str = "P";
    pub const H: &str = "H";
    pub const H1: &str = "H1";
    pub const H2: &str = "H2";
    pub const H3: &str = "H3";
    pub const H4: &str = "H4";
    pub const H5: &str = "H5";
    pub const H6: &str = "H6";

    // Lists.
    pub const LIST: &str = "L";
    pub const LIST_ITEM: &str = "LI";
    pub const LABEL: &str = "Lbl";
    pub const LIST_BODY: &str = "LBody";

    // Tables.
    pub const TABLE: &str = "Table";
    pub const TR: &str = "TR";
    pub const TH: &str = "TH";
    pub const TD: &str = "TD";

    // Inline-level elements and illustrations.
    pub const SPAN: &str = "Span";
    pub const LINK: &str = "Link";
    pub const FIGURE: &str = "Figure";
}
58
/// The parsed structure tree of a tagged PDF document.
///
/// Produced by [`StructTree::from_catalog`] from the catalog's
/// `/StructTreeRoot` dictionary.
#[derive(Debug, Clone)]
pub struct StructTree {
    /// Root structure elements, i.e. the elements parsed from the `/K` entry
    /// of the structure tree root. Empty when the root has no `/K`.
    pub root_elements: Vec<StructElement>,
    /// Role mapping: custom tag name to standard tag name, as read from the
    /// root's `/RoleMap` dictionary. Empty when no usable `/RoleMap` exists.
    pub role_map: HashMap<String, String>,
}
67
68/// Returns indices of structure elements in the slice that reference the given MCID.
69///
70/// Performs an iterative depth-first search across all elements and their `/K` kids.
71/// An element "references" an MCID if it contains that value in its `mcids` list.
72///
73/// The returned indices are positions in the top-level `elements` slice.  If a
74/// descendant of element *i* holds the MCID, index *i* is included in the result.
75///
76/// # Examples
77///
78/// ```ignore
79/// let result = find_elements_for_mcid(&tree.root_elements, 42);
80/// // result contains indices of root elements whose subtrees include MCID 42
81/// ```
82pub fn find_elements_for_mcid(elements: &[StructElement], mcid: i32) -> Vec<usize> {
83    let target = mcid as i64;
84    let mut result = Vec::new();
85    // Stack holds (root_index, element_ref) pairs.
86    // Children are pushed with their ancestor's root_index so the caller
87    // can identify which top-level element's subtree contains the MCID.
88    let mut stack: Vec<(usize, &StructElement)> = elements.iter().enumerate().rev().collect();
89
90    while let Some((idx, elem)) = stack.pop() {
91        if elem.mcids.contains(&target) {
92            result.push(idx);
93        }
94        // Push children with the same root index for DFS.
95        for child in elem.children.iter().rev() {
96            stack.push((idx, child));
97        }
98    }
99    result
100}
101
102impl StructTree {
103    /// Parse the structure tree from the document catalog dictionary.
104    ///
105    /// Returns `Ok(None)` if the catalog has no `/StructTreeRoot` entry.
106    /// All traversal is iterative using an explicit stack.
107    pub fn from_catalog<S: PdfSource>(
108        catalog_dict: &HashMap<Name, Object>,
109        store: &ObjectStore<S>,
110    ) -> DocResult<Option<Self>> {
111        // Look up /StructTreeRoot
112        let root_obj = match catalog_dict.get(&Name::struct_tree_root()) {
113            Some(obj) => store
114                .deep_resolve(obj)
115                .map_err(|e| DocError::Parser(e.to_string()))?,
116            None => return Ok(None),
117        };
118
119        let root_dict = match root_obj.as_dict() {
120            Some(d) => d,
121            None => return Ok(None),
122        };
123
124        // Parse /RoleMap
125        let role_map = parse_role_map(root_dict, store);
126
127        // Parse /K (kids) — root content elements
128        let root_elements = match root_dict.get(&Name::k()) {
129            Some(k_obj) => parse_k_children(k_obj, store)?,
130            None => Vec::new(),
131        };
132
133        Ok(Some(StructTree {
134            root_elements,
135            role_map,
136        }))
137    }
138
139    /// Resolve a custom structure type to its standard type via the role map.
140    ///
141    /// Returns the mapped standard type if a mapping exists, or the original
142    /// type name if no mapping is defined.
143    ///
144    /// Corresponds to `CPDF_StructTree::GetRoleMapNameFor()` in PDFium.
145    pub fn role_map_name_for<'a>(&'a self, struct_type: &'a str) -> &'a str {
146        self.role_map
147            .get(struct_type)
148            .map(|s| s.as_str())
149            .unwrap_or(struct_type)
150    }
151
152    /// Upstream-aligned alias for [`role_map_name_for()`](StructTree::role_map_name_for).
153    ///
154    /// Corresponds to `CPDF_StructTree::GetRoleMapNameFor()` in PDFium.
155    #[inline]
156    pub fn get_role_map_name_for<'a>(&'a self, struct_type: &'a str) -> &'a str {
157        self.role_map_name_for(struct_type)
158    }
159
160    /// Returns the number of top-level children in the structure tree.
161    ///
162    /// Corresponds to upstream `FPDF_StructTree_CountChildren`.
163    pub fn child_count(&self) -> usize {
164        self.root_elements.len()
165    }
166
167    /// ADR-019 Tier 2 alias for [`child_count()`](StructTree::child_count).
168    ///
169    /// Corresponds to upstream `FPDF_StructTree_CountChildren`.
170    #[inline]
171    pub fn struct_tree_count_children(&self) -> usize {
172        self.child_count()
173    }
174
175    /// Deprecated short alias — use [`struct_tree_count_children()`](StructTree::struct_tree_count_children)
176    /// or [`child_count()`](StructTree::child_count) instead.
177    #[deprecated(
178        since = "0.1.0",
179        note = "use `struct_tree_count_children()` — matches upstream `FPDF_StructTree_CountChildren`"
180    )]
181    #[inline]
182    pub fn count_children(&self) -> usize {
183        self.child_count()
184    }
185
186    /// Returns the top-level child at the given zero-based index, or `None`.
187    ///
188    /// Corresponds to upstream `FPDF_StructTree_GetChildAtIndex`.
189    pub fn child_at_index(&self, index: usize) -> Option<&StructElement> {
190        self.root_elements.get(index)
191    }
192
193    /// ADR-019 Tier 2 alias for [`child_at_index()`](StructTree::child_at_index).
194    ///
195    /// Corresponds to upstream `FPDF_StructTree_GetChildAtIndex`.
196    #[inline]
197    pub fn struct_tree_get_child_at_index(&self, index: usize) -> Option<&StructElement> {
198        self.child_at_index(index)
199    }
200
201    /// Deprecated short alias — use [`struct_tree_get_child_at_index()`](StructTree::struct_tree_get_child_at_index)
202    /// or [`child_at_index()`](StructTree::child_at_index) instead.
203    #[deprecated(
204        since = "0.1.0",
205        note = "use `struct_tree_get_child_at_index()` — matches upstream `FPDF_StructTree_GetChildAtIndex`"
206    )]
207    #[inline]
208    pub fn get_child_at_index(&self, index: usize) -> Option<&StructElement> {
209        self.child_at_index(index)
210    }
211
212    /// Iterate over all structure elements that reference the given page object ID.
213    ///
214    /// Equivalent to upstream `CPDF_StructTree::LoadPage`, which filters the
215    /// structure tree to elements that belong to a specific page.  This variant
216    /// takes the page's indirect-object ID (the `/Pg` reference value stored in
217    /// each structure element) and performs an iterative depth-first walk of the
218    /// entire tree, yielding every element whose `page_ref` matches.
219    ///
220    /// Use [`PageStructure::for_page`] if you need an owned collection instead.
221    pub fn elements_for_page_ref(&self, page_ref: ObjectId) -> ElementsForPage<'_> {
222        ElementsForPage {
223            stack: self.root_elements.iter().rev().collect(),
224            page_ref,
225        }
226    }
227
228    /// Returns the indices (into `root_elements`) of elements whose subtrees reference
229    /// the given marked content ID.
230    ///
231    /// Equivalent to calling [`find_elements_for_mcid`] with `&self.root_elements`.
232    pub fn elements_for_mcid(&self, mcid: i32) -> Vec<usize> {
233        find_elements_for_mcid(&self.root_elements, mcid)
234    }
235
236    /// Non-upstream alias — use [`elements_for_mcid()`](Self::elements_for_mcid).
237    #[deprecated(
238        note = "use `elements_for_mcid()` — no public `FPDF_StructTree_GetElementsForMcid` API"
239    )]
240    #[inline]
241    pub fn get_elements_for_mcid(&self, mcid: i32) -> Vec<usize> {
242        self.elements_for_mcid(mcid)
243    }
244
245    /// Iterate over all structure elements associated with the page at the given
246    /// zero-based index, resolved against the provided ordered list of page IDs.
247    ///
248    /// `page_ids` must be ordered so that `page_ids[page_index]` is the indirect
249    /// object ID of the page at that index — the same ordering as returned by
250    /// `collect_page_ids` in `rpdfium-page`.
251    ///
252    /// Returns an empty iterator if `page_index` is out of range.
253    pub fn elements_for_page<'a>(
254        &'a self,
255        page_index: usize,
256        page_ids: &[ObjectId],
257    ) -> ElementsForPage<'a> {
258        match page_ids.get(page_index) {
259            Some(&page_ref) => ElementsForPage {
260                stack: self.root_elements.iter().rev().collect(),
261                page_ref,
262            },
263            None => ElementsForPage {
264                stack: Vec::new(),
265                page_ref: ObjectId::new(0, 0),
266            },
267        }
268    }
269}
270
271/// An iterator over structure elements that reference a specific page.
272///
273/// Created by [`StructTree::elements_for_page_ref`] and [`StructTree::elements_for_page`].
274/// Traverses the structure tree iteratively (no recursion) in depth-first order,
275/// yielding every [`StructElement`] whose `page_ref` field matches the target page.
276pub struct ElementsForPage<'a> {
277    /// DFS traversal stack holding remaining unvisited elements.
278    stack: Vec<&'a StructElement>,
279    /// The page object ID we are filtering for.
280    page_ref: ObjectId,
281}
282
283impl<'a> Iterator for ElementsForPage<'a> {
284    type Item = &'a StructElement;
285
286    fn next(&mut self) -> Option<Self::Item> {
287        loop {
288            let elem = self.stack.pop()?;
289            // Push children in reverse order for correct left-to-right visitation.
290            for child in elem.children.iter().rev() {
291                self.stack.push(child);
292            }
293            if elem.page_ref == Some(self.page_ref) {
294                return Some(elem);
295            }
296        }
297    }
298}
299
300/// Parse the `/RoleMap` dictionary: maps custom tag names to standard tag names.
301fn parse_role_map<S: PdfSource>(
302    root_dict: &HashMap<Name, Object>,
303    store: &ObjectStore<S>,
304) -> HashMap<String, String> {
305    let mut map = HashMap::new();
306    let role_map_obj = match root_dict.get(&Name::role_map()) {
307        Some(obj) => match store.deep_resolve(obj) {
308            Ok(resolved) => resolved,
309            Err(_) => return map,
310        },
311        None => return map,
312    };
313
314    if let Some(dict) = role_map_obj.as_dict() {
315        for (key, value) in dict {
316            if let Some(target) = value.as_name() {
317                map.insert(key.as_str().into_owned(), target.as_str().into_owned());
318            }
319        }
320    }
321    map
322}
323
324/// Parse `/K` children, which can be a single element or an array.
325/// Returns a Vec of root-level StructElements built via iterative traversal.
326fn parse_k_children<S: PdfSource>(
327    k_obj: &Object,
328    store: &ObjectStore<S>,
329) -> DocResult<Vec<StructElement>> {
330    let resolved = store
331        .deep_resolve(k_obj)
332        .map_err(|e| DocError::Parser(e.to_string()))?;
333
334    // Collect top-level items from /K
335    let top_items: Vec<&Object> = match resolved {
336        Object::Array(arr) => arr.iter().collect(),
337        _ => vec![resolved],
338    };
339
340    // Each top-level item is either a struct element dict or an MCID integer.
341    // We parse struct element dicts iteratively using a stack.
342    // Result: flat list of (depth, StructElement) that we reconstruct into a tree.
343    let mut flat: Vec<(usize, StructElement)> = Vec::new();
344
345    // Stack entry for iterative DFS
346    struct StackEntry<'a> {
347        obj: &'a Object,
348        depth: usize,
349    }
350
351    // Push top-level items in reverse order for correct left-to-right processing
352    let mut stack: Vec<StackEntry<'_>> = Vec::new();
353    for item in top_items.iter().rev() {
354        stack.push(StackEntry {
355            obj: item,
356            depth: 0,
357        });
358    }
359
360    while let Some(entry) = stack.pop() {
361        if flat.len() >= MAX_ELEMENTS {
362            break;
363        }
364        if entry.depth > MAX_TREE_DEPTH {
365            return Err(DocError::DepthExceeded);
366        }
367
368        let resolved = match store.deep_resolve(entry.obj) {
369            Ok(r) => r,
370            Err(_) => continue,
371        };
372
373        // If it's an integer, it's a bare MCID at the top level — skip (no struct element)
374        if resolved.as_i64().is_some() {
375            continue;
376        }
377
378        let dict = match resolved.as_dict() {
379            Some(d) => d,
380            None => continue,
381        };
382
383        // Parse the structure element from this dictionary
384        let mut elem = parse_struct_element(dict, store);
385
386        // Process /K children: push child dicts onto stack, collect MCIDs into elem
387        if let Some(k_val) = dict.get(&Name::k()) {
388            let k_resolved = match store.deep_resolve(k_val) {
389                Ok(r) => r,
390                Err(_) => {
391                    flat.push((entry.depth, elem));
392                    continue;
393                }
394            };
395
396            match k_resolved {
397                Object::Integer(n) => {
398                    elem.mcids.push(*n);
399                }
400                Object::Dictionary(child_dict) => {
401                    // Could be a struct element dict or an MCID reference dict
402                    if let Some(mcid) = extract_mcid_from_dict(child_dict) {
403                        elem.mcids.push(mcid);
404                    } else {
405                        // It's a child struct element — push onto stack
406                        stack.push(StackEntry {
407                            obj: k_val,
408                            depth: entry.depth + 1,
409                        });
410                    }
411                }
412                Object::Array(arr) => {
413                    // Mixed array of MCIDs, MCID dicts, and child struct element dicts
414                    // Process in reverse order for correct left-to-right stack processing
415                    for child in arr.iter().rev() {
416                        let child_resolved = match store.deep_resolve(child) {
417                            Ok(r) => r,
418                            Err(_) => continue,
419                        };
420                        match child_resolved {
421                            Object::Integer(n) => {
422                                elem.mcids.push(*n);
423                            }
424                            Object::Dictionary(child_dict) => {
425                                if let Some(mcid) = extract_mcid_from_dict(child_dict) {
426                                    elem.mcids.push(mcid);
427                                } else {
428                                    stack.push(StackEntry {
429                                        obj: child,
430                                        depth: entry.depth + 1,
431                                    });
432                                }
433                            }
434                            _ => {}
435                        }
436                    }
437                    // MCIDs were pushed in reverse; restore original order
438                    elem.mcids.reverse();
439                }
440                _ => {}
441            }
442        }
443
444        flat.push((entry.depth, elem));
445    }
446
447    build_tree_from_flat(flat)
448}
449
450/// Extract MCID from a marked content reference dictionary (has `/Type /MCR` or just `/MCID`).
451fn extract_mcid_from_dict(dict: &HashMap<Name, Object>) -> Option<i64> {
452    dict.get(&Name::mcid()).and_then(|obj| obj.as_i64())
453}
454
455/// Reconstruct a tree from a flat depth-tagged list of elements.
456/// Uses the same index-path approach as the bookmark tree builder.
457fn build_tree_from_flat(flat: Vec<(usize, StructElement)>) -> DocResult<Vec<StructElement>> {
458    if flat.is_empty() {
459        return Ok(Vec::new());
460    }
461
462    let mut root: Vec<StructElement> = Vec::new();
463    let mut path: Vec<usize> = Vec::new();
464
465    for (depth, mut elem) in flat {
466        path.truncate(depth);
467        let container = get_children_at_path(&mut root, &path);
468        let idx = container.len();
469        // Root elements (depth == 0) have no parent; children record their position
470        // within their parent's `children` vec so callers can implement GetParent.
471        if depth > 0 {
472            elem.parent_index = Some(idx);
473        }
474        container.push(elem);
475        if path.len() <= depth {
476            path.push(idx);
477        }
478    }
479
480    Ok(root)
481}
482
483/// Iteratively navigate to the children vec at the given index path.
484fn get_children_at_path<'a>(
485    root: &'a mut Vec<StructElement>,
486    path: &[usize],
487) -> &'a mut Vec<StructElement> {
488    let mut current = root;
489    for &idx in path {
490        current = &mut current[idx].children;
491    }
492    current
493}
494
/// Mapping from (page ObjectId, MCID) to structure element information.
///
/// Built by walking the structure tree and collecting all MCID associations.
/// Only elements that have both a page reference and at least one MCID are
/// recorded (see `McidMapping::from_struct_tree`).
#[derive(Debug, Clone)]
pub struct McidMapping {
    /// Map from (page_object_id, mcid) to index into `elements`.
    entries: HashMap<(ObjectId, i64), usize>,
    /// Flat list of elements referenced by the mapping; each entry is a clone
    /// of the corresponding tree element.
    elements: Vec<StructElement>,
}
505
506impl McidMapping {
507    /// Build an MCID mapping by iteratively walking the structure tree.
508    pub fn from_struct_tree(tree: &StructTree) -> Self {
509        let mut entries = HashMap::new();
510        let mut elements = Vec::new();
511
512        // Iterative walk of the tree
513        let mut stack: Vec<&StructElement> = tree.root_elements.iter().rev().collect();
514
515        while let Some(elem) = stack.pop() {
516            if !elem.mcids.is_empty() {
517                if let Some(page_id) = elem.page_ref {
518                    let idx = elements.len();
519                    elements.push(elem.clone());
520                    for &mcid in &elem.mcids {
521                        entries.insert((page_id, mcid), idx);
522                    }
523                }
524            }
525            // Push children in reverse for left-to-right processing
526            for child in elem.children.iter().rev() {
527                stack.push(child);
528            }
529        }
530
531        McidMapping { entries, elements }
532    }
533
534    /// Look up the structure element for a given page and MCID.
535    pub fn element_for_mcid(&self, page_id: ObjectId, mcid: i64) -> Option<&StructElement> {
536        self.entries
537            .get(&(page_id, mcid))
538            .map(|&idx| &self.elements[idx])
539    }
540}
541
/// Structure elements filtered for a specific page.
///
/// An owned snapshot: the elements are clones of the tree's elements, taken
/// by `PageStructure::for_page`.
#[derive(Debug, Clone)]
pub struct PageStructure {
    /// Structure elements that reference this page, in depth-first tree order.
    pub elements: Vec<StructElement>,
}
548
549impl PageStructure {
550    /// Filter the structure tree to elements referencing the given page.
551    pub fn for_page(tree: &StructTree, page_id: ObjectId) -> Self {
552        let mut elements = Vec::new();
553
554        // Iterative walk
555        let mut stack: Vec<&StructElement> = tree.root_elements.iter().rev().collect();
556
557        while let Some(elem) = stack.pop() {
558            if elem.page_ref == Some(page_id) {
559                elements.push(elem.clone());
560            }
561            for child in elem.children.iter().rev() {
562                stack.push(child);
563            }
564        }
565
566        PageStructure { elements }
567    }
568}
569
570#[cfg(test)]
571mod tests {
572    use super::*;
573    use rpdfium_core::PdfString;
574
575    fn build_store() -> ObjectStore<Vec<u8>> {
576        let pdf = build_minimal_pdf();
577        ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
578    }
579
580    fn build_minimal_pdf() -> Vec<u8> {
581        let mut pdf = Vec::new();
582        pdf.extend_from_slice(b"%PDF-1.4\n");
583        let obj1_offset = pdf.len();
584        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
585        let obj2_offset = pdf.len();
586        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
587        let xref_offset = pdf.len();
588        pdf.extend_from_slice(b"xref\n0 3\n");
589        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
590        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
591        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
592        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
593        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
594        pdf
595    }
596
597    fn str_obj(s: &str) -> Object {
598        Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
599    }
600
601    fn name_obj(s: &str) -> Object {
602        Object::Name(Name::from(s))
603    }
604
605    /// Helper: build a struct element dict with a type tag.
606    fn struct_elem_dict(tag: &str) -> HashMap<Name, Object> {
607        let mut d = HashMap::new();
608        d.insert(Name::s(), name_obj(tag));
609        d
610    }
611
612    #[test]
613    fn test_no_struct_tree_root_returns_none() {
614        let store = build_store();
615        let catalog = HashMap::new();
616        let result = StructTree::from_catalog(&catalog, &store).unwrap();
617        assert!(result.is_none());
618    }
619
620    #[test]
621    fn test_empty_struct_tree_root() {
622        let store = build_store();
623        let root_dict = HashMap::new();
624        let mut catalog = HashMap::new();
625        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
626        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
627        assert!(tree.root_elements.is_empty());
628        assert!(tree.role_map.is_empty());
629    }
630
631    #[test]
632    fn test_basic_structure_tree_document_with_paragraphs() {
633        let store = build_store();
634
635        // Two paragraph children
636        let p1 = struct_elem_dict("P");
637        let p2 = struct_elem_dict("P");
638
639        // Document element with /K array of children
640        let mut doc = struct_elem_dict("Document");
641        doc.insert(
642            Name::k(),
643            Object::Array(vec![Object::Dictionary(p1), Object::Dictionary(p2)]),
644        );
645
646        let mut root_dict = HashMap::new();
647        root_dict.insert(Name::k(), Object::Dictionary(doc));
648
649        let mut catalog = HashMap::new();
650        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
651
652        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
653        assert_eq!(tree.root_elements.len(), 1);
654        assert_eq!(tree.root_elements[0].struct_type, "Document");
655        assert_eq!(tree.root_elements[0].children.len(), 2);
656        assert_eq!(tree.root_elements[0].children[0].struct_type, "P");
657        assert_eq!(tree.root_elements[0].children[1].struct_type, "P");
658    }
659
660    #[test]
661    fn test_mcid_from_integer_in_k() {
662        let store = build_store();
663
664        // A paragraph with a single MCID integer in /K
665        let mut p = struct_elem_dict("P");
666        p.insert(Name::k(), Object::Integer(42));
667        p.insert(Name::pg(), Object::Reference(ObjectId::new(5, 0)));
668
669        let mut root_dict = HashMap::new();
670        root_dict.insert(Name::k(), Object::Dictionary(p));
671
672        let mut catalog = HashMap::new();
673        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
674
675        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
676        assert_eq!(tree.root_elements.len(), 1);
677        assert_eq!(tree.root_elements[0].mcids, vec![42]);
678        assert_eq!(tree.root_elements[0].page_ref, Some(ObjectId::new(5, 0)));
679    }
680
681    #[test]
682    fn test_mcid_from_dict_in_k() {
683        let store = build_store();
684
685        // A marked content reference dict with /MCID
686        let mut mcr = HashMap::new();
687        mcr.insert(Name::mcid(), Object::Integer(7));
688
689        let mut p = struct_elem_dict("Span");
690        p.insert(Name::k(), Object::Dictionary(mcr));
691        p.insert(Name::pg(), Object::Reference(ObjectId::new(3, 0)));
692
693        let mut root_dict = HashMap::new();
694        root_dict.insert(Name::k(), Object::Dictionary(p));
695
696        let mut catalog = HashMap::new();
697        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
698
699        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
700        assert_eq!(tree.root_elements[0].mcids, vec![7]);
701    }
702
703    #[test]
704    fn test_alt_text_extraction() {
705        let store = build_store();
706
707        let mut fig = struct_elem_dict("Figure");
708        fig.insert(Name::alt(), str_obj("A photo of a cat"));
709
710        let mut root_dict = HashMap::new();
711        root_dict.insert(Name::k(), Object::Dictionary(fig));
712
713        let mut catalog = HashMap::new();
714        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
715
716        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
717        assert_eq!(
718            tree.root_elements[0].alt_text.as_deref(),
719            Some("A photo of a cat")
720        );
721    }
722
723    #[test]
724    fn test_nested_structure_elements() {
725        let store = build_store();
726
727        // Span inside P inside Document
728        let span = struct_elem_dict("Span");
729        let mut p = struct_elem_dict("P");
730        p.insert(Name::k(), Object::Dictionary(span));
731
732        let mut doc = struct_elem_dict("Document");
733        doc.insert(Name::k(), Object::Dictionary(p));
734
735        let mut root_dict = HashMap::new();
736        root_dict.insert(Name::k(), Object::Dictionary(doc));
737
738        let mut catalog = HashMap::new();
739        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
740
741        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
742        assert_eq!(tree.root_elements[0].struct_type, "Document");
743        assert_eq!(tree.root_elements[0].children[0].struct_type, "P");
744        assert_eq!(
745            tree.root_elements[0].children[0].children[0].struct_type,
746            "Span"
747        );
748    }
749
750    #[test]
751    fn test_role_mapping() {
752        let store = build_store();
753
754        // RoleMap: "MyTag" -> "P"
755        let mut role_map_dict = HashMap::new();
756        role_map_dict.insert(Name::from("MyTag"), name_obj("P"));
757        role_map_dict.insert(Name::from("CustomH"), name_obj("H1"));
758
759        let mut root_dict = HashMap::new();
760        root_dict.insert(Name::role_map(), Object::Dictionary(role_map_dict));
761
762        let mut catalog = HashMap::new();
763        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
764
765        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
766        assert_eq!(tree.role_map.get("MyTag"), Some(&"P".to_string()));
767        assert_eq!(tree.role_map.get("CustomH"), Some(&"H1".to_string()));
768    }
769
770    #[test]
771    fn test_mixed_k_content_dicts_and_integers() {
772        let store = build_store();
773
774        // MCID reference dict
775        let mut mcr = HashMap::new();
776        mcr.insert(Name::mcid(), Object::Integer(3));
777
778        // Child struct element
779        let child = struct_elem_dict("Span");
780
781        // Parent with mixed /K: integer, MCR dict, struct element
782        let mut p = struct_elem_dict("P");
783        p.insert(Name::pg(), Object::Reference(ObjectId::new(10, 0)));
784        p.insert(
785            Name::k(),
786            Object::Array(vec![
787                Object::Integer(1),
788                Object::Dictionary(mcr),
789                Object::Dictionary(child),
790            ]),
791        );
792
793        let mut root_dict = HashMap::new();
794        root_dict.insert(Name::k(), Object::Dictionary(p));
795
796        let mut catalog = HashMap::new();
797        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
798
799        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
800        let elem = &tree.root_elements[0];
801        assert_eq!(elem.struct_type, "P");
802        assert_eq!(elem.mcids, vec![1, 3]);
803        assert_eq!(elem.children.len(), 1);
804        assert_eq!(elem.children[0].struct_type, "Span");
805    }
806
807    #[test]
808    fn test_page_structure_filtering() {
809        let store = build_store();
810
811        let page1 = ObjectId::new(5, 0);
812        let page2 = ObjectId::new(6, 0);
813
814        let mut p1 = struct_elem_dict("P");
815        p1.insert(Name::pg(), Object::Reference(page1));
816        let mut p2 = struct_elem_dict("P");
817        p2.insert(Name::pg(), Object::Reference(page2));
818        let mut p3 = struct_elem_dict("P");
819        p3.insert(Name::pg(), Object::Reference(page1));
820
821        let mut doc = struct_elem_dict("Document");
822        doc.insert(
823            Name::k(),
824            Object::Array(vec![
825                Object::Dictionary(p1),
826                Object::Dictionary(p2),
827                Object::Dictionary(p3),
828            ]),
829        );
830
831        let mut root_dict = HashMap::new();
832        root_dict.insert(Name::k(), Object::Dictionary(doc));
833
834        let mut catalog = HashMap::new();
835        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
836
837        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
838        let page1_struct = PageStructure::for_page(&tree, page1);
839        assert_eq!(page1_struct.elements.len(), 2);
840        for elem in &page1_struct.elements {
841            assert_eq!(elem.page_ref, Some(page1));
842        }
843
844        let page2_struct = PageStructure::for_page(&tree, page2);
845        assert_eq!(page2_struct.elements.len(), 1);
846    }
847
848    #[test]
849    fn test_mcid_mapping_lookup() {
850        let store = build_store();
851
852        let page_id = ObjectId::new(7, 0);
853
854        let mut p = struct_elem_dict("P");
855        p.insert(Name::pg(), Object::Reference(page_id));
856        p.insert(
857            Name::k(),
858            Object::Array(vec![Object::Integer(0), Object::Integer(1)]),
859        );
860
861        let mut root_dict = HashMap::new();
862        root_dict.insert(Name::k(), Object::Dictionary(p));
863
864        let mut catalog = HashMap::new();
865        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
866
867        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
868        let mapping = McidMapping::from_struct_tree(&tree);
869
870        let elem = mapping.element_for_mcid(page_id, 0).unwrap();
871        assert_eq!(elem.struct_type, "P");
872        let elem1 = mapping.element_for_mcid(page_id, 1).unwrap();
873        assert_eq!(elem1.struct_type, "P");
874        assert!(mapping.element_for_mcid(page_id, 99).is_none());
875        assert!(mapping.element_for_mcid(ObjectId::new(999, 0), 0).is_none());
876    }
877
878    #[test]
879    fn test_security_limit_truncates_large_tree() {
880        let store = build_store();
881
882        // Build an array with MAX_ELEMENTS + 10 struct element dicts
883        let count = MAX_ELEMENTS + 10;
884        let arr: Vec<Object> = (0..count)
885            .map(|_| Object::Dictionary(struct_elem_dict("P")))
886            .collect();
887
888        let mut doc = struct_elem_dict("Document");
889        doc.insert(Name::k(), Object::Array(arr));
890
891        let mut root_dict = HashMap::new();
892        root_dict.insert(Name::k(), Object::Dictionary(doc));
893
894        let mut catalog = HashMap::new();
895        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
896
897        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
898        // The Document element itself counts as 1, and children are capped
899        let total = count_elements(&tree.root_elements);
900        assert!(total <= MAX_ELEMENTS + 1); // +1 for the Document root
901    }
902
903    /// Count total elements in a tree (iteratively).
904    fn count_elements(roots: &[StructElement]) -> usize {
905        let mut count = 0;
906        let mut stack: Vec<&StructElement> = roots.iter().collect();
907        while let Some(elem) = stack.pop() {
908            count += 1;
909            for child in &elem.children {
910                stack.push(child);
911            }
912        }
913        count
914    }
915
916    #[test]
917    fn test_title_and_id_extraction() {
918        let store = build_store();
919
920        let mut elem = struct_elem_dict("Table");
921        elem.insert(Name::t(), str_obj("Sales Data 2026"));
922        elem.insert(Name::id(), str_obj("table-001"));
923
924        let mut root_dict = HashMap::new();
925        root_dict.insert(Name::k(), Object::Dictionary(elem));
926
927        let mut catalog = HashMap::new();
928        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
929
930        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
931        assert_eq!(
932            tree.root_elements[0].title.as_deref(),
933            Some("Sales Data 2026")
934        );
935        assert_eq!(tree.root_elements[0].id.as_deref(), Some("table-001"));
936    }
937
938    #[test]
939    fn test_role_map_name_for_lookup() {
940        let store = build_store();
941
942        let mut role_map_dict = HashMap::new();
943        role_map_dict.insert(Name::from("MyTag"), name_obj("P"));
944        role_map_dict.insert(Name::from("CustomH"), name_obj("H1"));
945
946        let mut root_dict = HashMap::new();
947        root_dict.insert(Name::role_map(), Object::Dictionary(role_map_dict));
948
949        let mut catalog = HashMap::new();
950        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
951
952        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
953        assert_eq!(tree.role_map_name_for("MyTag"), "P");
954        assert_eq!(tree.role_map_name_for("CustomH"), "H1");
955        // Unknown tag returns itself
956        assert_eq!(tree.role_map_name_for("P"), "P");
957        assert_eq!(tree.role_map_name_for("UnknownTag"), "UnknownTag");
958    }
959
960    #[test]
961    fn test_actual_text_and_lang() {
962        let store = build_store();
963
964        let mut span = struct_elem_dict("Span");
965        span.insert(Name::actual_text(), str_obj("Hello World"));
966        span.insert(Name::lang(), str_obj("en-US"));
967
968        let mut root_dict = HashMap::new();
969        root_dict.insert(Name::k(), Object::Dictionary(span));
970
971        let mut catalog = HashMap::new();
972        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
973
974        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
975        assert_eq!(
976            tree.root_elements[0].actual_text.as_deref(),
977            Some("Hello World")
978        );
979        assert_eq!(tree.root_elements[0].lang.as_deref(), Some("en-US"));
980    }
981
982    #[test]
983    fn test_struct_element_with_attributes() {
984        let store = build_store();
985
986        // Build an attribute dict with /O owner and some entries
987        let mut attr_dict = HashMap::new();
988        attr_dict.insert(Name::o(), name_obj("Layout"));
989        attr_dict.insert(Name::from("WritingMode"), name_obj("LrTb"));
990        attr_dict.insert(Name::from("SpaceBefore"), Object::Real(12.0));
991
992        let mut td = struct_elem_dict("TD");
993        td.insert(Name::a(), Object::Dictionary(attr_dict));
994
995        let mut root_dict = HashMap::new();
996        root_dict.insert(Name::k(), Object::Dictionary(td));
997
998        let mut catalog = HashMap::new();
999        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1000
1001        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1002        let elem = &tree.root_elements[0];
1003        assert_eq!(elem.struct_type, "TD");
1004        assert_eq!(elem.attributes.len(), 1);
1005        assert_eq!(elem.attributes[0].owner, "Layout");
1006        assert!(elem.attributes[0].entries.len() >= 2);
1007
1008        // Check that at least one entry has the expected value
1009        let writing_mode = elem.attributes[0]
1010            .entries
1011            .iter()
1012            .find(|(k, _)| k == "WritingMode");
1013        assert!(writing_mode.is_some());
1014        match &writing_mode.unwrap().1 {
1015            AttributeValue::Name(n) => assert_eq!(n, "LrTb"),
1016            _ => panic!("expected Name attribute value"),
1017        }
1018    }
1019
1020    #[test]
1021    fn test_struct_element_with_attribute_array() {
1022        let store = build_store();
1023
1024        let mut attr1 = HashMap::new();
1025        attr1.insert(Name::o(), name_obj("Layout"));
1026        attr1.insert(Name::from("TextAlign"), name_obj("Center"));
1027
1028        let mut attr2 = HashMap::new();
1029        attr2.insert(Name::o(), name_obj("Table"));
1030        attr2.insert(Name::from("RowSpan"), Object::Integer(2));
1031
1032        let mut td = struct_elem_dict("TD");
1033        td.insert(
1034            Name::a(),
1035            Object::Array(vec![Object::Dictionary(attr1), Object::Dictionary(attr2)]),
1036        );
1037
1038        let mut root_dict = HashMap::new();
1039        root_dict.insert(Name::k(), Object::Dictionary(td));
1040
1041        let mut catalog = HashMap::new();
1042        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1043
1044        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1045        let elem = &tree.root_elements[0];
1046        assert_eq!(elem.attributes.len(), 2);
1047        assert_eq!(elem.attributes[0].owner, "Layout");
1048        assert_eq!(elem.attributes[1].owner, "Table");
1049    }
1050
1051    #[test]
1052    fn test_struct_element_no_attributes() {
1053        let store = build_store();
1054
1055        let p = struct_elem_dict("P");
1056
1057        let mut root_dict = HashMap::new();
1058        root_dict.insert(Name::k(), Object::Dictionary(p));
1059
1060        let mut catalog = HashMap::new();
1061        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1062
1063        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1064        assert!(tree.root_elements[0].attributes.is_empty());
1065    }
1066
1067    #[test]
1068    fn test_struct_element_obj_type_none_by_default() {
1069        let store = build_store();
1070
1071        let p = struct_elem_dict("P");
1072        let mut root_dict = HashMap::new();
1073        root_dict.insert(Name::k(), Object::Dictionary(p));
1074        let mut catalog = HashMap::new();
1075        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1076
1077        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1078        assert!(tree.root_elements[0].obj_type.is_none());
1079    }
1080
1081    #[test]
1082    fn test_struct_element_obj_type_parsed() {
1083        let store = build_store();
1084
1085        let mut elem = struct_elem_dict("Span");
1086        elem.insert(Name::obj_type(), Object::Name(Name::from("Elem")));
1087
1088        let mut root_dict = HashMap::new();
1089        root_dict.insert(Name::k(), Object::Dictionary(elem));
1090        let mut catalog = HashMap::new();
1091        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1092
1093        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1094        assert_eq!(tree.root_elements[0].obj_type.as_deref(), Some("Elem"));
1095    }
1096
1097    // --- elements_for_page_ref / elements_for_page tests ---
1098
1099    /// `elements_for_page_ref` returns only elements that reference the given page object ID.
1100    #[test]
1101    fn test_elements_for_page_ref_filters_correctly() {
1102        let store = build_store();
1103
1104        let page1 = ObjectId::new(5, 0);
1105        let page2 = ObjectId::new(6, 0);
1106
1107        // Three paragraphs: two on page1, one on page2.
1108        let mut p1 = struct_elem_dict("P");
1109        p1.insert(Name::pg(), Object::Reference(page1));
1110        let mut p2 = struct_elem_dict("H1");
1111        p2.insert(Name::pg(), Object::Reference(page2));
1112        let mut p3 = struct_elem_dict("Span");
1113        p3.insert(Name::pg(), Object::Reference(page1));
1114
1115        let mut doc = struct_elem_dict("Document");
1116        doc.insert(
1117            Name::k(),
1118            Object::Array(vec![
1119                Object::Dictionary(p1),
1120                Object::Dictionary(p2),
1121                Object::Dictionary(p3),
1122            ]),
1123        );
1124
1125        let mut root_dict = HashMap::new();
1126        root_dict.insert(Name::k(), Object::Dictionary(doc));
1127        let mut catalog = HashMap::new();
1128        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1129
1130        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1131
1132        // Filter by page1 — should get "P" and "Span".
1133        let page1_elems: Vec<&StructElement> = tree.elements_for_page_ref(page1).collect();
1134        assert_eq!(page1_elems.len(), 2);
1135        assert!(page1_elems.iter().all(|e| e.page_ref == Some(page1)));
1136        let types: Vec<&str> = page1_elems.iter().map(|e| e.struct_type.as_str()).collect();
1137        assert!(types.contains(&"P"));
1138        assert!(types.contains(&"Span"));
1139
1140        // Filter by page2 — should get only "H1".
1141        let page2_elems: Vec<&StructElement> = tree.elements_for_page_ref(page2).collect();
1142        assert_eq!(page2_elems.len(), 1);
1143        assert_eq!(page2_elems[0].struct_type, "H1");
1144
1145        // Filter by a page not in the tree — should get nothing.
1146        let absent = ObjectId::new(99, 0);
1147        let absent_elems: Vec<&StructElement> = tree.elements_for_page_ref(absent).collect();
1148        assert!(absent_elems.is_empty());
1149    }
1150
1151    /// `elements_for_page` with a valid page_index selects the correct page's elements.
1152    #[test]
1153    fn test_elements_for_page_with_valid_index() {
1154        let store = build_store();
1155
1156        let page0 = ObjectId::new(10, 0);
1157        let page1 = ObjectId::new(11, 0);
1158
1159        let mut h1 = struct_elem_dict("H1");
1160        h1.insert(Name::pg(), Object::Reference(page0));
1161        let mut p = struct_elem_dict("P");
1162        p.insert(Name::pg(), Object::Reference(page1));
1163
1164        let mut doc = struct_elem_dict("Document");
1165        doc.insert(
1166            Name::k(),
1167            Object::Array(vec![Object::Dictionary(h1), Object::Dictionary(p)]),
1168        );
1169
1170        let mut root_dict = HashMap::new();
1171        root_dict.insert(Name::k(), Object::Dictionary(doc));
1172        let mut catalog = HashMap::new();
1173        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1174
1175        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1176
1177        // Simulate a page_ids list: page0 at index 0, page1 at index 1.
1178        let page_ids = vec![page0, page1];
1179
1180        let idx0_elems: Vec<&StructElement> = tree.elements_for_page(0, &page_ids).collect();
1181        assert_eq!(idx0_elems.len(), 1);
1182        assert_eq!(idx0_elems[0].struct_type, "H1");
1183
1184        let idx1_elems: Vec<&StructElement> = tree.elements_for_page(1, &page_ids).collect();
1185        assert_eq!(idx1_elems.len(), 1);
1186        assert_eq!(idx1_elems[0].struct_type, "P");
1187    }
1188
1189    // --- find_elements_for_mcid tests ---
1190
1191    /// `find_elements_for_mcid` on an empty slice returns empty vec.
1192    #[test]
1193    fn test_find_elements_for_mcid_empty() {
1194        let result = find_elements_for_mcid(&[], 42);
1195        assert!(result.is_empty());
1196    }
1197
1198    /// `find_elements_for_mcid` returns the index of an element with a matching MCID.
1199    #[test]
1200    fn test_find_elements_for_mcid_found() {
1201        // Build two root elements: index 0 has MCID 5, index 1 has MCID 10.
1202        let elem0 = StructElement {
1203            struct_type: "P".to_string(),
1204            obj_type: None,
1205            alt_text: None,
1206            actual_text: None,
1207            lang: None,
1208            title: None,
1209            id: None,
1210            page_ref: None,
1211            mcids: vec![5],
1212            children: Vec::new(),
1213            attributes: Vec::new(),
1214            parent_index: None,
1215        };
1216        let elem1 = StructElement {
1217            struct_type: "Span".to_string(),
1218            obj_type: None,
1219            alt_text: None,
1220            actual_text: None,
1221            lang: None,
1222            title: None,
1223            id: None,
1224            page_ref: None,
1225            mcids: vec![10],
1226            children: Vec::new(),
1227            attributes: Vec::new(),
1228            parent_index: None,
1229        };
1230        let elements = vec![elem0, elem1];
1231
1232        // MCID 5 is in element at index 0.
1233        let result = find_elements_for_mcid(&elements, 5);
1234        assert_eq!(result, vec![0]);
1235
1236        // MCID 10 is in element at index 1.
1237        let result = find_elements_for_mcid(&elements, 10);
1238        assert_eq!(result, vec![1]);
1239
1240        // MCID from a child element returns the root index.
1241        let child = StructElement {
1242            struct_type: "Span".to_string(),
1243            obj_type: None,
1244            alt_text: None,
1245            actual_text: None,
1246            lang: None,
1247            title: None,
1248            id: None,
1249            page_ref: None,
1250            mcids: vec![99],
1251            children: Vec::new(),
1252            attributes: Vec::new(),
1253            parent_index: None,
1254        };
1255        let parent = StructElement {
1256            struct_type: "P".to_string(),
1257            obj_type: None,
1258            alt_text: None,
1259            actual_text: None,
1260            lang: None,
1261            title: None,
1262            id: None,
1263            page_ref: None,
1264            mcids: Vec::new(),
1265            children: vec![child],
1266            attributes: Vec::new(),
1267            parent_index: None,
1268        };
1269        let result = find_elements_for_mcid(&[parent], 99);
1270        assert_eq!(result, vec![0]);
1271    }
1272
1273    /// `find_elements_for_mcid` returns empty vec when the MCID is not present.
1274    #[test]
1275    fn test_find_elements_for_mcid_not_found() {
1276        let elem = StructElement {
1277            struct_type: "P".to_string(),
1278            obj_type: None,
1279            alt_text: None,
1280            actual_text: None,
1281            lang: None,
1282            title: None,
1283            id: None,
1284            page_ref: None,
1285            mcids: vec![1, 2, 3],
1286            children: Vec::new(),
1287            attributes: Vec::new(),
1288            parent_index: None,
1289        };
1290        let result = find_elements_for_mcid(&[elem], 999);
1291        assert!(result.is_empty());
1292    }
1293
1294    /// `elements_for_page` with an out-of-range index returns an empty iterator.
1295    #[test]
1296    fn test_elements_for_page_out_of_range_returns_empty() {
1297        let store = build_store();
1298
1299        let page0 = ObjectId::new(20, 0);
1300
1301        let mut p = struct_elem_dict("P");
1302        p.insert(Name::pg(), Object::Reference(page0));
1303
1304        let mut root_dict = HashMap::new();
1305        root_dict.insert(Name::k(), Object::Dictionary(p));
1306        let mut catalog = HashMap::new();
1307        catalog.insert(Name::struct_tree_root(), Object::Dictionary(root_dict));
1308
1309        let tree = StructTree::from_catalog(&catalog, &store).unwrap().unwrap();
1310
1311        let page_ids = vec![page0];
1312        // Index 5 is out of range for a single-page list.
1313        let elems: Vec<&StructElement> = tree.elements_for_page(5, &page_ids).collect();
1314        assert!(elems.is_empty());
1315    }
1316}