Skip to main content

rpdfium_doc/
number_tree.rs

1//! PDF number tree parser (ISO 32000-2 section 7.9.7).
2//!
3//! A number tree maps integer keys to values. It has the same structure as
4//! a name tree but uses `/Nums` instead of `/Names`.
5
6use std::collections::HashMap;
7
8use rpdfium_core::{Name, PdfSource};
9use rpdfium_parser::{Object, ObjectStore};
10
11use crate::error::{DocError, DocResult};
12
13/// Maximum traversal depth for number trees.
14const MAX_TREE_DEPTH: usize = 64;
15
16/// A parsed number tree mapping integer keys to values of type `V`.
17#[derive(Debug, Clone)]
18pub struct NumberTree<V> {
19    entries: Vec<(i64, V)>,
20}
21
22impl<V> NumberTree<V> {
23    /// Parse a number tree starting from the given root object.
24    ///
25    /// The `convert` function transforms each value `Object` into a `V`.
26    /// Traversal is fully iterative using an explicit stack.
27    pub fn parse<S: PdfSource, F>(
28        root: &Object,
29        store: &ObjectStore<S>,
30        convert: F,
31    ) -> DocResult<Self>
32    where
33        F: Fn(&Object, &ObjectStore<S>) -> DocResult<V>,
34    {
35        let root_dict = resolve_dict(root, store)?;
36        let mut entries = Vec::new();
37
38        // Stack of (dict, depth) for iterative traversal
39        let mut stack: Vec<(&HashMap<Name, Object>, usize)> = vec![(root_dict, 0)];
40
41        while let Some((dict, depth)) = stack.pop() {
42            if depth > MAX_TREE_DEPTH {
43                return Err(DocError::DepthExceeded);
44            }
45
46            // Leaf node: has /Nums array
47            if let Some(Object::Array(nums_arr)) = dict.get(&Name::nums()) {
48                let mut i = 0;
49                while i + 1 < nums_arr.len() {
50                    let key_obj = store
51                        .deep_resolve(&nums_arr[i])
52                        .map_err(|e| DocError::Parser(e.to_string()))?;
53                    let val_obj = store
54                        .deep_resolve(&nums_arr[i + 1])
55                        .map_err(|e| DocError::Parser(e.to_string()))?;
56
57                    let key = key_obj.as_i64().ok_or(DocError::UnexpectedType)?;
58                    let value = convert(val_obj, store)?;
59                    entries.push((key, value));
60                    i += 2;
61                }
62            }
63
64            // Intermediate node: has /Kids array
65            if let Some(Object::Array(kids)) = dict.get(&Name::kids()) {
66                for kid in kids.iter().rev() {
67                    let kid_obj = store
68                        .deep_resolve(kid)
69                        .map_err(|e| DocError::Parser(e.to_string()))?;
70                    let kid_dict = kid_obj.as_dict().ok_or(DocError::UnexpectedType)?;
71                    stack.push((kid_dict, depth + 1));
72                }
73            }
74        }
75
76        // Sort by key for binary search in floor
77        entries.sort_by_key(|(k, _)| *k);
78
79        Ok(Self { entries })
80    }
81
82    /// Look up a value by exact integer key.
83    pub fn get(&self, num: i64) -> Option<&V> {
84        self.entries
85            .binary_search_by_key(&num, |(k, _)| *k)
86            .ok()
87            .map(|idx| &self.entries[idx].1)
88    }
89
90    /// Find the entry with the largest key less than or equal to `num`.
91    ///
92    /// Corresponds to `CPDF_NumberTree::GetFloor()` in PDFium.
93    pub fn floor(&self, num: i64) -> Option<&V> {
94        match self.entries.binary_search_by_key(&num, |(k, _)| *k) {
95            Ok(idx) => Some(&self.entries[idx].1),
96            Err(idx) => {
97                if idx > 0 {
98                    Some(&self.entries[idx - 1].1)
99                } else {
100                    None
101                }
102            }
103        }
104    }
105
106    /// ADR-019 alias for [`floor()`](Self::floor).
107    ///
108    /// Corresponds to `CPDF_NumberTree::GetFloor()` in PDFium.
109    #[inline]
110    pub fn get_floor(&self, num: i64) -> Option<&V> {
111        self.floor(num)
112    }
113
114    /// Return all entries as a slice of `(i64, V)` pairs, sorted by key.
115    pub fn entries(&self) -> &[(i64, V)] {
116        &self.entries
117    }
118}
119
120/// Resolve an object to a dictionary reference.
121fn resolve_dict<'a, S: PdfSource>(
122    obj: &'a Object,
123    store: &'a ObjectStore<S>,
124) -> DocResult<&'a HashMap<Name, Object>> {
125    let resolved = store
126        .deep_resolve(obj)
127        .map_err(|e| DocError::Parser(e.to_string()))?;
128    resolved.as_dict().ok_or(DocError::UnexpectedType)
129}
130
131#[cfg(test)]
132mod tests {
133    use super::*;
134
135    fn int_obj(n: i64) -> Object {
136        Object::Integer(n)
137    }
138
139    fn convert_int<S: PdfSource>(obj: &Object, _store: &ObjectStore<S>) -> DocResult<i64> {
140        obj.as_i64().ok_or(DocError::UnexpectedType)
141    }
142
143    fn build_store() -> ObjectStore<Vec<u8>> {
144        let pdf = build_minimal_pdf();
145        ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
146    }
147
148    fn build_minimal_pdf() -> Vec<u8> {
149        let mut pdf = Vec::new();
150        pdf.extend_from_slice(b"%PDF-1.4\n");
151        let obj1_offset = pdf.len();
152        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
153        let obj2_offset = pdf.len();
154        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
155        let xref_offset = pdf.len();
156        pdf.extend_from_slice(b"xref\n0 3\n");
157        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
158        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
159        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
160        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
161        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
162        pdf
163    }
164
165    #[test]
166    fn test_empty_tree() {
167        let store = build_store();
168        let root = Object::Dictionary(HashMap::new());
169        let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
170        assert!(tree.entries().is_empty());
171    }
172
173    #[test]
174    fn test_single_level_leaf() {
175        let store = build_store();
176        let mut dict = HashMap::new();
177        dict.insert(
178            Name::nums(),
179            Object::Array(vec![int_obj(0), int_obj(100), int_obj(5), int_obj(500)]),
180        );
181        let root = Object::Dictionary(dict);
182        let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
183        assert_eq!(tree.entries().len(), 2);
184        assert_eq!(tree.get(0), Some(&100));
185        assert_eq!(tree.get(5), Some(&500));
186    }
187
188    #[test]
189    fn test_get_floor_exact() {
190        let store = build_store();
191        let mut dict = HashMap::new();
192        dict.insert(
193            Name::nums(),
194            Object::Array(vec![int_obj(0), int_obj(10), int_obj(5), int_obj(50)]),
195        );
196        let root = Object::Dictionary(dict);
197        let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
198        assert_eq!(tree.floor(5), Some(&50));
199    }
200
201    #[test]
202    fn test_get_floor_between() {
203        let store = build_store();
204        let mut dict = HashMap::new();
205        dict.insert(
206            Name::nums(),
207            Object::Array(vec![
208                int_obj(0),
209                int_obj(10),
210                int_obj(5),
211                int_obj(50),
212                int_obj(10),
213                int_obj(100),
214            ]),
215        );
216        let root = Object::Dictionary(dict);
217        let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
218        // 7 is between 5 and 10, floor should be 5 -> value 50
219        assert_eq!(tree.floor(7), Some(&50));
220    }
221
222    #[test]
223    fn test_get_floor_before_first() {
224        let store = build_store();
225        let mut dict = HashMap::new();
226        dict.insert(Name::nums(), Object::Array(vec![int_obj(5), int_obj(50)]));
227        let root = Object::Dictionary(dict);
228        let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
229        assert_eq!(tree.floor(3), None);
230    }
231
232    #[test]
233    fn test_two_level_tree() {
234        let store = build_store();
235
236        let mut leaf1 = HashMap::new();
237        leaf1.insert(Name::nums(), Object::Array(vec![int_obj(0), int_obj(100)]));
238
239        let mut leaf2 = HashMap::new();
240        leaf2.insert(Name::nums(), Object::Array(vec![int_obj(10), int_obj(200)]));
241
242        let mut root_dict = HashMap::new();
243        root_dict.insert(
244            Name::kids(),
245            Object::Array(vec![Object::Dictionary(leaf1), Object::Dictionary(leaf2)]),
246        );
247
248        let root = Object::Dictionary(root_dict);
249        let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
250        assert_eq!(tree.entries().len(), 2);
251        assert_eq!(tree.get(0), Some(&100));
252        assert_eq!(tree.get(10), Some(&200));
253    }
254
255    #[test]
256    fn test_lookup_miss() {
257        let store = build_store();
258        let mut dict = HashMap::new();
259        dict.insert(Name::nums(), Object::Array(vec![int_obj(1), int_obj(11)]));
260        let root = Object::Dictionary(dict);
261        let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
262        assert_eq!(tree.get(1), Some(&11));
263        assert_eq!(tree.get(99), None);
264    }
265}