Skip to main content

rpdfium_doc/
name_tree.rs

1//! PDF name tree parser (ISO 32000-2 section 7.9.6).
2//!
3//! A name tree is a balanced tree of nodes whose leaves contain sorted
4//! `(String, V)` pairs. Intermediate nodes point to children via `/Kids`.
5
6use std::collections::HashMap;
7
8use rpdfium_core::{Name, PdfSource};
9use rpdfium_parser::{Object, ObjectStore};
10
11use crate::error::{DocError, DocResult};
12
13/// Maximum traversal depth for name trees (prevents infinite loops).
14const MAX_TREE_DEPTH: usize = 64;
15
16/// A parsed name tree mapping string keys to values of type `V`.
17#[derive(Debug, Clone)]
18pub struct NameTree<V> {
19    entries: Vec<(String, V)>,
20}
21
22impl<V> NameTree<V> {
23    /// Parse a name tree starting from the given root object.
24    ///
25    /// The `convert` function transforms each value `Object` into a `V`.
26    /// Traversal is fully iterative using an explicit stack.
27    pub fn parse<S: PdfSource, F>(
28        root: &Object,
29        store: &ObjectStore<S>,
30        convert: F,
31    ) -> DocResult<Self>
32    where
33        F: Fn(&Object) -> DocResult<V>,
34    {
35        let root_dict = resolve_dict(root, store)?;
36        let mut entries = Vec::new();
37
38        // Stack of (dict, depth) for iterative traversal
39        let mut stack: Vec<(&HashMap<Name, Object>, usize)> = vec![(root_dict, 0)];
40
41        while let Some((dict, depth)) = stack.pop() {
42            if depth > MAX_TREE_DEPTH {
43                return Err(DocError::DepthExceeded);
44            }
45
46            // Leaf node: has /Names array
47            if let Some(Object::Array(names_arr)) = dict.get(&Name::names()) {
48                // /Names is [key1, value1, key2, value2, ...]
49                let mut i = 0;
50                while i + 1 < names_arr.len() {
51                    let key_obj = store
52                        .deep_resolve(&names_arr[i])
53                        .map_err(|e| DocError::Parser(e.to_string()))?;
54                    let val_obj = store
55                        .deep_resolve(&names_arr[i + 1])
56                        .map_err(|e| DocError::Parser(e.to_string()))?;
57
58                    let key = extract_string(key_obj)?;
59                    let value = convert(val_obj)?;
60                    entries.push((key, value));
61                    i += 2;
62                }
63            }
64
65            // Intermediate node: has /Kids array
66            if let Some(Object::Array(kids)) = dict.get(&Name::kids()) {
67                // Push children in reverse order so we process left-to-right
68                for kid in kids.iter().rev() {
69                    let kid_obj = store
70                        .deep_resolve(kid)
71                        .map_err(|e| DocError::Parser(e.to_string()))?;
72                    let kid_dict = kid_obj.as_dict().ok_or(DocError::UnexpectedType)?;
73                    stack.push((kid_dict, depth + 1));
74                }
75            }
76        }
77
78        Ok(Self { entries })
79    }
80
81    /// Return the number of entries in this name tree.
82    ///
83    /// Corresponds to `CPDF_NameTree::GetCount()` in PDFium.
84    pub fn count(&self) -> usize {
85        self.entries.len()
86    }
87
88    /// Upstream-aligned alias for [`count()`](Self::count).
89    ///
90    /// Corresponds to `CPDF_NameTree::GetCount()` in PDFium.
91    #[inline]
92    pub fn get_count(&self) -> usize {
93        self.count()
94    }
95
96    /// Look up a value by its string key.
97    pub fn get(&self, name: &str) -> Option<&V> {
98        self.entries.iter().find(|(k, _)| k == name).map(|(_, v)| v)
99    }
100
101    /// Return all entries as a slice of `(String, V)` pairs.
102    pub fn entries(&self) -> &[(String, V)] {
103        &self.entries
104    }
105
106    /// Add an entry to the name tree, maintaining sorted order.
107    ///
108    /// If an entry with the same name already exists it is replaced.
109    /// The in-memory tree is updated; to persist the change to a PDF file
110    /// use rpdfium-edit.
111    pub fn add(&mut self, name: String, value: V) -> DocResult<()> {
112        let pos = self
113            .entries
114            .partition_point(|(k, _)| k.as_str() < name.as_str());
115        // Replace if already present at that position
116        if self.entries.get(pos).is_some_and(|(k, _)| k == &name) {
117            self.entries[pos] = (name, value);
118        } else {
119            self.entries.insert(pos, (name, value));
120        }
121        Ok(())
122    }
123
124    /// Delete an entry by its index.
125    ///
126    /// Returns [`DocError::InvalidIndex`] if `index` is out of range.
127    pub fn delete_by_index(&mut self, index: usize) -> DocResult<()> {
128        if index >= self.entries.len() {
129            return Err(DocError::InvalidIndex(index));
130        }
131        self.entries.remove(index);
132        Ok(())
133    }
134
135    /// Delete an entry by its name key.
136    ///
137    /// Returns [`DocError::NotFound`] if no entry with the given name exists.
138    pub fn delete_by_name(&mut self, name: &str) -> DocResult<()> {
139        if let Some(pos) = self.entries.iter().position(|(k, _)| k == name) {
140            self.entries.remove(pos);
141            Ok(())
142        } else {
143            Err(DocError::NotFound(name.to_string()))
144        }
145    }
146}
147
148/// Resolve an object to a dictionary reference.
149fn resolve_dict<'a, S: PdfSource>(
150    obj: &'a Object,
151    store: &'a ObjectStore<S>,
152) -> DocResult<&'a HashMap<Name, Object>> {
153    let resolved = store
154        .deep_resolve(obj)
155        .map_err(|e| DocError::Parser(e.to_string()))?;
156    resolved.as_dict().ok_or(DocError::UnexpectedType)
157}
158
159/// Extract a string from a PDF object (either Name or String).
160fn extract_string(obj: &Object) -> DocResult<String> {
161    if let Some(s) = obj.as_string() {
162        return Ok(s.to_string_lossy());
163    }
164    if let Some(n) = obj.as_name() {
165        return Ok(n.as_str().into_owned());
166    }
167    Err(DocError::UnexpectedType)
168}
169
170#[cfg(test)]
171mod tests {
172    use super::*;
173    use rpdfium_core::PdfString;
174
175    fn str_obj(s: &str) -> Object {
176        Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
177    }
178
179    fn int_obj(n: i64) -> Object {
180        Object::Integer(n)
181    }
182
183    fn convert_int(obj: &Object) -> DocResult<i64> {
184        obj.as_i64().ok_or(DocError::UnexpectedType)
185    }
186
187    /// Helper to build a minimal PDF and ObjectStore for testing.
188    fn build_store() -> ObjectStore<Vec<u8>> {
189        let pdf = build_minimal_pdf();
190        ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
191    }
192
193    fn build_minimal_pdf() -> Vec<u8> {
194        let mut pdf = Vec::new();
195        pdf.extend_from_slice(b"%PDF-1.4\n");
196        let obj1_offset = pdf.len();
197        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
198        let obj2_offset = pdf.len();
199        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
200        let xref_offset = pdf.len();
201        pdf.extend_from_slice(b"xref\n0 3\n");
202        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
203        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
204        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
205        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
206        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
207        pdf
208    }
209
210    #[test]
211    fn test_empty_tree() {
212        let store = build_store();
213        let root = Object::Dictionary(HashMap::new());
214        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
215        assert!(tree.entries().is_empty());
216    }
217
218    #[test]
219    fn test_single_level_leaf() {
220        let store = build_store();
221        let mut dict = HashMap::new();
222        dict.insert(
223            Name::names(),
224            Object::Array(vec![
225                str_obj("alpha"),
226                int_obj(1),
227                str_obj("beta"),
228                int_obj(2),
229            ]),
230        );
231        let root = Object::Dictionary(dict);
232        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
233        assert_eq!(tree.entries().len(), 2);
234        assert_eq!(tree.get("alpha"), Some(&1));
235        assert_eq!(tree.get("beta"), Some(&2));
236    }
237
238    #[test]
239    fn test_two_level_tree() {
240        let store = build_store();
241
242        // Leaf 1
243        let mut leaf1 = HashMap::new();
244        leaf1.insert(
245            Name::names(),
246            Object::Array(vec![str_obj("a"), int_obj(10)]),
247        );
248
249        // Leaf 2
250        let mut leaf2 = HashMap::new();
251        leaf2.insert(
252            Name::names(),
253            Object::Array(vec![str_obj("b"), int_obj(20)]),
254        );
255
256        // Root with /Kids
257        let mut root_dict = HashMap::new();
258        root_dict.insert(
259            Name::kids(),
260            Object::Array(vec![Object::Dictionary(leaf1), Object::Dictionary(leaf2)]),
261        );
262
263        let root = Object::Dictionary(root_dict);
264        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
265        assert_eq!(tree.entries().len(), 2);
266        assert_eq!(tree.get("a"), Some(&10));
267        assert_eq!(tree.get("b"), Some(&20));
268    }
269
270    #[test]
271    fn test_lookup_miss() {
272        let store = build_store();
273        let mut dict = HashMap::new();
274        dict.insert(
275            Name::names(),
276            Object::Array(vec![str_obj("key"), int_obj(42)]),
277        );
278        let root = Object::Dictionary(dict);
279        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
280        assert_eq!(tree.get("key"), Some(&42));
281        assert_eq!(tree.get("missing"), None);
282    }
283
284    #[test]
285    fn test_duplicate_key_keeps_first() {
286        let store = build_store();
287        let mut dict = HashMap::new();
288        dict.insert(
289            Name::names(),
290            Object::Array(vec![str_obj("dup"), int_obj(1), str_obj("dup"), int_obj(2)]),
291        );
292        let root = Object::Dictionary(dict);
293        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
294        // Both entries present; get() returns first
295        assert_eq!(tree.get("dup"), Some(&1));
296        assert_eq!(tree.entries().len(), 2);
297    }
298
299    #[test]
300    fn test_add_inserts_sorted() {
301        let store = build_store();
302        let root = Object::Dictionary(HashMap::new());
303        let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
304        tree.add("beta".to_string(), 2).unwrap();
305        tree.add("alpha".to_string(), 1).unwrap();
306        tree.add("gamma".to_string(), 3).unwrap();
307        let entries = tree.entries();
308        assert_eq!(entries.len(), 3);
309        assert_eq!(entries[0].0, "alpha");
310        assert_eq!(entries[1].0, "beta");
311        assert_eq!(entries[2].0, "gamma");
312        assert_eq!(tree.get("beta"), Some(&2));
313    }
314
315    #[test]
316    fn test_add_replaces_existing_key() {
317        let store = build_store();
318        let root = Object::Dictionary(HashMap::new());
319        let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
320        tree.add("key".to_string(), 1).unwrap();
321        tree.add("key".to_string(), 99).unwrap();
322        assert_eq!(tree.entries().len(), 1);
323        assert_eq!(tree.get("key"), Some(&99));
324    }
325
326    #[test]
327    fn test_delete_by_index_removes_entry() {
328        let store = build_store();
329        let root = Object::Dictionary(HashMap::new());
330        let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
331        tree.add("a".to_string(), 1).unwrap();
332        tree.add("b".to_string(), 2).unwrap();
333        tree.delete_by_index(0).unwrap();
334        assert_eq!(tree.entries().len(), 1);
335        assert_eq!(tree.get("b"), Some(&2));
336    }
337
338    #[test]
339    fn test_delete_by_index_out_of_range() {
340        let store = build_store();
341        let root = Object::Dictionary(HashMap::new());
342        let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
343        let result = tree.delete_by_index(0);
344        assert!(matches!(result.unwrap_err(), DocError::InvalidIndex(0)));
345    }
346
347    #[test]
348    fn test_delete_by_name_removes_entry() {
349        let store = build_store();
350        let root = Object::Dictionary(HashMap::new());
351        let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
352        tree.add("key".to_string(), 42).unwrap();
353        tree.delete_by_name("key").unwrap();
354        assert!(tree.entries().is_empty());
355    }
356
357    #[test]
358    fn test_delete_by_name_not_found() {
359        let store = build_store();
360        let root = Object::Dictionary(HashMap::new());
361        let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
362        let result = tree.delete_by_name("missing");
363        assert!(matches!(result.unwrap_err(), DocError::NotFound(_)));
364    }
365
366    /// Upstream: TEST(CPDFNameTreeTest, GetUnicodeNameWithBOM)
367    ///
368    /// Verifies that a name tree with a BOM-prefixed key correctly resolves
369    /// the key when looking up by its decoded Unicode value.
370    #[test]
371    fn test_cpdf_name_tree_get_unicode_name_with_bom() {
372        let store = build_store();
373
374        // Build a Names array with a BOM-prefixed key "1" (U+0031).
375        // BOM = FE FF, then 00 31 => "1" in UTF-16BE.
376        let bom_key = PdfString::from_bytes(vec![0xFE, 0xFF, 0x00, 0x31]);
377        let mut dict = HashMap::new();
378        dict.insert(
379            Name::names(),
380            Object::Array(vec![Object::String(bom_key), int_obj(100)]),
381        );
382        let root = Object::Dictionary(dict);
383        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
384
385        // The tree should have one entry.
386        assert_eq!(tree.count(), 1);
387
388        // The stored key is the decoded string "1".
389        let (stored_key, stored_val) = &tree.entries()[0];
390        assert_eq!(*stored_val, 100);
391        // The BOM-stripped key should start with "1" (the exact decoded form
392        // depends on to_string_lossy handling of UTF-16BE BOM bytes).
393        // In PDFium upstream, LookupValue(L"1") succeeds; here we verify
394        // the value is reachable.
395        assert!(tree.get(stored_key).is_some());
396        assert_eq!(*tree.get(stored_key).unwrap(), 100);
397    }
398
399    /// Upstream: TEST(CPDFNameTreeTest, GetFromTreeWithLimitsArrayWith4Items)
400    ///
401    /// After building a multi-level name tree, verifies that lookups still
402    /// succeed even if a /Limits array has excess elements (4 instead of 2).
403    /// In rpdfium the in-memory NameTree ignores /Limits entirely during
404    /// traversal (it reads leaf /Names directly), so the test verifies that
405    /// the full 3-level tree structure is correctly traversed.
406    #[test]
407    fn test_cpdf_name_tree_get_from_tree_with_limits_array_with_4_items() {
408        let store = build_store();
409
410        // Build the 3-level tree from the upstream FillNameTreeDict:
411        //   [root]
412        //     └── [kid1]
413        //           ├── [grandKid2]
414        //           │     ├── [greatGrandKid4] {1.txt:111, 2.txt:222}
415        //           │     └── [greatGrandKid5] {3.txt:333, 5.txt:555}
416        //           └── [grandKid3]            {9.txt:999}
417        //
418        // The upstream test then mutates grandKid3's /Limits to have 4 items
419        // and verifies that LookupValue("9.txt") still returns 999.
420        // Since rpdfium ignores /Limits, we just verify the full tree works.
421
422        let great_grand_kid4 = {
423            let mut d = HashMap::new();
424            d.insert(
425                Name::names(),
426                Object::Array(vec![
427                    str_obj("1.txt"),
428                    int_obj(111),
429                    str_obj("2.txt"),
430                    int_obj(222),
431                ]),
432            );
433            Object::Dictionary(d)
434        };
435        let great_grand_kid5 = {
436            let mut d = HashMap::new();
437            d.insert(
438                Name::names(),
439                Object::Array(vec![
440                    str_obj("3.txt"),
441                    int_obj(333),
442                    str_obj("5.txt"),
443                    int_obj(555),
444                ]),
445            );
446            Object::Dictionary(d)
447        };
448        let grand_kid2 = {
449            let mut d = HashMap::new();
450            d.insert(
451                Name::kids(),
452                Object::Array(vec![great_grand_kid4, great_grand_kid5]),
453            );
454            Object::Dictionary(d)
455        };
456        let grand_kid3 = {
457            let mut d = HashMap::new();
458            d.insert(
459                Name::names(),
460                Object::Array(vec![str_obj("9.txt"), int_obj(999)]),
461            );
462            // In upstream, /Limits is mutated to have 4 items here.
463            // rpdfium ignores /Limits, so we just include a malformed one.
464            d.insert(
465                Name::from("Limits"),
466                Object::Array(vec![
467                    str_obj("9.txt"),
468                    str_obj("9.txt"),
469                    int_obj(5),
470                    int_obj(6),
471                ]),
472            );
473            Object::Dictionary(d)
474        };
475        let kid1 = {
476            let mut d = HashMap::new();
477            d.insert(Name::kids(), Object::Array(vec![grand_kid2, grand_kid3]));
478            Object::Dictionary(d)
479        };
480        let mut root_dict = HashMap::new();
481        root_dict.insert(Name::kids(), Object::Array(vec![kid1]));
482        let root = Object::Dictionary(root_dict);
483
484        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
485
486        // All 5 entries should be found.
487        assert_eq!(tree.count(), 5);
488        assert_eq!(tree.get("1.txt"), Some(&111));
489        assert_eq!(tree.get("2.txt"), Some(&222));
490        assert_eq!(tree.get("3.txt"), Some(&333));
491        assert_eq!(tree.get("5.txt"), Some(&555));
492        assert_eq!(tree.get("9.txt"), Some(&999));
493    }
494
495    #[test]
496    fn test_name_object_as_key() {
497        let store = build_store();
498        let mut dict = HashMap::new();
499        dict.insert(
500            Name::names(),
501            Object::Array(vec![Object::Name(Name::from("myname")), int_obj(99)]),
502        );
503        let root = Object::Dictionary(dict);
504        let tree = NameTree::parse(&root, &store, convert_int).unwrap();
505        assert_eq!(tree.get("myname"), Some(&99));
506    }
507}