Skip to main content

oxidize_pdf/structure/
page_tree.rs

1//! Page tree structure according to ISO 32000-1 Section 7.7.3
2
3#![allow(clippy::large_enum_variant, clippy::collapsible_match)]
4
5use crate::error::{PdfError, Result};
6use crate::geometry::Rectangle;
7use crate::objects::{Array, Dictionary, Object, ObjectId};
8use crate::page::Page;
9use std::collections::HashMap;
10
11/// Page tree node type
12#[derive(Clone)]
13pub enum PageTreeNode {
14    /// Pages node (intermediate node)
15    Pages {
16        /// Child nodes
17        kids: Vec<ObjectId>,
18        /// Total page count in subtree
19        count: u32,
20        /// Media box inherited by children
21        media_box: Option<Rectangle>,
22        /// Resources inherited by children
23        resources: Option<Dictionary>,
24        /// Parent node reference
25        parent: Option<ObjectId>,
26    },
27    /// Page node (leaf node)
28    Page {
29        /// Page content
30        page: Page,
31        /// Parent node reference
32        parent: ObjectId,
33    },
34}
35
36/// Page tree structure
37pub struct PageTree {
38    /// Root node ID
39    root: ObjectId,
40    /// All nodes in the tree
41    nodes: HashMap<ObjectId, PageTreeNode>,
42    /// Page to node mapping
43    page_map: HashMap<u32, ObjectId>,
44}
45
46impl PageTree {
47    /// Create new page tree
48    pub fn new(root_id: ObjectId) -> Self {
49        let mut nodes = HashMap::new();
50        nodes.insert(
51            root_id,
52            PageTreeNode::Pages {
53                kids: Vec::new(),
54                count: 0,
55                media_box: None,
56                resources: None,
57                parent: None,
58            },
59        );
60
61        Self {
62            root: root_id,
63            nodes,
64            page_map: HashMap::new(),
65        }
66    }
67
68    /// Get root node ID
69    pub fn root(&self) -> ObjectId {
70        self.root
71    }
72
73    /// Get total page count
74    pub fn page_count(&self) -> u32 {
75        match self.nodes.get(&self.root) {
76            Some(PageTreeNode::Pages { count, .. }) => *count,
77            _ => 0,
78        }
79    }
80
81    /// Get page by index
82    pub fn get_page(&self, index: u32) -> Option<&Page> {
83        let node_id = self.page_map.get(&index)?;
84        match self.nodes.get(node_id)? {
85            PageTreeNode::Page { page, .. } => Some(page),
86            _ => None,
87        }
88    }
89
90    /// Add a page
91    pub fn add_page(&mut self, page: Page, page_id: ObjectId, parent_id: ObjectId) -> Result<()> {
92        // Add page node
93        self.nodes.insert(
94            page_id,
95            PageTreeNode::Page {
96                page,
97                parent: parent_id,
98            },
99        );
100
101        // Update parent
102        self.add_kid_to_parent(parent_id, page_id)?;
103
104        // Increment count for immediate parent and all ancestors
105        if let Some(PageTreeNode::Pages { count, .. }) = self.nodes.get_mut(&parent_id) {
106            *count += 1;
107        }
108        self.update_ancestor_counts(parent_id)?;
109
110        // Update page map
111        let page_index = self.page_count() - 1;
112        self.page_map.insert(page_index, page_id);
113
114        Ok(())
115    }
116
117    /// Add intermediate node
118    pub fn add_pages_node(&mut self, node_id: ObjectId, parent_id: Option<ObjectId>) -> Result<()> {
119        self.nodes.insert(
120            node_id,
121            PageTreeNode::Pages {
122                kids: Vec::new(),
123                count: 0,
124                media_box: None,
125                resources: None,
126                parent: parent_id,
127            },
128        );
129
130        if let Some(parent) = parent_id {
131            self.add_kid_to_parent(parent, node_id)?;
132        }
133
134        Ok(())
135    }
136
137    /// Add kid to parent node
138    fn add_kid_to_parent(&mut self, parent_id: ObjectId, kid_id: ObjectId) -> Result<()> {
139        match self.nodes.get_mut(&parent_id) {
140            Some(PageTreeNode::Pages { kids, .. }) => {
141                kids.push(kid_id);
142                // Don't increment count here - let update_ancestor_counts handle it if it's a page
143                Ok(())
144            }
145            _ => Err(PdfError::InvalidStructure(
146                "Parent is not a Pages node".to_string(),
147            )),
148        }
149    }
150
151    /// Update ancestor page counts
152    fn update_ancestor_counts(&mut self, node_id: ObjectId) -> Result<()> {
153        if let Some(PageTreeNode::Pages { parent, .. }) = self.nodes.get(&node_id) {
154            if let Some(parent_id) = parent {
155                let parent_id = *parent_id;
156                if let Some(PageTreeNode::Pages { count, .. }) = self.nodes.get_mut(&parent_id) {
157                    *count += 1;
158                    self.update_ancestor_counts(parent_id)?;
159                }
160            }
161        }
162        Ok(())
163    }
164
165    /// Convert node to dictionary
166    pub fn node_to_dict(&self, node_id: ObjectId) -> Result<Dictionary> {
167        let node = self
168            .nodes
169            .get(&node_id)
170            .ok_or_else(|| PdfError::InvalidStructure("Node not found".to_string()))?;
171
172        match node {
173            PageTreeNode::Pages {
174                kids,
175                count,
176                media_box,
177                resources,
178                parent,
179            } => {
180                let mut dict = Dictionary::new();
181                dict.set("Type", Object::Name("Pages".to_string()));
182
183                // Kids array
184                let kids_array: Array = kids.iter().map(|id| Object::Reference(*id)).collect();
185                dict.set("Kids", Object::Array(kids_array.into()));
186
187                dict.set("Count", Object::Integer(*count as i64));
188
189                if let Some(parent_id) = parent {
190                    dict.set("Parent", Object::Reference(*parent_id));
191                }
192
193                if let Some(mb) = media_box {
194                    let mb_array = Array::from(vec![
195                        Object::Real(mb.lower_left.x),
196                        Object::Real(mb.lower_left.y),
197                        Object::Real(mb.upper_right.x),
198                        Object::Real(mb.upper_right.y),
199                    ]);
200                    dict.set("MediaBox", Object::Array(mb_array.into()));
201                }
202
203                if let Some(res) = resources {
204                    dict.set("Resources", Object::Dictionary(res.clone()));
205                }
206
207                Ok(dict)
208            }
209            PageTreeNode::Page { page, parent } => {
210                // Convert page to dictionary
211                let mut dict = page.to_dict();
212                dict.set("Type", Object::Name("Page".to_string()));
213                dict.set("Parent", Object::Reference(*parent));
214                Ok(dict)
215            }
216        }
217    }
218}
219
220/// Page tree builder
221pub struct PageTreeBuilder {
222    /// Current tree
223    tree: PageTree,
224    /// Next object ID
225    next_id: u32,
226}
227
228impl PageTreeBuilder {
229    /// Create new builder
230    pub fn new(start_id: u32) -> Self {
231        let root_id = ObjectId::new(start_id, 0);
232        Self {
233            tree: PageTree::new(root_id),
234            next_id: start_id + 1,
235        }
236    }
237
238    /// Add pages in balanced tree structure
239    pub fn add_pages(&mut self, pages: Vec<Page>) -> Result<()> {
240        // For simplicity, add all pages under root
241        // In production, create intermediate nodes for better performance
242        for page in pages {
243            let page_id = ObjectId::new(self.next_id, 0);
244            self.next_id += 1;
245            self.tree.add_page(page, page_id, self.tree.root)?;
246        }
247        Ok(())
248    }
249
250    /// Build the tree
251    pub fn build(self) -> PageTree {
252        self.tree
253    }
254}
255
256#[cfg(test)]
257mod tests {
258    use super::*;
259
260    #[test]
261    fn test_page_tree_new() {
262        let root_id = ObjectId::new(1, 0);
263        let tree = PageTree::new(root_id);
264        assert_eq!(tree.root(), root_id);
265        assert_eq!(tree.page_count(), 0);
266    }
267
268    #[test]
269    fn test_add_page() {
270        let root_id = ObjectId::new(1, 0);
271        let mut tree = PageTree::new(root_id);
272
273        let page = Page::a4();
274        let page_id = ObjectId::new(2, 0);
275
276        tree.add_page(page, page_id, root_id).unwrap();
277        assert_eq!(tree.page_count(), 1);
278        assert!(tree.get_page(0).is_some());
279    }
280
281    #[test]
282    fn test_page_tree_builder() {
283        let mut builder = PageTreeBuilder::new(1);
284        let pages = vec![Page::a4(), Page::letter(), Page::a4()];
285
286        builder.add_pages(pages).unwrap();
287        let tree = builder.build();
288
289        assert_eq!(tree.page_count(), 3);
290        assert!(tree.get_page(0).is_some());
291        assert!(tree.get_page(1).is_some());
292        assert!(tree.get_page(2).is_some());
293        assert!(tree.get_page(3).is_none());
294    }
295
296    #[test]
297    fn test_node_to_dict() {
298        let root_id = ObjectId::new(1, 0);
299        let tree = PageTree::new(root_id);
300
301        let dict = tree.node_to_dict(root_id).unwrap();
302        assert_eq!(dict.get("Type"), Some(&Object::Name("Pages".to_string())));
303        assert!(dict.get("Kids").is_some());
304        assert_eq!(dict.get("Count"), Some(&Object::Integer(0)));
305    }
306
307    #[test]
308    fn test_page_tree_node_clone() {
309        use crate::geometry::Point;
310
311        let node = PageTreeNode::Pages {
312            kids: vec![ObjectId::new(2, 0), ObjectId::new(3, 0)],
313            count: 2,
314            media_box: Some(Rectangle::new(
315                Point::new(0.0, 0.0),
316                Point::new(612.0, 792.0),
317            )),
318            resources: Some(Dictionary::new()),
319            parent: Some(ObjectId::new(1, 0)),
320        };
321
322        let cloned = node;
323        match cloned {
324            PageTreeNode::Pages { kids, count, .. } => {
325                assert_eq!(kids.len(), 2);
326                assert_eq!(count, 2);
327            }
328            _ => panic!("Wrong node type"),
329        }
330    }
331
332    #[test]
333    fn test_page_tree_node_page_variant() {
334        let page = Page::a4();
335        let parent_id = ObjectId::new(1, 0);
336        let node = PageTreeNode::Page {
337            page: page,
338            parent: parent_id,
339        };
340
341        match &node {
342            PageTreeNode::Page { parent, .. } => {
343                assert_eq!(*parent, parent_id);
344            }
345            _ => panic!("Wrong node type"),
346        }
347    }
348
349    #[test]
350    fn test_page_tree_add_pages_node() {
351        let root_id = ObjectId::new(1, 0);
352        let mut tree = PageTree::new(root_id);
353
354        let child_id = ObjectId::new(2, 0);
355        tree.add_pages_node(child_id, Some(root_id)).unwrap();
356
357        // Check that the child was added
358        assert!(tree.nodes.contains_key(&child_id));
359
360        // Check parent has the child
361        match &tree.nodes[&root_id] {
362            PageTreeNode::Pages { kids, .. } => {
363                assert_eq!(kids.len(), 1);
364                assert_eq!(kids[0], child_id);
365            }
366            _ => panic!("Root should be Pages node"),
367        }
368    }
369
370    #[test]
371    fn test_page_tree_get_page_invalid_index() {
372        let root_id = ObjectId::new(1, 0);
373        let tree = PageTree::new(root_id);
374
375        assert!(tree.get_page(0).is_none());
376        assert!(tree.get_page(100).is_none());
377    }
378
379    #[test]
380    fn test_page_tree_multiple_pages() {
381        let root_id = ObjectId::new(1, 0);
382        let mut tree = PageTree::new(root_id);
383
384        // Add multiple pages
385        for i in 0..5 {
386            let page = Page::a4();
387            let page_id = ObjectId::new(i + 2, 0);
388            tree.add_page(page, page_id, root_id).unwrap();
389        }
390
391        assert_eq!(tree.page_count(), 5);
392
393        // Check all pages are accessible
394        for i in 0..5 {
395            assert!(tree.get_page(i).is_some());
396        }
397    }
398
399    #[test]
400    fn test_page_tree_nested_structure() {
401        let root_id = ObjectId::new(1, 0);
402        let mut tree = PageTree::new(root_id);
403
404        // Add intermediate node
405        let intermediate_id = ObjectId::new(2, 0);
406        tree.add_pages_node(intermediate_id, Some(root_id)).unwrap();
407
408        // Add pages under intermediate node
409        let page1_id = ObjectId::new(3, 0);
410        let page2_id = ObjectId::new(4, 0);
411
412        tree.add_page(Page::a4(), page1_id, intermediate_id)
413            .unwrap();
414        tree.add_page(Page::letter(), page2_id, intermediate_id)
415            .unwrap();
416
417        // Root should have count 2
418        assert_eq!(tree.page_count(), 2);
419    }
420
421    #[test]
422    fn test_page_tree_add_kid_to_invalid_parent() {
423        let root_id = ObjectId::new(1, 0);
424        let mut tree = PageTree::new(root_id);
425
426        let page_id = ObjectId::new(2, 0);
427        tree.add_page(Page::a4(), page_id, root_id).unwrap();
428
429        // Try to add a kid to a page node (not a Pages node)
430        let result = tree.add_kid_to_parent(page_id, ObjectId::new(3, 0));
431        assert!(result.is_err());
432    }
433
434    #[test]
435    fn test_page_tree_node_to_dict_page_node() {
436        let root_id = ObjectId::new(1, 0);
437        let mut tree = PageTree::new(root_id);
438
439        let page = Page::a4();
440        let page_id = ObjectId::new(2, 0);
441        tree.add_page(page, page_id, root_id).unwrap();
442
443        let dict = tree.node_to_dict(page_id).unwrap();
444        assert_eq!(dict.get("Type"), Some(&Object::Name("Page".to_string())));
445        assert_eq!(dict.get("Parent"), Some(&Object::Reference(root_id)));
446    }
447
448    #[test]
449    fn test_page_tree_node_to_dict_with_media_box() {
450        use crate::geometry::Point;
451
452        let root_id = ObjectId::new(1, 0);
453        let mut tree = PageTree::new(root_id);
454
455        // Add media box to root
456        if let Some(PageTreeNode::Pages { media_box, .. }) = tree.nodes.get_mut(&root_id) {
457            *media_box = Some(Rectangle::new(
458                Point::new(0.0, 0.0),
459                Point::new(612.0, 792.0),
460            ));
461        }
462
463        let dict = tree.node_to_dict(root_id).unwrap();
464        assert!(dict.get("MediaBox").is_some());
465
466        match dict.get("MediaBox") {
467            Some(Object::Array(arr)) => {
468                assert_eq!(arr.len(), 4);
469            }
470            _ => panic!("MediaBox should be an array"),
471        }
472    }
473
474    #[test]
475    fn test_page_tree_node_to_dict_with_resources() {
476        let root_id = ObjectId::new(1, 0);
477        let mut tree = PageTree::new(root_id);
478
479        // Add resources to root
480        let mut res = Dictionary::new();
481        res.set("Font", Object::Dictionary(Dictionary::new()));
482
483        if let Some(PageTreeNode::Pages { resources, .. }) = tree.nodes.get_mut(&root_id) {
484            *resources = Some(res);
485        }
486
487        let dict = tree.node_to_dict(root_id).unwrap();
488        assert!(dict.get("Resources").is_some());
489    }
490
491    #[test]
492    fn test_page_tree_node_to_dict_invalid_node() {
493        let root_id = ObjectId::new(1, 0);
494        let tree = PageTree::new(root_id);
495
496        let invalid_id = ObjectId::new(999, 0);
497        let result = tree.node_to_dict(invalid_id);
498        assert!(result.is_err());
499    }
500
501    #[test]
502    fn test_page_tree_builder_empty() {
503        let builder = PageTreeBuilder::new(10);
504        let tree = builder.build();
505
506        assert_eq!(tree.root(), ObjectId::new(10, 0));
507        assert_eq!(tree.page_count(), 0);
508    }
509
510    #[test]
511    fn test_page_tree_builder_with_custom_start_id() {
512        let mut builder = PageTreeBuilder::new(100);
513        builder.add_pages(vec![Page::a4()]).unwrap();
514
515        let tree = builder.build();
516        assert_eq!(tree.root(), ObjectId::new(100, 0));
517
518        // The page should have ID 101
519        match tree.nodes.get(&ObjectId::new(101, 0)) {
520            Some(PageTreeNode::Page { .. }) => (),
521            _ => panic!("Page not found with expected ID"),
522        }
523    }
524
525    #[test]
526    fn test_page_tree_update_ancestor_counts() {
527        let root_id = ObjectId::new(1, 0);
528        let mut tree = PageTree::new(root_id);
529
530        // Create a nested structure
531        let level1_id = ObjectId::new(2, 0);
532        let level2_id = ObjectId::new(3, 0);
533
534        tree.add_pages_node(level1_id, Some(root_id)).unwrap();
535        tree.add_pages_node(level2_id, Some(level1_id)).unwrap();
536
537        // Add a page at the deepest level
538        let page_id = ObjectId::new(4, 0);
539        tree.add_page(Page::a4(), page_id, level2_id).unwrap();
540
541        // Check counts at all levels
542        assert_eq!(tree.page_count(), 1); // Root count
543
544        match &tree.nodes[&level1_id] {
545            PageTreeNode::Pages { count, .. } => assert_eq!(*count, 1),
546            _ => panic!("Wrong node type"),
547        }
548
549        match &tree.nodes[&level2_id] {
550            PageTreeNode::Pages { count, .. } => assert_eq!(*count, 1),
551            _ => panic!("Wrong node type"),
552        }
553    }
554
555    #[test]
556    fn test_page_tree_node_to_dict_with_parent() {
557        let root_id = ObjectId::new(1, 0);
558        let mut tree = PageTree::new(root_id);
559
560        let child_id = ObjectId::new(2, 0);
561        tree.add_pages_node(child_id, Some(root_id)).unwrap();
562
563        let dict = tree.node_to_dict(child_id).unwrap();
564        assert_eq!(dict.get("Parent"), Some(&Object::Reference(root_id)));
565    }
566
567    #[test]
568    fn test_page_tree_node_to_dict_root_no_parent() {
569        let root_id = ObjectId::new(1, 0);
570        let tree = PageTree::new(root_id);
571
572        let dict = tree.node_to_dict(root_id).unwrap();
573        assert!(dict.get("Parent").is_none());
574    }
575
576    #[test]
577    fn test_page_tree_page_map() {
578        let root_id = ObjectId::new(1, 0);
579        let mut tree = PageTree::new(root_id);
580
581        let page1_id = ObjectId::new(2, 0);
582        let page2_id = ObjectId::new(3, 0);
583
584        tree.add_page(Page::a4(), page1_id, root_id).unwrap();
585        tree.add_page(Page::letter(), page2_id, root_id).unwrap();
586
587        // Check page map
588        assert_eq!(tree.page_map.get(&0), Some(&page1_id));
589        assert_eq!(tree.page_map.get(&1), Some(&page2_id));
590    }
591
592    #[test]
593    fn test_page_tree_different_page_sizes() {
594        let mut builder = PageTreeBuilder::new(1);
595        let pages = vec![Page::a4(), Page::letter(), Page::new(200.0, 300.0)];
596
597        builder.add_pages(pages).unwrap();
598        let tree = builder.build();
599
600        assert_eq!(tree.page_count(), 3);
601
602        // Verify different sizes
603        let page0 = tree.get_page(0).unwrap();
604        let page1 = tree.get_page(1).unwrap();
605        let page2 = tree.get_page(2).unwrap();
606
607        // Verify we got the pages (we can't access private fields)
608        // A4, Letter, and custom sizes were added
609        assert_eq!(tree.page_count(), 3);
610
611        // Since we can't access the private width/height fields,
612        // we just verify the pages exist
613        assert!(std::ptr::eq(page0, tree.get_page(0).unwrap()));
614        assert!(std::ptr::eq(page1, tree.get_page(1).unwrap()));
615        assert!(std::ptr::eq(page2, tree.get_page(2).unwrap()));
616    }
617
618    #[test]
619    fn test_page_tree_add_page_to_non_existent_parent() {
620        let root_id = ObjectId::new(1, 0);
621        let mut tree = PageTree::new(root_id);
622
623        let page = Page::a4();
624        let page_id = ObjectId::new(2, 0);
625        let invalid_parent = ObjectId::new(999, 0);
626
627        let result = tree.add_page(page, page_id, invalid_parent);
628        assert!(result.is_err());
629    }
630
631    #[test]
632    fn test_page_tree_empty_kids_array() {
633        let root_id = ObjectId::new(1, 0);
634        let tree = PageTree::new(root_id);
635
636        let dict = tree.node_to_dict(root_id).unwrap();
637        match dict.get("Kids") {
638            Some(Object::Array(arr)) => {
639                assert_eq!(arr.len(), 0);
640            }
641            _ => panic!("Kids should be an empty array"),
642        }
643    }
644
645    #[test]
646    fn test_page_tree_builder_large_number_of_pages() {
647        let mut builder = PageTreeBuilder::new(1);
648        let pages: Vec<Page> = (0..100).map(|_| Page::a4()).collect();
649
650        builder.add_pages(pages).unwrap();
651        let tree = builder.build();
652
653        assert_eq!(tree.page_count(), 100);
654
655        // Spot check some pages
656        assert!(tree.get_page(0).is_some());
657        assert!(tree.get_page(50).is_some());
658        assert!(tree.get_page(99).is_some());
659        assert!(tree.get_page(100).is_none());
660    }
661
662    #[test]
663    fn test_page_tree_add_pages_node_without_parent() {
664        let root_id = ObjectId::new(1, 0);
665        let mut tree = PageTree::new(root_id);
666
667        let orphan_id = ObjectId::new(2, 0);
668        tree.add_pages_node(orphan_id, None).unwrap();
669
670        // Orphan node should exist but not be connected
671        assert!(tree.nodes.contains_key(&orphan_id));
672
673        match &tree.nodes[&root_id] {
674            PageTreeNode::Pages { kids, .. } => {
675                assert_eq!(kids.len(), 0); // Root should have no kids
676            }
677            _ => panic!("Wrong node type"),
678        }
679    }
680}