oxidize-pdf 2.5.1

A pure Rust PDF generation and manipulation library with zero external dependencies
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
//! PDF Page Tree Parser
//!
//! This module handles navigation and extraction of pages from the PDF page tree structure.
//! The page tree is a hierarchical structure that organizes pages in a PDF document,
//! allowing for efficient access and inheritance of properties from parent nodes.
//!
//! # Overview
//!
//! The PDF page tree consists of:
//! - **Page Tree Nodes**: Internal nodes that can contain other nodes or pages
//! - **Page Objects**: Leaf nodes representing individual pages
//! - **Inherited Properties**: Resources, MediaBox, CropBox, and Rotate can be inherited from parent nodes
//!
//! # Example
//!
//! ```rust,no_run
//! use oxidize_pdf::parser::{PdfDocument, PdfReader};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Open a PDF document
//! let reader = PdfReader::open("document.pdf")?;
//! let document = PdfDocument::new(reader);
//!
//! // Get a specific page
//! let page = document.get_page(0)?;
//!
//! // Access page properties
//! println!("Page size: {}x{} points", page.width(), page.height());
//! println!("Rotation: {}°", page.rotation);
//!
//! // Get page resources
//! if let Some(resources) = page.get_resources() {
//!     println!("Page has resources");
//! }
//! # Ok(())
//! # }
//! ```

use super::document::PdfDocument;
use super::objects::{PdfArray, PdfDictionary, PdfObject, PdfStream};
use super::reader::PdfReader;
use super::{ParseError, ParseResult};
use std::collections::{HashMap, HashSet};
use std::io::{Read, Seek};

/// Represents a single page in the PDF with all its properties and resources.
///
/// A `ParsedPage` contains all the information needed to render or analyze a PDF page,
/// including its dimensions, content streams, resources, and inherited properties from
/// parent page tree nodes.
///
/// # Fields
///
/// * `obj_ref` - Object reference (object number, generation number) pointing to this page in the PDF
/// * `dict` - Complete page dictionary containing all page-specific entries
/// * `inherited_resources` - Resources inherited from parent page tree nodes
/// * `media_box` - Page dimensions in PDF units [llx, lly, urx, ury]
/// * `crop_box` - Optional visible area of the page
/// * `rotation` - Page rotation in degrees (0, 90, 180, or 270)
/// * `annotations` - Optional /Annots array referencing the page's annotation objects
///
/// # Example
///
/// ```rust,no_run
/// use oxidize_pdf::parser::{PdfDocument, PdfReader};
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let reader = PdfReader::open("document.pdf")?;
/// let document = PdfDocument::new(reader);
/// let page = document.get_page(0)?;
///
/// // Access page properties
/// let (obj_num, gen_num) = page.obj_ref;
/// println!("Page object: {} {} R", obj_num, gen_num);
///
/// // Get page dimensions
/// let [llx, lly, urx, ury] = page.media_box;
/// println!("MediaBox: ({}, {}) to ({}, {})", llx, lly, urx, ury);
///
/// // Check for content
/// if let Some(contents) = page.dict.get("Contents") {
///     println!("Page has content streams");
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct ParsedPage {
    /// Object reference to this page in the form (object_number, generation_number).
    /// This uniquely identifies the page object in the PDF file.
    pub obj_ref: (u32, u16),

    /// Page dictionary containing all page-specific entries like Contents, Resources, etc.
    /// This is the raw PDF dictionary for the page object.
    pub dict: PdfDictionary,

    /// Resources inherited from parent page tree nodes.
    /// These are automatically merged during page tree traversal.
    pub inherited_resources: Option<PdfDictionary>,

    /// MediaBox defining the page dimensions in PDF units (typically points).
    /// Format: [lower_left_x, lower_left_y, upper_right_x, upper_right_y]
    pub media_box: [f64; 4],

    /// CropBox defining the visible area of the page.
    /// If None, the entire MediaBox is visible.
    pub crop_box: Option<[f64; 4]>,

    /// Page rotation in degrees. Valid values are 0, 90, 180, or 270.
    /// The rotation is applied clockwise.
    pub rotation: i32,

    /// Annotations array containing references to annotation objects.
    /// This is parsed from the page's /Annots entry.
    pub annotations: Option<PdfArray>,
}

/// Maximum number of pages to allow in a flat index.
/// Prevents OOM from malicious /Count values (e.g., 9,999,999,999).
/// `flatten_page_tree` stops collecting leaf pages once this cap is hit.
const MAX_PAGES: usize = 100_000;

/// Page tree navigator.
///
/// Caches parsed pages by 0-based index and optionally carries a flat,
/// pre-built list of leaf page object references (see `flatten_page_tree`)
/// for direct index-to-reference lookup.
pub struct PageTree {
    /// Total number of pages
    page_count: u32,
    /// Cached pages by index
    pages: HashMap<u32, ParsedPage>,
    /// Root pages dictionary (for navigation)
    #[allow(dead_code)]
    pages_dict: Option<PdfDictionary>,
    /// Flat index of page object references, built once during initialization.
    /// Each entry is (obj_num, gen_num) for a leaf Page node.
    page_refs: Vec<(u32, u16)>,
}

impl PageTree {
    /// Build an empty navigator that only knows the total page count.
    ///
    /// Neither a pages dictionary nor a flat index is attached; pages must
    /// be resolved and cached by the caller.
    pub fn new(page_count: u32) -> Self {
        Self {
            pages: HashMap::new(),
            pages_dict: None,
            page_refs: Vec::new(),
            page_count,
        }
    }

    /// Build a navigator that also carries the root `/Pages` dictionary.
    pub fn new_with_pages_dict(page_count: u32, pages_dict: PdfDictionary) -> Self {
        Self {
            pages: HashMap::new(),
            pages_dict: Some(pages_dict),
            page_refs: Vec::new(),
            page_count,
        }
    }

    /// Build a navigator from a pre-computed flat index of leaf pages.
    ///
    /// The page count is derived from the number of entries actually found
    /// in `page_refs`, not from the document's (possibly bogus) `/Count`.
    pub fn new_with_flat_index(pages_dict: PdfDictionary, page_refs: Vec<(u32, u16)>) -> Self {
        Self {
            page_count: page_refs.len() as u32,
            pages: HashMap::new(),
            pages_dict: Some(pages_dict),
            page_refs,
        }
    }

    /// Return the cached `ParsedPage` for a 0-based index, if one exists.
    pub fn get_cached_page(&self, index: u32) -> Option<&ParsedPage> {
        self.pages.get(&index)
    }

    /// Store a parsed page under its 0-based index for later reuse.
    /// Any previously cached page at the same index is replaced.
    pub fn cache_page(&mut self, index: u32, page: ParsedPage) {
        self.pages.insert(index, page);
    }

    /// Drop every cached page; the flat index and page count are kept.
    pub fn clear_cache(&mut self) {
        self.pages.clear();
    }

    /// Total number of pages known to this navigator.
    pub fn page_count(&self) -> u32 {
        self.page_count
    }

    /// Object reference `(obj_num, gen_num)` of the page at `index` in the
    /// flat index, or `None` when the index is out of range.
    pub fn get_page_ref(&self, index: u32) -> Option<(u32, u16)> {
        self.page_refs.get(index as usize).copied()
    }

    /// Flatten the page tree into a `Vec<(u32, u16)>` of leaf Page object references.
    ///
    /// This walks the tree iteratively using an explicit stack, with:
    /// - **Cycle detection**: `HashSet<(u32, u16)>` prevents infinite loops from circular refs
    /// - **Page cap**: Stops at `MAX_PAGES` to prevent OOM from absurd `/Count` values
    /// - **Type inference**: Handles missing `/Type` keys by checking for `/Kids`, `/Contents`, `/MediaBox`
    pub fn flatten_page_tree<R: Read + Seek>(
        reader: &mut PdfReader<R>,
        pages_dict: &PdfDictionary,
    ) -> ParseResult<Vec<(u32, u16)>> {
        let mut page_refs: Vec<(u32, u16)> = Vec::new();
        let mut visited: HashSet<(u32, u16)> = HashSet::new();

        // Work stack: each entry is an object reference to process
        let mut stack: Vec<(u32, u16)> = Vec::new();

        // Seed from root Kids array
        if let Some(kids) = pages_dict.get("Kids").and_then(|k| k.as_array()) {
            // Push in reverse so first kid is processed first (LIFO stack)
            for kid_obj in kids.0.iter().rev() {
                if let Some(kid_ref) = kid_obj.as_reference() {
                    stack.push(kid_ref);
                }
            }
        }

        while let Some(obj_ref) = stack.pop() {
            if page_refs.len() >= MAX_PAGES {
                tracing::warn!("Page tree exceeds {} leaves, truncating", MAX_PAGES);
                break;
            }

            // Cycle detection
            if !visited.insert(obj_ref) {
                tracing::warn!(
                    "Cycle detected at {} {} R in page tree, skipping",
                    obj_ref.0,
                    obj_ref.1
                );
                continue;
            }

            // Resolve the object
            let obj = match reader.get_object(obj_ref.0, obj_ref.1) {
                Ok(o) => o,
                Err(e) => {
                    tracing::warn!(
                        "Failed to resolve page tree node {} {} R: {}",
                        obj_ref.0,
                        obj_ref.1,
                        e
                    );
                    continue;
                }
            };

            let dict = match obj.as_dict() {
                Some(d) => d,
                None => {
                    // Check if it's a stream with a dict (some PDFs embed page data in streams)
                    if let Some(stream) = obj.as_stream() {
                        &stream.dict
                    } else {
                        continue; // Skip non-dict/non-stream nodes
                    }
                }
            };

            // Determine node type
            let node_type = dict.get_type().or_else(|| {
                if dict.contains_key("Kids") {
                    Some("Pages")
                } else if dict.contains_key("Contents") || dict.contains_key("MediaBox") {
                    Some("Page")
                } else {
                    None
                }
            });

            match node_type {
                Some("Page") => {
                    page_refs.push(obj_ref);
                }
                Some("Pages") => {
                    if let Some(kids) = dict.get("Kids").and_then(|k| k.as_array()) {
                        // Push in reverse for correct order
                        for kid_obj in kids.0.iter().rev() {
                            if let Some(kid_ref) = kid_obj.as_reference() {
                                stack.push(kid_ref);
                            }
                        }
                    }
                }
                _ => {
                    // Unknown type — treat as Page if it has page-like attributes
                    if dict.contains_key("MediaBox") || dict.contains_key("Contents") {
                        page_refs.push(obj_ref);
                    }
                    // Otherwise silently skip
                }
            }
        }

        Ok(page_refs)
    }

    /// Load a specific page by traversing the page tree
    ///
    /// Note: This method is currently not fully implemented due to architectural constraints
    /// with recursive page tree traversal and borrow checker issues.
    ///
    /// NOTE(review): when the target page lies inside a "Pages" kid, this returns a
    /// *placeholder* `ParsedPage` (empty dict, default Letter MediaBox) instead of
    /// descending into the subtree — see the comment at the `is_target` branch.
    /// Callers must not rely on the placeholder's contents.
    #[allow(dead_code)]
    fn load_page_at_index<R: Read + Seek>(
        &self,
        reader: &mut PdfReader<R>,
        node: &PdfDictionary,
        node_ref: (u32, u16),
        target_index: u32,
        inherited: Option<&PdfDictionary>,
    ) -> ParseResult<ParsedPage> {
        // Resolve the node's /Type; two fallback layers infer it from structure
        // (Kids+Count => Pages, Contents/MediaBox => Page), the second only
        // under lenient parsing / warning collection.
        let node_type = node
            .get_type()
            .or_else(|| {
                // If Type is missing, try to infer from content
                if node.contains_key("Kids") && node.contains_key("Count") {
                    Some("Pages")
                } else if node.contains_key("Contents") || node.contains_key("MediaBox") {
                    Some("Page")
                } else {
                    None
                }
            })
            .or_else(|| {
                // If Type is missing and we have lenient parsing, try to infer
                let lenient_syntax = reader.options().lenient_syntax;
                let collect_warnings = reader.options().collect_warnings;

                if lenient_syntax || collect_warnings {
                    // If it has Kids, it's likely a Pages node
                    if node.contains_key("Kids") {
                        if collect_warnings {
                            tracing::debug!(
                                "Warning: Inferred Type=Pages for object {} {} R (missing Type field, has Kids)",
                                node_ref.0, node_ref.1
                            );
                        }
                        Some("Pages")
                    }
                    // If it has Contents or MediaBox but no Kids, it's likely a Page
                    else if node.contains_key("Contents")
                        || (node.contains_key("MediaBox") && !node.contains_key("Kids"))
                    {
                        if collect_warnings {
                            tracing::debug!(
                                "Warning: Inferred Type=Page for object {} {} R (missing Type field, has Contents/MediaBox)",
                                node_ref.0, node_ref.1
                            );
                        }
                        Some("Page")
                    } else {
                        None
                    }
                } else {
                    None
                }
            })
            .ok_or_else(|| ParseError::MissingKey("Type".to_string()))?;

        match node_type {
            "Pages" => {
                // This is a page tree node
                let kids = node
                    .get("Kids")
                    .and_then(|obj| obj.as_array())
                    .or_else(|| {
                        // If Kids is missing and we have lenient parsing, use empty array
                        if reader.options().lenient_syntax {
                            if reader.options().collect_warnings {
                                tracing::debug!(
                                    "Warning: Missing Kids array in Pages node, using empty array"
                                );
                            }
                            Some(&super::objects::EMPTY_PDF_ARRAY)
                        } else {
                            None
                        }
                    })
                    .ok_or_else(|| ParseError::MissingKey("Kids".to_string()))?;

                // Merge inherited attributes
                let mut merged_inherited = inherited.cloned().unwrap_or_else(PdfDictionary::new);

                // Inheritable attributes: Resources, MediaBox, CropBox, Rotate
                // (closer ancestors win: only insert when the key is absent).
                if let Some(resources) = node.get("Resources") {
                    if !merged_inherited.contains_key("Resources") {
                        merged_inherited.insert("Resources".to_string(), resources.clone());
                    }
                }
                if let Some(media_box) = node.get("MediaBox") {
                    if !merged_inherited.contains_key("MediaBox") {
                        merged_inherited.insert("MediaBox".to_string(), media_box.clone());
                    }
                }
                if let Some(crop_box) = node.get("CropBox") {
                    if !merged_inherited.contains_key("CropBox") {
                        merged_inherited.insert("CropBox".to_string(), crop_box.clone());
                    }
                }
                if let Some(rotate) = node.get("Rotate") {
                    if !merged_inherited.contains_key("Rotate") {
                        merged_inherited.insert("Rotate".to_string(), rotate.clone());
                    }
                }

                // Find which kid contains our target page by accumulating each
                // kid's page count until target_index falls inside one of them.
                let mut current_index = 0;
                for kid_ref in &kids.0 {
                    let kid_ref =
                        kid_ref
                            .as_reference()
                            .ok_or_else(|| ParseError::SyntaxError {
                                position: 0,
                                message: "Kids array must contain references".to_string(),
                            })?;

                    // Get the kid object info first
                    let (_kid_type, count, is_target) = {
                        // Cache parse options to avoid borrow checker issues
                        let lenient_syntax = reader.options().lenient_syntax;
                        let collect_warnings = reader.options().collect_warnings;

                        let kid_obj = reader.get_object(kid_ref.0, kid_ref.1)?;
                        let kid_dict =
                            kid_obj.as_dict().ok_or_else(|| ParseError::SyntaxError {
                                position: 0,
                                message: "Page tree node must be a dictionary".to_string(),
                            })?;

                        // Same two-layer /Type inference as used for the parent node.
                        let kid_type = kid_dict
                            .get_type()
                            .or_else(|| {
                                // If Type is missing, try to infer from content
                                if kid_dict.contains_key("Kids") && kid_dict.contains_key("Count") {
                                    Some("Pages")
                                } else if kid_dict.contains_key("Contents")
                                    || kid_dict.contains_key("MediaBox")
                                {
                                    Some("Page")
                                } else {
                                    None
                                }
                            })
                            .or_else(|| {
                                // Additional inference for reconstructed/corrupted objects
                                if lenient_syntax || collect_warnings {
                                    // If it has Kids, it's likely a Pages node
                                    if kid_dict.contains_key("Kids") {
                                        if collect_warnings {
                                            tracing::debug!(
                                                "Warning: Inferred Type=Pages for object {} 0 R (missing Type field, has Kids)",
                                                kid_ref.0
                                            );
                                        }
                                        Some("Pages")
                                    }
                                    // If it has Contents or MediaBox but no Kids, it's likely a Page
                                    else if kid_dict.contains_key("Contents")
                                        || (kid_dict.contains_key("MediaBox") && !kid_dict.contains_key("Kids"))
                                    {
                                        if collect_warnings {
                                            tracing::debug!(
                                                "Warning: Inferred Type=Page for object {} 0 R (missing Type field, has Contents/MediaBox)",
                                                kid_ref.0
                                            );
                                        }
                                        Some("Page")
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            })
                            .ok_or_else(|| ParseError::MissingKey("Type".to_string()))?;

                        // How many leaf pages this kid contributes.
                        let count = if kid_type == "Pages" {
                            // This is another page tree node
                            if let Some(count_obj) = kid_dict.get("Count") {
                                count_obj.as_integer().unwrap_or(0) as u32
                            } else {
                                // Missing Count - use size of Kids array as approximation
                                if let Some(nested_kids_obj) = kid_dict.get("Kids") {
                                    if let Some(nested_kids_array) = nested_kids_obj.as_array() {
                                        // Use array length as page count approximation
                                        nested_kids_array.0.len() as u32
                                    } else {
                                        1 // Default if Kids is not an array
                                    }
                                } else {
                                    1 // Default if no Kids array
                                }
                            }
                        } else {
                            // This is a page
                            1
                        };

                        let is_target = target_index < current_index + count;
                        (kid_type.to_string(), count, is_target)
                    };

                    if is_target {
                        // Found the right subtree/page
                        // Due to borrow checker constraints with recursive calls,
                        // we return a placeholder page for now.
                        // A proper implementation would require refactoring the page tree
                        // traversal to use an iterative approach instead of recursion.

                        return Ok(ParsedPage {
                            obj_ref: kid_ref,
                            dict: PdfDictionary::new(),
                            inherited_resources: Some(merged_inherited.clone()),
                            media_box: [0.0, 0.0, 612.0, 792.0],
                            crop_box: None,
                            rotation: 0,
                            annotations: None,
                        });
                    }

                    current_index += count;
                }

                Err(ParseError::SyntaxError {
                    position: 0,
                    message: "Page not found in tree".to_string(),
                })
            }
            "Page" => {
                // This is a page object
                if target_index != 0 {
                    return Err(ParseError::SyntaxError {
                        position: 0,
                        message: "Page index mismatch".to_string(),
                    });
                }

                // Use the object reference passed as parameter
                let obj_ref = node_ref;

                // Extract page attributes
                let media_box =
                    Self::get_rectangle(node, inherited, "MediaBox")?.unwrap_or_else(|| {
                        // Use default Letter size if MediaBox is missing
                        #[cfg(debug_assertions)]
                        tracing::debug!(
                            "Warning: Page {} {} R missing MediaBox, using default Letter size",
                            obj_ref.0,
                            obj_ref.1
                        );
                        [0.0, 0.0, 612.0, 792.0]
                    });

                let crop_box = Self::get_rectangle(node, inherited, "CropBox")?;

                let rotation = Self::get_integer(node, inherited, "Rotate")?.unwrap_or(0) as i32;

                // Get resources
                let inherited_resources = if let Some(inherited) = inherited {
                    inherited
                        .get("Resources")
                        .and_then(|r| r.as_dict())
                        .cloned()
                } else {
                    None
                };

                // Get annotations if present
                let annotations = node.get("Annots").and_then(|obj| obj.as_array()).cloned();

                Ok(ParsedPage {
                    obj_ref,
                    dict: node.clone(),
                    inherited_resources,
                    media_box,
                    crop_box,
                    rotation,
                    annotations,
                })
            }
            _ => Err(ParseError::SyntaxError {
                position: 0,
                message: format!("Invalid page tree node type: {node_type}"),
            }),
        }
    }

    /// Fetch a 4-element rectangle for `key`, preferring the node's own
    /// entry and falling back to the inherited attributes dictionary.
    ///
    /// Returns `Ok(None)` when the key is absent (or not an array) in both
    /// places, and an error when the array does not hold exactly four
    /// elements. Non-numeric entries default to 0.0.
    #[allow(dead_code)]
    fn get_rectangle(
        node: &PdfDictionary,
        inherited: Option<&PdfDictionary>,
        key: &str,
    ) -> ParseResult<Option<[f64; 4]>> {
        // Own entry wins; otherwise consult the inherited attributes.
        let candidate = node
            .get(key)
            .or_else(|| inherited.and_then(|dict| dict.get(key)))
            .and_then(|obj| obj.as_array());

        let array = match candidate {
            Some(a) => a,
            None => return Ok(None),
        };

        if array.len() != 4 {
            return Err(ParseError::SyntaxError {
                position: 0,
                message: format!("{key} must have 4 elements"),
            });
        }

        // Length was validated above, so zipping over 0..4 covers every slot.
        let mut rect = [0.0_f64; 4];
        for (slot, value) in rect.iter_mut().zip(array.0.iter()) {
            *slot = value.as_real().unwrap_or(0.0);
        }

        Ok(Some(rect))
    }

    /// Fetch an integer for `key`, checking the node first and the
    /// inherited attributes second.
    ///
    /// Missing keys and non-integer values both yield `Ok(None)`;
    /// this accessor never returns an error.
    #[allow(dead_code)]
    fn get_integer(
        node: &PdfDictionary,
        inherited: Option<&PdfDictionary>,
        key: &str,
    ) -> ParseResult<Option<i64>> {
        let found = match node.get(key) {
            some @ Some(_) => some,
            None => inherited.and_then(|dict| dict.get(key)),
        };

        Ok(found.and_then(|obj| obj.as_integer()))
    }
}

impl ParsedPage {
    /// Get the effective page width accounting for rotation.
    ///
    /// The width is calculated from the MediaBox and adjusted based on the page rotation.
    /// For 90° or 270° rotations, the width and height are swapped.
    ///
    /// # Returns
    ///
    /// The page width in PDF units (typically points, where 1 point = 1/72 inch)
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfDocument, PdfReader};
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let reader = PdfReader::open("document.pdf")?;
    /// # let document = PdfDocument::new(reader);
    /// let page = document.get_page(0)?;
    /// let width_pts = page.width();
    /// let width_inches = width_pts / 72.0;
    /// let width_mm = width_pts * 25.4 / 72.0;
    /// println!("Page width: {} points ({:.2} inches, {:.2} mm)", width_pts, width_inches, width_mm);
    /// # Ok(())
    /// # }
    /// ```
    pub fn width(&self) -> f64 {
        match self.rotation {
            // Rotated sideways: the MediaBox's vertical extent is the visual width.
            90 | 270 => self.media_box[3] - self.media_box[1],
            _ => self.media_box[2] - self.media_box[0],
        }
    }

    /// Get the effective page height accounting for rotation.
    ///
    /// The height is calculated from the MediaBox and adjusted based on the page rotation.
    /// For 90° or 270° rotations, the width and height are swapped.
    ///
    /// # Returns
    ///
    /// The page height in PDF units (typically points, where 1 point = 1/72 inch)
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfDocument, PdfReader};
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let reader = PdfReader::open("document.pdf")?;
    /// # let document = PdfDocument::new(reader);
    /// let page = document.get_page(0)?;
    /// println!("Page dimensions: {}x{} points", page.width(), page.height());
    /// if page.rotation != 0 {
    ///     println!("Page is rotated {} degrees", page.rotation);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn height(&self) -> f64 {
        match self.rotation {
            // Rotated sideways: the MediaBox's horizontal extent is the visual height.
            90 | 270 => self.media_box[2] - self.media_box[0],
            _ => self.media_box[3] - self.media_box[1],
        }
    }

    /// Get the content streams for this page using a PdfReader.
    ///
    /// Content streams contain the actual drawing instructions (operators) that render
    /// text, graphics, and images on the page. A page may have multiple content streams
    /// which are concatenated during rendering.
    ///
    /// # Arguments
    ///
    /// * `reader` - Mutable reference to the PDF reader
    ///
    /// # Returns
    ///
    /// A vector of decompressed content stream data. Each vector contains the raw bytes
    /// of a content stream ready for parsing.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The Contents entry is malformed
    /// - Stream decompression fails
    /// - Referenced objects cannot be resolved
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfReader, ParsedPage};
    /// # fn example(page: &ParsedPage, reader: &mut PdfReader<std::fs::File>) -> Result<(), Box<dyn std::error::Error>> {
    /// let streams = page.content_streams(reader)?;
    /// for (i, stream) in streams.iter().enumerate() {
    ///     println!("Content stream {}: {} bytes", i, stream.len());
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn content_streams<R: Read + Seek>(
        &self,
        reader: &mut PdfReader<R>,
    ) -> ParseResult<Vec<Vec<u8>>> {
        let mut streams = Vec::new();

        if let Some(contents) = self.dict.get("Contents") {
            // First resolve contents to check its type. Classification is done in
            // a separate pass so the borrow of `reader` ends before decoding.
            let contents_type = match contents {
                PdfObject::Reference(obj_num, gen_num) => {
                    let resolved = reader.get_object(*obj_num, *gen_num)?;
                    match resolved {
                        PdfObject::Stream(_) => "stream",
                        PdfObject::Array(_) => "array",
                        _ => "other",
                    }
                }
                PdfObject::Stream(_) => "stream",
                PdfObject::Array(_) => "array",
                _ => "other",
            };

            // Clone the options up front so `reader` is free to be mutably
            // borrowed again for object resolution below.
            let options = reader.options().clone();
            match contents_type {
                "stream" => {
                    let resolved = reader.resolve(contents)?;
                    if let PdfObject::Stream(stream) = resolved {
                        streams.push(stream.decode(&options)?);
                    }
                }
                "array" => {
                    // Get array references first
                    // NOTE(review): non-reference elements of the array are
                    // silently skipped here; Contents arrays are expected to
                    // hold only indirect stream references — confirm against
                    // the documents this parser targets.
                    let refs: Vec<(u32, u16)> = {
                        let resolved = reader.resolve(contents)?;
                        if let PdfObject::Array(array) = resolved {
                            array
                                .0
                                .iter()
                                .filter_map(|obj| {
                                    if let PdfObject::Reference(num, gen) = obj {
                                        Some((*num, *gen))
                                    } else {
                                        None
                                    }
                                })
                                .collect()
                        } else {
                            Vec::new()
                        }
                    };

                    // Now resolve each reference
                    for (obj_num, gen_num) in refs {
                        let obj = reader.get_object(obj_num, gen_num)?;
                        if let PdfObject::Stream(stream) = obj {
                            streams.push(stream.decode(&options)?);
                        }
                    }
                }
                _ => {
                    return Err(ParseError::SyntaxError {
                        position: 0,
                        message: "Contents must be a stream or array of streams".to_string(),
                    })
                }
            }
        }

        Ok(streams)
    }

    /// Get content streams using PdfDocument (recommended method).
    ///
    /// This is the preferred method for accessing content streams as it uses the
    /// document's caching and resource management capabilities.
    ///
    /// # Arguments
    ///
    /// * `document` - Reference to the PDF document
    ///
    /// # Returns
    ///
    /// A vector of decompressed content stream data ready for parsing with `ContentParser`.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfDocument, PdfReader};
    /// # use oxidize_pdf::parser::content::ContentParser;
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let reader = PdfReader::open("document.pdf")?;
    /// let document = PdfDocument::new(reader);
    /// let page = document.get_page(0)?;
    ///
    /// // Get content streams
    /// let streams = page.content_streams_with_document(&document)?;
    ///
    /// // Parse each stream
    /// for stream_data in streams {
    ///     let operations = ContentParser::parse_content(&stream_data)?;
    ///     println!("Stream has {} operations", operations.len());
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn content_streams_with_document<R: Read + Seek>(
        &self,
        document: &PdfDocument<R>,
    ) -> ParseResult<Vec<Vec<u8>>> {
        document.get_page_content_streams(self)
    }

    /// Get the raw `Contents` entry of the page dictionary, if present.
    ///
    /// The returned object is unresolved: it may be a stream, an array of
    /// stream references, or an indirect reference to either. Use
    /// [`Self::content_streams`] or [`Self::content_streams_with_document`]
    /// to obtain decoded stream data.
    pub fn get_contents(&self) -> Option<&PdfObject> {
        self.dict.get("Contents")
    }

    /// Get the effective resources for this page (including inherited).
    ///
    /// Resources include fonts, images (XObjects), color spaces, patterns, and other
    /// assets needed to render the page. This method returns page-specific resources
    /// if present, otherwise falls back to inherited resources from parent nodes.
    ///
    /// # Returns
    ///
    /// The Resources dictionary if available, or None if the page has no resources.
    ///
    /// # Resource Categories
    ///
    /// The Resources dictionary may contain:
    /// - `Font` - Font definitions used by text operators
    /// - `XObject` - External objects (images, form XObjects)
    /// - `ColorSpace` - Color space definitions
    /// - `Pattern` - Pattern definitions for fills
    /// - `Shading` - Shading dictionaries
    /// - `ExtGState` - Graphics state parameter dictionaries
    /// - `Properties` - Property list dictionaries
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfDocument, PdfReader};
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let reader = PdfReader::open("document.pdf")?;
    /// # let document = PdfDocument::new(reader);
    /// # let page = document.get_page(0)?;
    /// if let Some(resources) = page.get_resources() {
    ///     // Check for fonts
    ///     if let Some(fonts) = resources.get("Font").and_then(|f| f.as_dict()) {
    ///         println!("Page uses {} fonts", fonts.0.len());
    ///     }
    ///     
    ///     // Check for images
    ///     if let Some(xobjects) = resources.get("XObject").and_then(|x| x.as_dict()) {
    ///         println!("Page has {} XObjects", xobjects.0.len());
    ///     }
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_resources(&self) -> Option<&PdfDictionary> {
        self.dict
            .get("Resources")
            .and_then(|r| r.as_dict())
            .or(self.inherited_resources.as_ref())
    }

    /// Clone this page with all inherited resources merged into the page dictionary.
    ///
    /// This is useful when extracting a page for separate processing or when you need
    /// a self-contained page object with all resources explicitly included.
    ///
    /// # Returns
    ///
    /// A cloned page with inherited resources merged into the Resources entry
    /// of the page dictionary.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfDocument, PdfReader};
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let reader = PdfReader::open("document.pdf")?;
    /// # let document = PdfDocument::new(reader);
    /// # let page = document.get_page(0)?;
    /// // Get a self-contained page with all resources
    /// let standalone_page = page.clone_with_resources();
    ///
    /// // The cloned page now has all resources in its dictionary
    /// assert!(standalone_page.dict.contains_key("Resources"));
    /// # Ok(())
    /// # }
    /// ```
    pub fn clone_with_resources(&self) -> Self {
        let mut cloned = self.clone();

        // Merge inherited resources into the page dictionary if needed.
        // A page-level Resources entry always wins over the inherited one.
        if let Some(inherited) = &self.inherited_resources {
            if !cloned.dict.contains_key("Resources") {
                cloned.dict.insert(
                    "Resources".to_string(),
                    PdfObject::Dictionary(inherited.clone()),
                );
            }
        }

        cloned
    }

    /// Get the annotations array for this page.
    ///
    /// Returns a reference to the annotations array if present.
    /// Each element in the array is typically a reference to an annotation dictionary.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfDocument, PdfReader};
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let reader = PdfReader::open("document.pdf")?;
    /// # let document = PdfDocument::new(reader);
    /// # let page = document.get_page(0)?;
    /// if let Some(annots) = page.get_annotations() {
    ///     println!("Page has {} annotations", annots.len());
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_annotations(&self) -> Option<&PdfArray> {
        self.annotations.as_ref()
    }

    /// Check if the page has annotations.
    ///
    /// # Returns
    ///
    /// `true` if the page has an annotations array with at least one annotation,
    /// `false` otherwise.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfDocument, PdfReader};
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let reader = PdfReader::open("document.pdf")?;
    /// # let document = PdfDocument::new(reader);
    /// # let page = document.get_page(0)?;
    /// if page.has_annotations() {
    ///     println!("This page contains annotations");
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn has_annotations(&self) -> bool {
        self.annotations
            .as_ref()
            .map(|arr| !arr.is_empty())
            .unwrap_or(false)
    }

    /// Get all objects referenced by this page (for extraction or analysis).
    ///
    /// This method recursively collects all objects referenced by the page, including:
    /// - Content streams
    /// - Resources (fonts, images, etc.)
    /// - Nested objects within resources
    ///
    /// This is useful for extracting a complete page with all its dependencies or
    /// for analyzing the object graph of a page.
    ///
    /// # Arguments
    ///
    /// * `reader` - Mutable reference to the PDF reader
    ///
    /// # Returns
    ///
    /// A HashMap mapping object references (obj_num, gen_num) to their resolved objects.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use oxidize_pdf::parser::{PdfReader, ParsedPage};
    /// # fn example(page: &ParsedPage, reader: &mut PdfReader<std::fs::File>) -> Result<(), Box<dyn std::error::Error>> {
    /// let referenced_objects = page.get_referenced_objects(reader)?;
    ///
    /// println!("Page references {} objects", referenced_objects.len());
    /// for ((obj_num, gen_num), obj) in &referenced_objects {
    ///     println!("  {} {} R: {:?}", obj_num, gen_num, obj);
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_referenced_objects<R: Read + Seek>(
        &self,
        reader: &mut PdfReader<R>,
    ) -> ParseResult<HashMap<(u32, u16), PdfObject>> {
        let mut objects = HashMap::new();
        let mut to_process = Vec::new();

        // Start with Contents
        if let Some(contents) = self.dict.get("Contents") {
            Self::collect_references(contents, &mut to_process);
        }

        // Add Resources
        if let Some(resources) = self.get_resources() {
            for value in resources.0.values() {
                Self::collect_references(value, &mut to_process);
            }
        }

        // Process all references. The Entry API skips objects already
        // resolved, which both avoids redundant fetches and terminates
        // traversal of cyclic reference graphs.
        while let Some((obj_num, gen_num)) = to_process.pop() {
            if let std::collections::hash_map::Entry::Vacant(e) = objects.entry((obj_num, gen_num))
            {
                let obj = reader.get_object(obj_num, gen_num)?;

                // Collect nested references
                Self::collect_references_from_object(obj, &mut to_process);

                e.insert(obj.clone());
            }
        }

        Ok(objects)
    }

    /// Collect object references from a PDF object.
    ///
    /// Pushes the reference itself for `Reference` objects and recurses
    /// into arrays and dictionaries; all other variants contribute nothing.
    fn collect_references(obj: &PdfObject, refs: &mut Vec<(u32, u16)>) {
        match obj {
            PdfObject::Reference(obj_num, gen_num) => {
                refs.push((*obj_num, *gen_num));
            }
            PdfObject::Array(array) => {
                for item in &array.0 {
                    Self::collect_references(item, refs);
                }
            }
            PdfObject::Dictionary(dict) => {
                for value in dict.0.values() {
                    Self::collect_references(value, refs);
                }
            }
            _ => {}
        }
    }

    /// Collect references from an object (after resolution).
    ///
    /// Unlike [`Self::collect_references`], this does not record the object
    /// itself (it has already been resolved); it only gathers references
    /// nested in arrays, dictionaries, and stream dictionaries.
    fn collect_references_from_object(obj: &PdfObject, refs: &mut Vec<(u32, u16)>) {
        match obj {
            PdfObject::Array(array) => {
                for item in &array.0 {
                    Self::collect_references(item, refs);
                }
            }
            PdfObject::Dictionary(dict) | PdfObject::Stream(PdfStream { dict, .. }) => {
                for value in dict.0.values() {
                    Self::collect_references(value, refs);
                }
            }
            _ => {}
        }
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for `PageTree` caching behavior and `ParsedPage` accessors.

    use super::super::objects::{PdfArray, PdfDictionary, PdfName, PdfObject};
    use super::*;
    use std::collections::HashMap;

    /// Build a minimal, unrotated A4 page (595 x 842 pt) with no resources,
    /// crop box, or annotations.
    fn create_test_page() -> ParsedPage {
        let mut dict = PdfDictionary(HashMap::new());
        dict.0.insert(
            PdfName("Type".to_string()),
            PdfObject::Name(PdfName("Page".to_string())),
        );
        dict.0
            .insert(PdfName("Parent".to_string()), PdfObject::Reference(2, 0));

        ParsedPage {
            obj_ref: (3, 0),
            dict,
            inherited_resources: None,
            media_box: [0.0, 0.0, 595.0, 842.0],
            crop_box: None,
            rotation: 0,
            annotations: None,
        }
    }

    /// Build a 90°-rotated A4 page carrying inherited resources (a Font
    /// entry), a crop box, and an empty annotations array.
    fn create_test_page_with_resources() -> ParsedPage {
        let mut dict = PdfDictionary(HashMap::new());
        dict.0.insert(
            PdfName("Type".to_string()),
            PdfObject::Name(PdfName("Page".to_string())),
        );

        let mut resources = PdfDictionary(HashMap::new());
        resources.0.insert(
            PdfName("Font".to_string()),
            PdfObject::Dictionary(PdfDictionary(HashMap::new())),
        );

        ParsedPage {
            obj_ref: (4, 0),
            dict,
            inherited_resources: Some(resources),
            media_box: [0.0, 0.0, 595.0, 842.0],
            crop_box: Some([10.0, 10.0, 585.0, 832.0]),
            rotation: 90,
            annotations: Some(PdfArray(vec![])),
        }
    }

    #[test]
    fn test_page_tree_new() {
        let tree = PageTree::new(10);
        assert_eq!(tree.page_count, 10);
        assert_eq!(tree.pages.len(), 0);
        assert!(tree.pages_dict.is_none());
    }

    #[test]
    fn test_page_tree_new_with_pages_dict() {
        let pages_dict = PdfDictionary(HashMap::new());
        let tree = PageTree::new_with_pages_dict(5, pages_dict);
        assert_eq!(tree.page_count, 5);
        assert_eq!(tree.pages.len(), 0);
        assert!(tree.pages_dict.is_some());
    }

    #[test]
    fn test_get_cached_page_empty() {
        // A freshly created tree has nothing cached at any index.
        let tree = PageTree::new(10);
        assert!(tree.get_cached_page(0).is_none());
        assert!(tree.get_cached_page(5).is_none());
    }

    #[test]
    fn test_cache_and_get_page() {
        let mut tree = PageTree::new(10);
        let page = create_test_page();

        tree.cache_page(0, page);

        let cached = tree.get_cached_page(0);
        assert!(cached.is_some());
        let cached_page = cached.unwrap();
        assert_eq!(cached_page.obj_ref, (3, 0));
        assert_eq!(cached_page.media_box, [0.0, 0.0, 595.0, 842.0]);
    }

    #[test]
    fn test_cache_multiple_pages() {
        let mut tree = PageTree::new(10);
        let page1 = create_test_page();
        let page2 = create_test_page_with_resources();

        tree.cache_page(0, page1);
        tree.cache_page(1, page2);

        assert!(tree.get_cached_page(0).is_some());
        assert!(tree.get_cached_page(1).is_some());
        assert!(tree.get_cached_page(2).is_none());

        // Each slot retains the distinct page that was stored in it.
        let cached1 = tree.get_cached_page(0).unwrap();
        assert_eq!(cached1.rotation, 0);

        let cached2 = tree.get_cached_page(1).unwrap();
        assert_eq!(cached2.rotation, 90);
    }

    #[test]
    fn test_get_page_count() {
        let tree = PageTree::new(25);
        assert_eq!(tree.page_count, 25);
    }

    #[test]
    fn test_clear_cache() {
        let mut tree = PageTree::new(10);
        let page = create_test_page();

        tree.cache_page(0, page.clone());
        tree.cache_page(1, page);
        assert_eq!(tree.pages.len(), 2);

        // Clearing drops every cached entry but leaves the tree usable.
        tree.clear_cache();
        assert_eq!(tree.pages.len(), 0);
        assert!(tree.get_cached_page(0).is_none());
        assert!(tree.get_cached_page(1).is_none());
    }

    #[test]
    fn test_parsed_page_properties() {
        let page = create_test_page_with_resources();

        assert_eq!(page.obj_ref, (4, 0));
        assert_eq!(page.rotation, 90);
        assert!(page.inherited_resources.is_some());
        assert!(page.crop_box.is_some());
        assert!(page.annotations.is_some());

        let crop_box = page.crop_box.unwrap();
        assert_eq!(crop_box, [10.0, 10.0, 585.0, 832.0]);
    }

    #[test]
    fn test_parsed_page_creation() {
        let dict = PdfDictionary::new();
        let page = ParsedPage {
            obj_ref: (1, 0),
            dict: dict.clone(),
            inherited_resources: None,
            media_box: [0.0, 0.0, 612.0, 792.0],
            crop_box: None,
            rotation: 0,
            annotations: None,
        };

        assert_eq!(page.obj_ref, (1, 0));
        assert_eq!(page.dict, dict);
        assert!(page.inherited_resources.is_none());
        assert_eq!(page.media_box, [0.0, 0.0, 612.0, 792.0]); // Default US Letter
        assert!(page.crop_box.is_none());
        assert_eq!(page.rotation, 0);
        assert!(page.annotations.is_none());
    }

    #[test]
    fn test_parsed_page_width_height() {
        let mut page = create_test_page();

        // A4 size
        assert_eq!(page.width(), 595.0);
        assert_eq!(page.height(), 842.0);

        // Test with rotation
        page.rotation = 90;
        // Width and height should swap when rotated
        assert_eq!(page.width(), 842.0);
        assert_eq!(page.height(), 595.0);

        page.rotation = 270;
        assert_eq!(page.width(), 842.0);
        assert_eq!(page.height(), 595.0);

        // 180° keeps the original orientation (no swap).
        page.rotation = 180;
        assert_eq!(page.width(), 595.0);
        assert_eq!(page.height(), 842.0);
    }

    #[test]
    fn test_parsed_page_get_resources() {
        // Page has no own Resources entry, so inherited resources are used.
        let page = create_test_page_with_resources();
        let resources = page.get_resources();

        assert!(resources.is_some());
        let res = resources.unwrap();
        assert!(res.contains_key("Font"));
    }

    #[test]
    fn test_parsed_page_get_contents() {
        let mut page = create_test_page();

        // Add contents to page
        page.dict
            .insert("Contents".to_string(), PdfObject::Reference(10, 0));

        // get_contents returns the raw, unresolved entry.
        let contents = page.get_contents();
        assert!(contents.is_some());
        assert_eq!(contents, Some(&PdfObject::Reference(10, 0)));
    }

    #[test]
    fn test_parsed_page_get_annotations() {
        let page = create_test_page_with_resources();
        let annotations = page.get_annotations();

        // The array is present but empty for this fixture.
        assert!(annotations.is_some());
        if let Some(arr) = annotations {
            assert_eq!(arr.0.len(), 0);
        }
    }

    #[test]
    fn test_parsed_page_inherited_resources() {
        let mut page = create_test_page();
        let mut parent_resources = PdfDictionary::new();
        parent_resources.insert(
            "Font".to_string(),
            PdfObject::Dictionary(PdfDictionary::new()),
        );

        // Directly set inherited resources
        page.inherited_resources = Some(parent_resources.clone());

        assert!(page.inherited_resources.is_some());
        assert_eq!(page.inherited_resources, Some(parent_resources));
    }

    #[test]
    fn test_parsed_page_with_crop_box() {
        let mut page = create_test_page();
        page.crop_box = Some([50.0, 50.0, 545.0, 792.0]);

        // CropBox affects visible area
        let crop = page.crop_box.unwrap();
        assert_eq!(crop[0], 50.0);
        assert_eq!(crop[1], 50.0);
        assert_eq!(crop[2], 545.0);
        assert_eq!(crop[3], 792.0);
    }

    #[test]
    fn test_page_tree_cache_overflow() {
        let mut tree = PageTree::new(100);

        // Cache more pages than typical cache size
        for i in 0..50 {
            let page = create_test_page();
            tree.cache_page(i, page);
        }

        // All pages should be cached
        for i in 0..50 {
            assert!(tree.get_cached_page(i).is_some());
        }
    }

    #[test]
    fn test_page_tree_update_cached_page() {
        let mut tree = PageTree::new(10);
        let page1 = create_test_page();
        let mut page2 = create_test_page();
        page2.rotation = 180;

        tree.cache_page(0, page1);
        let cached = tree.get_cached_page(0).unwrap();
        assert_eq!(cached.rotation, 0);

        // Update the same page
        tree.cache_page(0, page2);
        let cached = tree.get_cached_page(0).unwrap();
        assert_eq!(cached.rotation, 180);
    }

    #[test]
    fn test_parsed_page_clone() {
        // Cloning must preserve every field of the page.
        let page = create_test_page_with_resources();
        let cloned = page.clone();

        assert_eq!(page.obj_ref, cloned.obj_ref);
        assert_eq!(page.dict, cloned.dict);
        assert_eq!(page.inherited_resources, cloned.inherited_resources);
        assert_eq!(page.media_box, cloned.media_box);
        assert_eq!(page.crop_box, cloned.crop_box);
        assert_eq!(page.rotation, cloned.rotation);
        assert_eq!(page.annotations, cloned.annotations);
    }

    #[test]
    fn test_page_tree_get_page_bounds() {
        let tree = PageTree::new(100);

        // Test bounds checking
        assert!(tree.get_cached_page(0).is_none()); // Not cached yet
        assert!(tree.get_cached_page(99).is_none()); // Within bounds but not cached
        assert!(tree.get_cached_page(100).is_none()); // Out of bounds
        assert!(tree.get_cached_page(u32::MAX).is_none()); // Way out of bounds
    }
}

#[cfg(test)]
#[path = "page_tree_tests.rs"]
mod page_tree_tests;