1use std::collections::{HashMap, HashSet, VecDeque};
14
15use rpdfium_core::fx_system::MAX_OBJECT_NUMBER;
16use rpdfium_core::{Name, PdfSource};
17
18use crate::object::{Object, ObjectId};
19use crate::store::ObjectStore;
20
21pub trait ObjectVisitor {
26 fn visit_dict(&mut self, id: Option<ObjectId>, dict: &HashMap<Name, Object>);
28
29 fn visit_array(&mut self, id: Option<ObjectId>, arr: &[Object]);
31
32 fn visit_stream(&mut self, id: Option<ObjectId>);
34
35 fn visit_reference(&mut self, target: ObjectId);
37
38 fn visit_primitive(&mut self, id: Option<ObjectId>, obj: &Object);
40}
41
42pub struct ObjectWalker {
48 visited: HashSet<ObjectId>,
49 max_objects: usize,
50}
51
52impl ObjectWalker {
53 pub fn new() -> Self {
55 Self {
56 visited: HashSet::new(),
57 max_objects: MAX_OBJECT_NUMBER as usize,
58 }
59 }
60
61 pub fn with_max_objects(max_objects: usize) -> Self {
63 Self {
64 visited: HashSet::new(),
65 max_objects,
66 }
67 }
68
69 pub fn walk<S: PdfSource>(
75 &mut self,
76 store: &ObjectStore<S>,
77 root_id: ObjectId,
78 visitor: &mut dyn ObjectVisitor,
79 ) {
80 let mut queue: VecDeque<ObjectId> = VecDeque::new();
81 queue.push_back(root_id);
82
83 while let Some(id) = queue.pop_front() {
84 if self.visited.contains(&id) {
86 continue;
87 }
88
89 if self.visited.len() >= self.max_objects {
91 break;
92 }
93
94 self.visited.insert(id);
95
96 let obj = match store.resolve(id) {
98 Ok(obj) => obj,
99 Err(_) => continue,
100 };
101
102 self.visit_object(obj, Some(id), visitor, &mut queue);
103 }
104 }
105
106 fn visit_object(
109 &self,
110 obj: &Object,
111 id: Option<ObjectId>,
112 visitor: &mut dyn ObjectVisitor,
113 queue: &mut VecDeque<ObjectId>,
114 ) {
115 match obj {
116 Object::Dictionary(dict) => {
117 visitor.visit_dict(id, dict);
118 self.enqueue_dict_refs(dict, queue);
119 }
120 Object::Stream { dict, .. } => {
121 visitor.visit_stream(id);
122 visitor.visit_dict(id, dict);
123 self.enqueue_dict_refs(dict, queue);
124 }
125 Object::Array(arr) => {
126 visitor.visit_array(id, arr);
127 self.enqueue_array_refs(arr, queue);
128 }
129 Object::Reference(target) => {
130 visitor.visit_reference(*target);
131 if !self.visited.contains(target) {
132 queue.push_back(*target);
133 }
134 }
135 _ => {
136 visitor.visit_primitive(id, obj);
138 }
139 }
140 }
141
142 fn enqueue_dict_refs(&self, dict: &HashMap<Name, Object>, queue: &mut VecDeque<ObjectId>) {
144 for value in dict.values() {
145 if let Object::Reference(target) = value {
146 if !self.visited.contains(target) {
147 queue.push_back(*target);
148 }
149 }
150 }
151 }
152
153 fn enqueue_array_refs(&self, arr: &[Object], queue: &mut VecDeque<ObjectId>) {
155 for elem in arr {
156 if let Object::Reference(target) = elem {
157 if !self.visited.contains(target) {
158 queue.push_back(*target);
159 }
160 }
161 }
162 }
163
164 pub fn visited_count(&self) -> usize {
166 self.visited.len()
167 }
168
169 pub fn visited(&self) -> &HashSet<ObjectId> {
171 &self.visited
172 }
173}
174
175impl Default for ObjectWalker {
176 fn default() -> Self {
177 Self::new()
178 }
179}
180
181#[derive(Debug, Clone, Default)]
183pub struct ObjectStats {
184 pub dict_count: usize,
186 pub array_count: usize,
188 pub stream_count: usize,
190 pub reference_count: usize,
192 pub primitive_count: usize,
194 pub total_visited: usize,
196}
197
198impl ObjectVisitor for ObjectStats {
199 fn visit_dict(&mut self, _id: Option<ObjectId>, _dict: &HashMap<Name, Object>) {
200 self.dict_count += 1;
201 self.total_visited += 1;
202 }
203
204 fn visit_array(&mut self, _id: Option<ObjectId>, _arr: &[Object]) {
205 self.array_count += 1;
206 self.total_visited += 1;
207 }
208
209 fn visit_stream(&mut self, _id: Option<ObjectId>) {
210 self.stream_count += 1;
211 self.total_visited += 1;
212 }
213
214 fn visit_reference(&mut self, _target: ObjectId) {
215 self.reference_count += 1;
216 self.total_visited += 1;
217 }
218
219 fn visit_primitive(&mut self, _id: Option<ObjectId>, _obj: &Object) {
220 self.primitive_count += 1;
221 self.total_visited += 1;
222 }
223}
224
225#[cfg(test)]
226mod tests {
227 use rpdfium_core::ParsingMode;
228
229 use super::*;
230
231 fn build_minimal_pdf() -> Vec<u8> {
233 let mut pdf = Vec::new();
234 pdf.extend_from_slice(b"%PDF-1.4\n");
235
236 let obj1_offset = pdf.len();
237 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
238
239 let obj2_offset = pdf.len();
240 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
241
242 let xref_offset = pdf.len();
243 pdf.extend_from_slice(b"xref\n");
244 pdf.extend_from_slice(b"0 3\n");
245 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
246 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
247 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
248 pdf.extend_from_slice(b"trailer\n");
249 pdf.extend_from_slice(b"<< /Size 3 /Root 1 0 R >>\n");
250 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
251
252 pdf
253 }
254
255 #[test]
256 fn test_walk_minimal_pdf() {
257 let pdf = build_minimal_pdf();
258 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
259
260 let mut stats = ObjectStats::default();
261 let mut walker = ObjectWalker::new();
262 walker.walk(&store, ObjectId::new(1, 0), &mut stats);
263
264 assert_eq!(walker.visited_count(), 2);
266 assert!(walker.visited().contains(&ObjectId::new(1, 0)));
267 assert!(walker.visited().contains(&ObjectId::new(2, 0)));
268 }
269
270 #[test]
271 fn test_stats_collection_from_minimal_pdf() {
272 let pdf = build_minimal_pdf();
273 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
274
275 let mut stats = ObjectStats::default();
276 let mut walker = ObjectWalker::new();
277 walker.walk(&store, ObjectId::new(1, 0), &mut stats);
278
279 assert_eq!(stats.dict_count, 2);
281 assert_eq!(stats.array_count, 0); assert_eq!(stats.stream_count, 0);
284 assert!(stats.total_visited > 0);
285 }
286
287 #[test]
288 fn test_cycle_detection() {
289 let mut pdf = Vec::new();
291 pdf.extend_from_slice(b"%PDF-1.4\n");
292
293 let obj1_offset = pdf.len();
294 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
295
296 let obj2_offset = pdf.len();
297 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
298
299 let obj3_offset = pdf.len();
300 pdf.extend_from_slice(b"3 0 obj\n<< /Self 3 0 R >>\nendobj\n");
301
302 let xref_offset = pdf.len();
303 pdf.extend_from_slice(b"xref\n");
304 pdf.extend_from_slice(b"0 4\n");
305 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
306 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
307 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
308 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj3_offset).as_bytes());
309 pdf.extend_from_slice(b"trailer\n");
310 pdf.extend_from_slice(b"<< /Size 4 /Root 1 0 R >>\n");
311 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
312
313 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
314
315 let mut stats = ObjectStats::default();
316 let mut walker = ObjectWalker::new();
317 walker.walk(&store, ObjectId::new(3, 0), &mut stats);
318
319 assert_eq!(walker.visited_count(), 1);
321 assert!(walker.visited().contains(&ObjectId::new(3, 0)));
322 }
323
324 #[test]
325 fn test_max_objects_safety_limit() {
326 let pdf = build_minimal_pdf();
328 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
329
330 let mut stats = ObjectStats::default();
331 let mut walker = ObjectWalker::with_max_objects(1);
332 walker.walk(&store, ObjectId::new(1, 0), &mut stats);
333
334 assert_eq!(walker.visited_count(), 1);
336 }
337
338 #[test]
339 fn test_walk_with_no_references() {
340 let mut pdf = Vec::new();
342 pdf.extend_from_slice(b"%PDF-1.4\n");
343
344 let obj1_offset = pdf.len();
345 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog >>\nendobj\n");
346
347 let xref_offset = pdf.len();
348 pdf.extend_from_slice(b"xref\n");
349 pdf.extend_from_slice(b"0 2\n");
350 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
351 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
352 pdf.extend_from_slice(b"trailer\n");
353 pdf.extend_from_slice(b"<< /Size 2 /Root 1 0 R >>\n");
354 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
355
356 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
357
358 let mut stats = ObjectStats::default();
359 let mut walker = ObjectWalker::new();
360 walker.walk(&store, ObjectId::new(1, 0), &mut stats);
361
362 assert_eq!(walker.visited_count(), 1);
363 assert_eq!(stats.dict_count, 1);
364 assert_eq!(stats.reference_count, 0);
365 }
366
367 #[test]
368 fn test_object_stats_counters() {
369 let mut stats = ObjectStats::default();
370 assert_eq!(stats.dict_count, 0);
371 assert_eq!(stats.array_count, 0);
372 assert_eq!(stats.stream_count, 0);
373 assert_eq!(stats.reference_count, 0);
374 assert_eq!(stats.primitive_count, 0);
375 assert_eq!(stats.total_visited, 0);
376
377 stats.visit_dict(None, &HashMap::new());
379 stats.visit_array(None, &[]);
380 stats.visit_stream(None);
381 stats.visit_reference(ObjectId::new(1, 0));
382 stats.visit_primitive(None, &Object::Integer(42));
383
384 assert_eq!(stats.dict_count, 1);
385 assert_eq!(stats.array_count, 1);
386 assert_eq!(stats.stream_count, 1);
387 assert_eq!(stats.reference_count, 1);
388 assert_eq!(stats.primitive_count, 1);
389 assert_eq!(stats.total_visited, 5);
390 }
391
392 #[test]
393 fn test_visit_callbacks_fire_correctly() {
394 struct CallbackTracker {
396 events: Vec<String>,
397 }
398 impl ObjectVisitor for CallbackTracker {
399 fn visit_dict(&mut self, id: Option<ObjectId>, _dict: &HashMap<Name, Object>) {
400 self.events
401 .push(format!("dict:{}", id.map_or(0, |i| i.number)));
402 }
403 fn visit_array(&mut self, id: Option<ObjectId>, _arr: &[Object]) {
404 self.events
405 .push(format!("array:{}", id.map_or(0, |i| i.number)));
406 }
407 fn visit_stream(&mut self, id: Option<ObjectId>) {
408 self.events
409 .push(format!("stream:{}", id.map_or(0, |i| i.number)));
410 }
411 fn visit_reference(&mut self, target: ObjectId) {
412 self.events.push(format!("ref:{}", target.number));
413 }
414 fn visit_primitive(&mut self, id: Option<ObjectId>, _obj: &Object) {
415 self.events
416 .push(format!("prim:{}", id.map_or(0, |i| i.number)));
417 }
418 }
419
420 let pdf = build_minimal_pdf();
421 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
422
423 let mut tracker = CallbackTracker { events: Vec::new() };
424 let mut walker = ObjectWalker::new();
425 walker.walk(&store, ObjectId::new(1, 0), &mut tracker);
426
427 assert!(tracker.events.contains(&"dict:1".to_string()));
429 assert!(tracker.events.contains(&"dict:2".to_string()));
431 }
432
433 #[test]
434 fn test_walk_nonexistent_root() {
435 let pdf = build_minimal_pdf();
436 let store = ObjectStore::open(pdf, ParsingMode::Lenient).unwrap();
437
438 let mut stats = ObjectStats::default();
439 let mut walker = ObjectWalker::new();
440 walker.walk(&store, ObjectId::new(999, 0), &mut stats);
441
442 assert_eq!(stats.total_visited, 0);
445 }
446
447 #[test]
456 fn test_walker_simple_dict() {
457 let mut pdf = Vec::new();
459 pdf.extend_from_slice(b"%PDF-1.4\n");
460 let obj1_offset = pdf.len();
461 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog >>\nendobj\n");
462 let xref_offset = pdf.len();
463 pdf.extend_from_slice(b"xref\n0 2\n");
464 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
465 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
466 pdf.extend_from_slice(b"trailer\n<< /Size 2 /Root 1 0 R >>\n");
467 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
468
469 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
470 let mut stats = ObjectStats::default();
471 let mut walker = ObjectWalker::new();
472 walker.walk(&store, ObjectId::new(1, 0), &mut stats);
473
474 assert_eq!(walker.visited_count(), 1);
476 assert_eq!(stats.dict_count, 1);
477 }
478
479 #[test]
484 fn test_walker_combined_object() {
485 let pdf = build_minimal_pdf();
486 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
487
488 let mut stats = ObjectStats::default();
489 let mut walker = ObjectWalker::new();
490 walker.walk(&store, ObjectId::new(1, 0), &mut stats);
491
492 assert_eq!(walker.visited_count(), 2);
494 assert!(walker.visited().contains(&ObjectId::new(1, 0)));
495 assert!(walker.visited().contains(&ObjectId::new(2, 0)));
496 assert_eq!(stats.dict_count, 2);
498 }
499
500 #[test]
508 fn test_walker_get_parent_tracking() {
509 let pdf = build_minimal_pdf();
512 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
513
514 struct OrderTracker {
516 order: Vec<u32>,
517 }
518 impl ObjectVisitor for OrderTracker {
519 fn visit_dict(&mut self, id: Option<ObjectId>, _dict: &HashMap<Name, Object>) {
520 if let Some(id) = id {
521 self.order.push(id.number);
522 }
523 }
524 fn visit_array(&mut self, _id: Option<ObjectId>, _arr: &[Object]) {}
525 fn visit_stream(&mut self, _id: Option<ObjectId>) {}
526 fn visit_reference(&mut self, _target: ObjectId) {}
527 fn visit_primitive(&mut self, _id: Option<ObjectId>, _obj: &Object) {}
528 }
529
530 let mut tracker = OrderTracker { order: Vec::new() };
531 let mut walker = ObjectWalker::new();
532 walker.walk(&store, ObjectId::new(1, 0), &mut tracker);
533
534 assert_eq!(walker.visited_count(), 2);
536 assert!(tracker.order.contains(&1));
537 assert!(tracker.order.contains(&2));
538 }
539
540 #[test]
546 fn test_walker_skip_via_max_objects() {
547 let pdf = build_minimal_pdf();
548 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
549
550 let mut stats = ObjectStats::default();
552 let mut walker = ObjectWalker::with_max_objects(1);
553 walker.walk(&store, ObjectId::new(1, 0), &mut stats);
554
555 assert_eq!(walker.visited_count(), 1);
556 assert!(walker.visited().contains(&ObjectId::new(1, 0)));
557 assert!(!walker.visited().contains(&ObjectId::new(2, 0)));
558 }
559
560 #[test]
566 fn test_walker_dictionary_key_tracking() {
567 let pdf = build_minimal_pdf();
568 let store = ObjectStore::open(pdf, ParsingMode::Strict).unwrap();
569
570 let catalog = store.resolve(ObjectId::new(1, 0)).unwrap();
572 let dict = catalog.as_dict().unwrap();
573
574 assert!(dict.contains_key(&Name::r#type()));
576 assert!(dict.contains_key(&Name::pages()));
577
578 let type_val = dict.get(&Name::r#type()).unwrap();
580 assert!(type_val.is_name());
581
582 let pages_val = dict.get(&Name::pages()).unwrap();
583 assert!(pages_val.is_reference());
584 assert_eq!(pages_val.as_reference(), Some(ObjectId::new(2, 0)));
585 }
586}