Skip to main content

oxidize_pdf/parser/
stack_safe.rs

1//! Stack-safe parsing utilities
2//!
3//! This module provides utilities for parsing deeply nested PDF structures
4//! without risking stack overflow. It implements recursion limits and
5//! iterative alternatives to recursive algorithms.
6
7use super::{ParseError, ParseResult};
8use std::collections::HashSet;
9
10/// Maximum recursion depth for PDF parsing operations
11pub const MAX_RECURSION_DEPTH: usize = 1000;
12
13/// Timeout for long-running parsing operations (in seconds)
14pub const PARSING_TIMEOUT_SECS: u64 = 120; // Aumentado para documentos complejos
15
16/// Stack-safe parsing context
17#[derive(Debug)]
18pub struct StackSafeContext {
19    /// Current recursion depth
20    pub depth: usize,
21    /// Maximum allowed depth
22    pub max_depth: usize,
23    /// Pila de referencias activas (para detectar ciclos reales)
24    pub active_stack: Vec<(u32, u16)>,
25    /// Referencias completamente procesadas (no son ciclos)
26    pub completed_refs: HashSet<(u32, u16)>,
27    /// Start time for timeout tracking
28    pub start_time: std::time::Instant,
29    /// Timeout duration
30    pub timeout: std::time::Duration,
31}
32
33impl Default for StackSafeContext {
34    fn default() -> Self {
35        Self::new()
36    }
37}
38
39impl StackSafeContext {
40    /// Create a new stack-safe context
41    pub fn new() -> Self {
42        Self {
43            depth: 0,
44            max_depth: MAX_RECURSION_DEPTH,
45            active_stack: Vec::new(),
46            completed_refs: HashSet::new(),
47            start_time: std::time::Instant::now(),
48            timeout: std::time::Duration::from_secs(PARSING_TIMEOUT_SECS),
49        }
50    }
51
52    /// Create a new context with custom limits
53    pub fn with_limits(max_depth: usize, timeout_secs: u64) -> Self {
54        Self {
55            depth: 0,
56            max_depth,
57            active_stack: Vec::new(),
58            completed_refs: HashSet::new(),
59            start_time: std::time::Instant::now(),
60            timeout: std::time::Duration::from_secs(timeout_secs),
61        }
62    }
63
64    /// Enter a new recursion level
65    pub fn enter(&mut self) -> ParseResult<()> {
66        if self.depth + 1 > self.max_depth {
67            return Err(ParseError::SyntaxError {
68                position: 0,
69                message: format!(
70                    "Maximum recursion depth exceeded: {} (limit: {})",
71                    self.depth + 1,
72                    self.max_depth
73                ),
74            });
75        }
76        self.depth += 1;
77        self.check_timeout()?;
78        Ok(())
79    }
80
81    /// Exit a recursion level
82    pub fn exit(&mut self) {
83        if self.depth > 0 {
84            self.depth -= 1;
85        }
86    }
87
88    /// Push a reference onto the active stack (for cycle detection)
89    pub fn push_ref(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<()> {
90        let ref_key = (obj_num, gen_num);
91
92        // Check if it's already in the active stack (real circular reference)
93        if self.active_stack.contains(&ref_key) {
94            return Err(ParseError::SyntaxError {
95                position: 0,
96                message: format!("Circular reference detected: {obj_num} {gen_num} R"),
97            });
98        }
99
100        // It's OK if it was already processed completely
101        self.active_stack.push(ref_key);
102        Ok(())
103    }
104
105    /// Pop a reference from the active stack and mark as completed
106    pub fn pop_ref(&mut self) {
107        if let Some(ref_key) = self.active_stack.pop() {
108            self.completed_refs.insert(ref_key);
109        }
110    }
111
112    /// Check if parsing has timed out
113    pub fn check_timeout(&self) -> ParseResult<()> {
114        if self.start_time.elapsed() > self.timeout {
115            return Err(ParseError::SyntaxError {
116                position: 0,
117                message: format!("Parsing timeout exceeded: {}s", self.timeout.as_secs()),
118            });
119        }
120        Ok(())
121    }
122
123    /// Create a child context for nested operations
124    pub fn child(&self) -> Self {
125        Self {
126            depth: self.depth,
127            max_depth: self.max_depth,
128            active_stack: self.active_stack.clone(),
129            completed_refs: self.completed_refs.clone(),
130            start_time: self.start_time,
131            timeout: self.timeout,
132        }
133    }
134}
135
136/// RAII guard for recursion depth tracking
137pub struct RecursionGuard<'a> {
138    context: &'a mut StackSafeContext,
139}
140
141impl<'a> RecursionGuard<'a> {
142    /// Create a new recursion guard
143    pub fn new(context: &'a mut StackSafeContext) -> ParseResult<Self> {
144        context.enter()?;
145        Ok(Self { context })
146    }
147}
148
149impl<'a> Drop for RecursionGuard<'a> {
150    fn drop(&mut self) {
151        self.context.exit();
152    }
153}
154
155/// RAII guard for reference stack tracking
156pub struct ReferenceStackGuard<'a> {
157    context: &'a mut StackSafeContext,
158}
159
160impl<'a> ReferenceStackGuard<'a> {
161    /// Create a new reference stack guard
162    pub fn new(context: &'a mut StackSafeContext, obj_num: u32, gen_num: u16) -> ParseResult<Self> {
163        context.push_ref(obj_num, gen_num)?;
164        Ok(Self { context })
165    }
166}
167
168impl<'a> Drop for ReferenceStackGuard<'a> {
169    fn drop(&mut self) {
170        self.context.pop_ref();
171    }
172}
173
174#[cfg(test)]
175mod tests {
176    use super::*;
177
178    // ==================== StackSafeContext Tests ====================
179
180    #[test]
181    fn test_stack_safe_context_new() {
182        let context = StackSafeContext::new();
183        assert_eq!(context.depth, 0);
184        assert_eq!(context.max_depth, MAX_RECURSION_DEPTH);
185        assert!(context.active_stack.is_empty());
186        assert!(context.completed_refs.is_empty());
187    }
188
189    #[test]
190    fn test_stack_safe_context_default() {
191        let context = StackSafeContext::default();
192        assert_eq!(context.depth, 0);
193        assert_eq!(context.max_depth, MAX_RECURSION_DEPTH);
194    }
195
196    #[test]
197    fn test_stack_safe_context_with_limits() {
198        let context = StackSafeContext::with_limits(50, 30);
199        assert_eq!(context.depth, 0);
200        assert_eq!(context.max_depth, 50);
201        assert_eq!(context.timeout.as_secs(), 30);
202    }
203
204    #[test]
205    fn test_recursion_limits() {
206        let mut context = StackSafeContext::with_limits(3, 60);
207
208        // Should work within limits
209        assert!(context.enter().is_ok());
210        assert_eq!(context.depth, 1);
211
212        assert!(context.enter().is_ok());
213        assert_eq!(context.depth, 2);
214
215        assert!(context.enter().is_ok());
216        assert_eq!(context.depth, 3);
217
218        // Should fail when exceeding limit
219        assert!(context.enter().is_err());
220
221        // Test exit
222        context.exit();
223        assert_eq!(context.depth, 2);
224    }
225
226    #[test]
227    fn test_enter_increments_depth() {
228        let mut context = StackSafeContext::new();
229        assert_eq!(context.depth, 0);
230
231        context.enter().unwrap();
232        assert_eq!(context.depth, 1);
233
234        context.enter().unwrap();
235        assert_eq!(context.depth, 2);
236    }
237
238    #[test]
239    fn test_exit_decrements_depth() {
240        let mut context = StackSafeContext::new();
241        context.enter().unwrap();
242        context.enter().unwrap();
243        assert_eq!(context.depth, 2);
244
245        context.exit();
246        assert_eq!(context.depth, 1);
247
248        context.exit();
249        assert_eq!(context.depth, 0);
250    }
251
252    #[test]
253    fn test_exit_at_zero_does_not_underflow() {
254        let mut context = StackSafeContext::new();
255        assert_eq!(context.depth, 0);
256
257        context.exit(); // Should not underflow
258        assert_eq!(context.depth, 0);
259
260        context.exit(); // Multiple exits at zero
261        assert_eq!(context.depth, 0);
262    }
263
264    #[test]
265    fn test_cycle_detection() {
266        let mut context = StackSafeContext::new();
267
268        // First push should work
269        assert!(context.push_ref(1, 0).is_ok());
270
271        // Second push of same ref should fail (circular)
272        assert!(context.push_ref(1, 0).is_err());
273
274        // Different ref should work
275        assert!(context.push_ref(2, 0).is_ok());
276
277        // Pop refs
278        context.pop_ref(); // pops 2,0
279        context.pop_ref(); // pops 1,0
280
281        // Now we can push 1,0 again
282        assert!(context.push_ref(1, 0).is_ok());
283    }
284
285    #[test]
286    fn test_push_ref_adds_to_active_stack() {
287        let mut context = StackSafeContext::new();
288        assert!(context.active_stack.is_empty());
289
290        context.push_ref(10, 5).unwrap();
291        assert_eq!(context.active_stack.len(), 1);
292        assert!(context.active_stack.contains(&(10, 5)));
293    }
294
295    #[test]
296    fn test_pop_ref_marks_as_completed() {
297        let mut context = StackSafeContext::new();
298        context.push_ref(7, 3).unwrap();
299        assert!(context.completed_refs.is_empty());
300
301        context.pop_ref();
302        assert!(context.active_stack.is_empty());
303        assert!(context.completed_refs.contains(&(7, 3)));
304    }
305
306    #[test]
307    fn test_pop_ref_on_empty_stack() {
308        let mut context = StackSafeContext::new();
309        assert!(context.active_stack.is_empty());
310
311        // Should not panic
312        context.pop_ref();
313        assert!(context.active_stack.is_empty());
314    }
315
316    #[test]
317    fn test_multiple_refs_stack_order() {
318        let mut context = StackSafeContext::new();
319        context.push_ref(1, 0).unwrap();
320        context.push_ref(2, 0).unwrap();
321        context.push_ref(3, 0).unwrap();
322
323        assert_eq!(context.active_stack.len(), 3);
324
325        context.pop_ref(); // pops 3,0
326        assert!(context.completed_refs.contains(&(3, 0)));
327        assert!(!context.completed_refs.contains(&(2, 0)));
328        assert!(!context.completed_refs.contains(&(1, 0)));
329
330        context.pop_ref(); // pops 2,0
331        assert!(context.completed_refs.contains(&(2, 0)));
332
333        context.pop_ref(); // pops 1,0
334        assert!(context.completed_refs.contains(&(1, 0)));
335    }
336
337    #[test]
338    fn test_check_timeout_within_limit() {
339        let context = StackSafeContext::with_limits(100, 60);
340        // Should be well within timeout
341        assert!(context.check_timeout().is_ok());
342    }
343
344    #[test]
345    fn test_child_context() {
346        let mut context = StackSafeContext::with_limits(50, 30);
347        context.enter().unwrap();
348        context.enter().unwrap();
349        context.push_ref(5, 0).unwrap();
350        context.pop_ref(); // completes 5,0
351
352        let child = context.child();
353        assert_eq!(child.depth, context.depth);
354        assert_eq!(child.max_depth, context.max_depth);
355        assert!(child.completed_refs.contains(&(5, 0)));
356    }
357
358    #[test]
359    fn test_child_context_is_independent() {
360        let mut context = StackSafeContext::new();
361        context.enter().unwrap();
362
363        let child = context.child();
364        assert_eq!(child.depth, 1);
365
366        // Original context still has depth 1
367        context.exit();
368        assert_eq!(context.depth, 0);
369        // Child should still be at 1 (cloned state)
370    }
371
372    #[test]
373    fn test_different_generation_numbers() {
374        let mut context = StackSafeContext::new();
375        // Same object number but different generations
376        context.push_ref(1, 0).unwrap();
377        context.push_ref(1, 1).unwrap(); // Different gen
378        context.push_ref(1, 2).unwrap(); // Different gen
379
380        assert_eq!(context.active_stack.len(), 3);
381    }
382
383    // ==================== RecursionGuard Tests ====================
384
385    #[test]
386    fn test_recursion_guard() {
387        let mut context = StackSafeContext::new();
388        assert_eq!(context.depth, 0);
389
390        {
391            let _guard = RecursionGuard::new(&mut context).unwrap();
392            // Can't access context.depth while guard is active due to borrow checker
393        }
394
395        // Should auto-exit when guard drops
396        assert_eq!(context.depth, 0);
397    }
398
399    #[test]
400    fn test_recursion_guard_nesting() {
401        let mut context = StackSafeContext::with_limits(10, 60);
402
403        {
404            let _guard1 = RecursionGuard::new(&mut context).unwrap();
405            // depth is 1 but can't access directly
406        }
407        assert_eq!(context.depth, 0);
408
409        // Manually test multiple enters/exits
410        context.enter().unwrap();
411        context.enter().unwrap();
412        assert_eq!(context.depth, 2);
413    }
414
415    #[test]
416    fn test_recursion_guard_fails_at_limit() {
417        let mut context = StackSafeContext::with_limits(1, 60);
418        context.enter().unwrap(); // Already at limit
419
420        let result = RecursionGuard::new(&mut context);
421        assert!(result.is_err());
422    }
423
424    // ==================== ReferenceStackGuard Tests ====================
425
426    #[test]
427    fn test_reference_stack_guard() {
428        let mut context = StackSafeContext::new();
429
430        {
431            let _guard = ReferenceStackGuard::new(&mut context, 1, 0).unwrap();
432            // Reference is in active stack while guard is active
433            // Note: Can't check stack length here due to borrow checker constraints
434        }
435
436        // Should auto-pop when guard drops
437        assert_eq!(context.active_stack.len(), 0);
438        assert!(context.completed_refs.contains(&(1, 0)));
439
440        // Can visit again after guard is dropped
441        assert!(context.push_ref(1, 0).is_ok());
442    }
443
444    #[test]
445    fn test_reference_stack_guard_circular_detection() {
446        let mut context = StackSafeContext::new();
447        context.push_ref(5, 0).unwrap();
448
449        // Should fail - circular reference
450        let result = ReferenceStackGuard::new(&mut context, 5, 0);
451        assert!(result.is_err());
452    }
453
454    // ==================== Constants Tests ====================
455
456    #[test]
457    fn test_constants() {
458        assert_eq!(MAX_RECURSION_DEPTH, 1000);
459        assert_eq!(PARSING_TIMEOUT_SECS, 120);
460    }
461
462    // ==================== Debug Tests ====================
463
464    #[test]
465    fn test_stack_safe_context_debug() {
466        let context = StackSafeContext::new();
467        let debug_str = format!("{:?}", context);
468        assert!(debug_str.contains("StackSafeContext"));
469        assert!(debug_str.contains("depth: 0"));
470    }
471
472    // ==================== Integration Tests ====================
473
474    #[test]
475    fn test_deep_recursion_simulation() {
476        let mut context = StackSafeContext::with_limits(100, 60);
477
478        // Simulate deep recursion
479        for i in 0..100 {
480            assert!(context.enter().is_ok(), "Failed at depth {}", i);
481        }
482        assert_eq!(context.depth, 100);
483
484        // Should fail at 101
485        assert!(context.enter().is_err());
486
487        // Unwind
488        for _ in 0..100 {
489            context.exit();
490        }
491        assert_eq!(context.depth, 0);
492    }
493
494    #[test]
495    fn test_complex_reference_scenario() {
496        let mut context = StackSafeContext::new();
497
498        // Simulate traversing a PDF object tree
499        context.push_ref(1, 0).unwrap(); // Enter root
500        context.push_ref(2, 0).unwrap(); // Enter child
501        context.push_ref(3, 0).unwrap(); // Enter grandchild
502
503        // Can't reference grandchild again (circular)
504        assert!(context.push_ref(3, 0).is_err());
505
506        // But can reference a different object
507        context.push_ref(4, 0).unwrap();
508
509        // Pop all
510        context.pop_ref();
511        context.pop_ref();
512        context.pop_ref();
513        context.pop_ref();
514
515        // All should be completed
516        assert!(context.completed_refs.contains(&(1, 0)));
517        assert!(context.completed_refs.contains(&(2, 0)));
518        assert!(context.completed_refs.contains(&(3, 0)));
519        assert!(context.completed_refs.contains(&(4, 0)));
520
521        // Can now revisit any
522        context.push_ref(3, 0).unwrap();
523    }
524}