Skip to main content

oxidize_pdf/parser/
stack_safe.rs

1//! Stack-safe parsing utilities
2//!
3//! This module provides utilities for parsing deeply nested PDF structures
4//! without risking stack overflow. It implements recursion limits and
5//! iterative alternatives to recursive algorithms.
6
7use super::{ParseError, ParseResult};
8use std::collections::HashSet;
9
10/// Maximum recursion depth for PDF parsing operations
11pub const MAX_RECURSION_DEPTH: usize = 1000;
12
13/// Timeout for long-running parsing operations (in seconds)
14pub const PARSING_TIMEOUT_SECS: u64 = 120; // Aumentado para documentos complejos
15
16/// Stack-safe parsing context
17#[derive(Debug)]
18pub struct StackSafeContext {
19    /// Current recursion depth
20    pub depth: usize,
21    /// Maximum allowed depth
22    pub max_depth: usize,
23    /// Pila de referencias activas (para detectar ciclos reales)
24    pub active_stack: Vec<(u32, u16)>,
25    /// Referencias completamente procesadas (no son ciclos)
26    pub completed_refs: HashSet<(u32, u16)>,
27    /// Start time for timeout tracking
28    #[cfg(not(target_arch = "wasm32"))]
29    pub start_time: std::time::Instant,
30    /// Timeout duration
31    #[cfg(not(target_arch = "wasm32"))]
32    pub timeout: std::time::Duration,
33}
34
35impl Default for StackSafeContext {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl StackSafeContext {
42    /// Create a new stack-safe context
43    pub fn new() -> Self {
44        Self {
45            depth: 0,
46            max_depth: MAX_RECURSION_DEPTH,
47            active_stack: Vec::new(),
48            completed_refs: HashSet::new(),
49            #[cfg(not(target_arch = "wasm32"))]
50            start_time: std::time::Instant::now(),
51            #[cfg(not(target_arch = "wasm32"))]
52            timeout: std::time::Duration::from_secs(PARSING_TIMEOUT_SECS),
53        }
54    }
55
56    /// Create a new context with custom limits
57    ///
58    /// On `wasm32-unknown-unknown`, the `timeout_secs` parameter is ignored
59    /// because `std::time::Instant` is not available. Only the recursion
60    /// depth limit is enforced.
61    #[allow(unused_variables)]
62    pub fn with_limits(max_depth: usize, timeout_secs: u64) -> Self {
63        Self {
64            depth: 0,
65            max_depth,
66            active_stack: Vec::new(),
67            completed_refs: HashSet::new(),
68            #[cfg(not(target_arch = "wasm32"))]
69            start_time: std::time::Instant::now(),
70            #[cfg(not(target_arch = "wasm32"))]
71            timeout: std::time::Duration::from_secs(timeout_secs),
72        }
73    }
74
75    /// Enter a new recursion level
76    pub fn enter(&mut self) -> ParseResult<()> {
77        if self.depth + 1 > self.max_depth {
78            return Err(ParseError::SyntaxError {
79                position: 0,
80                message: format!(
81                    "Maximum recursion depth exceeded: {} (limit: {})",
82                    self.depth + 1,
83                    self.max_depth
84                ),
85            });
86        }
87        self.depth += 1;
88        self.check_timeout()?;
89        Ok(())
90    }
91
92    /// Exit a recursion level
93    pub fn exit(&mut self) {
94        if self.depth > 0 {
95            self.depth -= 1;
96        }
97    }
98
99    /// Push a reference onto the active stack (for cycle detection)
100    pub fn push_ref(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<()> {
101        let ref_key = (obj_num, gen_num);
102
103        // Check if it's already in the active stack (real circular reference)
104        if self.active_stack.contains(&ref_key) {
105            return Err(ParseError::SyntaxError {
106                position: 0,
107                message: format!("Circular reference detected: {obj_num} {gen_num} R"),
108            });
109        }
110
111        // It's OK if it was already processed completely
112        self.active_stack.push(ref_key);
113        Ok(())
114    }
115
116    /// Pop a reference from the active stack and mark as completed
117    pub fn pop_ref(&mut self) {
118        if let Some(ref_key) = self.active_stack.pop() {
119            self.completed_refs.insert(ref_key);
120        }
121    }
122
123    /// Check if parsing has timed out
124    ///
125    /// On `wasm32-unknown-unknown` this is a no-op — only the recursion depth
126    /// limit protects against runaway parsing.
127    pub fn check_timeout(&self) -> ParseResult<()> {
128        #[cfg(not(target_arch = "wasm32"))]
129        {
130            if self.start_time.elapsed() > self.timeout {
131                return Err(ParseError::SyntaxError {
132                    position: 0,
133                    message: format!("Parsing timeout exceeded: {}s", self.timeout.as_secs()),
134                });
135            }
136        }
137        Ok(())
138    }
139
140    /// Create a child context for nested operations
141    pub fn child(&self) -> Self {
142        Self {
143            depth: self.depth,
144            max_depth: self.max_depth,
145            active_stack: self.active_stack.clone(),
146            completed_refs: self.completed_refs.clone(),
147            #[cfg(not(target_arch = "wasm32"))]
148            start_time: self.start_time,
149            #[cfg(not(target_arch = "wasm32"))]
150            timeout: self.timeout,
151        }
152    }
153}
154
155/// RAII guard for recursion depth tracking
156pub struct RecursionGuard<'a> {
157    context: &'a mut StackSafeContext,
158}
159
160impl<'a> RecursionGuard<'a> {
161    /// Create a new recursion guard
162    pub fn new(context: &'a mut StackSafeContext) -> ParseResult<Self> {
163        context.enter()?;
164        Ok(Self { context })
165    }
166}
167
168impl<'a> Drop for RecursionGuard<'a> {
169    fn drop(&mut self) {
170        self.context.exit();
171    }
172}
173
174/// RAII guard for reference stack tracking
175pub struct ReferenceStackGuard<'a> {
176    context: &'a mut StackSafeContext,
177}
178
179impl<'a> ReferenceStackGuard<'a> {
180    /// Create a new reference stack guard
181    pub fn new(context: &'a mut StackSafeContext, obj_num: u32, gen_num: u16) -> ParseResult<Self> {
182        context.push_ref(obj_num, gen_num)?;
183        Ok(Self { context })
184    }
185}
186
187impl<'a> Drop for ReferenceStackGuard<'a> {
188    fn drop(&mut self) {
189        self.context.pop_ref();
190    }
191}
192
193#[cfg(test)]
194mod tests {
195    use super::*;
196
197    // ==================== StackSafeContext Tests ====================
198
199    #[test]
200    fn test_stack_safe_context_new() {
201        let context = StackSafeContext::new();
202        assert_eq!(context.depth, 0);
203        assert_eq!(context.max_depth, MAX_RECURSION_DEPTH);
204        assert!(context.active_stack.is_empty());
205        assert!(context.completed_refs.is_empty());
206    }
207
208    #[test]
209    fn test_stack_safe_context_default() {
210        let context = StackSafeContext::default();
211        assert_eq!(context.depth, 0);
212        assert_eq!(context.max_depth, MAX_RECURSION_DEPTH);
213    }
214
215    #[test]
216    fn test_stack_safe_context_with_limits() {
217        let context = StackSafeContext::with_limits(50, 30);
218        assert_eq!(context.depth, 0);
219        assert_eq!(context.max_depth, 50);
220        #[cfg(not(target_arch = "wasm32"))]
221        assert_eq!(context.timeout.as_secs(), 30);
222    }
223
224    #[test]
225    fn test_recursion_limits() {
226        let mut context = StackSafeContext::with_limits(3, 60);
227
228        // Should work within limits
229        assert!(context.enter().is_ok());
230        assert_eq!(context.depth, 1);
231
232        assert!(context.enter().is_ok());
233        assert_eq!(context.depth, 2);
234
235        assert!(context.enter().is_ok());
236        assert_eq!(context.depth, 3);
237
238        // Should fail when exceeding limit
239        assert!(context.enter().is_err());
240
241        // Test exit
242        context.exit();
243        assert_eq!(context.depth, 2);
244    }
245
246    #[test]
247    fn test_enter_increments_depth() {
248        let mut context = StackSafeContext::new();
249        assert_eq!(context.depth, 0);
250
251        context.enter().unwrap();
252        assert_eq!(context.depth, 1);
253
254        context.enter().unwrap();
255        assert_eq!(context.depth, 2);
256    }
257
258    #[test]
259    fn test_exit_decrements_depth() {
260        let mut context = StackSafeContext::new();
261        context.enter().unwrap();
262        context.enter().unwrap();
263        assert_eq!(context.depth, 2);
264
265        context.exit();
266        assert_eq!(context.depth, 1);
267
268        context.exit();
269        assert_eq!(context.depth, 0);
270    }
271
272    #[test]
273    fn test_exit_at_zero_does_not_underflow() {
274        let mut context = StackSafeContext::new();
275        assert_eq!(context.depth, 0);
276
277        context.exit(); // Should not underflow
278        assert_eq!(context.depth, 0);
279
280        context.exit(); // Multiple exits at zero
281        assert_eq!(context.depth, 0);
282    }
283
284    #[test]
285    fn test_cycle_detection() {
286        let mut context = StackSafeContext::new();
287
288        // First push should work
289        assert!(context.push_ref(1, 0).is_ok());
290
291        // Second push of same ref should fail (circular)
292        assert!(context.push_ref(1, 0).is_err());
293
294        // Different ref should work
295        assert!(context.push_ref(2, 0).is_ok());
296
297        // Pop refs
298        context.pop_ref(); // pops 2,0
299        context.pop_ref(); // pops 1,0
300
301        // Now we can push 1,0 again
302        assert!(context.push_ref(1, 0).is_ok());
303    }
304
305    #[test]
306    fn test_push_ref_adds_to_active_stack() {
307        let mut context = StackSafeContext::new();
308        assert!(context.active_stack.is_empty());
309
310        context.push_ref(10, 5).unwrap();
311        assert_eq!(context.active_stack.len(), 1);
312        assert!(context.active_stack.contains(&(10, 5)));
313    }
314
315    #[test]
316    fn test_pop_ref_marks_as_completed() {
317        let mut context = StackSafeContext::new();
318        context.push_ref(7, 3).unwrap();
319        assert!(context.completed_refs.is_empty());
320
321        context.pop_ref();
322        assert!(context.active_stack.is_empty());
323        assert!(context.completed_refs.contains(&(7, 3)));
324    }
325
326    #[test]
327    fn test_pop_ref_on_empty_stack() {
328        let mut context = StackSafeContext::new();
329        assert!(context.active_stack.is_empty());
330
331        // Should not panic
332        context.pop_ref();
333        assert!(context.active_stack.is_empty());
334    }
335
336    #[test]
337    fn test_multiple_refs_stack_order() {
338        let mut context = StackSafeContext::new();
339        context.push_ref(1, 0).unwrap();
340        context.push_ref(2, 0).unwrap();
341        context.push_ref(3, 0).unwrap();
342
343        assert_eq!(context.active_stack.len(), 3);
344
345        context.pop_ref(); // pops 3,0
346        assert!(context.completed_refs.contains(&(3, 0)));
347        assert!(!context.completed_refs.contains(&(2, 0)));
348        assert!(!context.completed_refs.contains(&(1, 0)));
349
350        context.pop_ref(); // pops 2,0
351        assert!(context.completed_refs.contains(&(2, 0)));
352
353        context.pop_ref(); // pops 1,0
354        assert!(context.completed_refs.contains(&(1, 0)));
355    }
356
357    #[test]
358    fn test_check_timeout_within_limit() {
359        let context = StackSafeContext::with_limits(100, 60);
360        // Should be well within timeout
361        assert!(context.check_timeout().is_ok());
362    }
363
364    #[test]
365    fn test_child_context() {
366        let mut context = StackSafeContext::with_limits(50, 30);
367        context.enter().unwrap();
368        context.enter().unwrap();
369        context.push_ref(5, 0).unwrap();
370        context.pop_ref(); // completes 5,0
371
372        let child = context.child();
373        assert_eq!(child.depth, context.depth);
374        assert_eq!(child.max_depth, context.max_depth);
375        assert!(child.completed_refs.contains(&(5, 0)));
376    }
377
378    #[test]
379    fn test_child_context_is_independent() {
380        let mut context = StackSafeContext::new();
381        context.enter().unwrap();
382
383        let child = context.child();
384        assert_eq!(child.depth, 1);
385
386        // Original context still has depth 1
387        context.exit();
388        assert_eq!(context.depth, 0);
389        // Child should still be at 1 (cloned state)
390    }
391
392    #[test]
393    fn test_different_generation_numbers() {
394        let mut context = StackSafeContext::new();
395        // Same object number but different generations
396        context.push_ref(1, 0).unwrap();
397        context.push_ref(1, 1).unwrap(); // Different gen
398        context.push_ref(1, 2).unwrap(); // Different gen
399
400        assert_eq!(context.active_stack.len(), 3);
401    }
402
403    // ==================== RecursionGuard Tests ====================
404
405    #[test]
406    fn test_recursion_guard() {
407        let mut context = StackSafeContext::new();
408        assert_eq!(context.depth, 0);
409
410        {
411            let _guard = RecursionGuard::new(&mut context).unwrap();
412            // Can't access context.depth while guard is active due to borrow checker
413        }
414
415        // Should auto-exit when guard drops
416        assert_eq!(context.depth, 0);
417    }
418
419    #[test]
420    fn test_recursion_guard_nesting() {
421        let mut context = StackSafeContext::with_limits(10, 60);
422
423        {
424            let _guard1 = RecursionGuard::new(&mut context).unwrap();
425            // depth is 1 but can't access directly
426        }
427        assert_eq!(context.depth, 0);
428
429        // Manually test multiple enters/exits
430        context.enter().unwrap();
431        context.enter().unwrap();
432        assert_eq!(context.depth, 2);
433    }
434
435    #[test]
436    fn test_recursion_guard_fails_at_limit() {
437        let mut context = StackSafeContext::with_limits(1, 60);
438        context.enter().unwrap(); // Already at limit
439
440        let result = RecursionGuard::new(&mut context);
441        assert!(result.is_err());
442    }
443
444    // ==================== ReferenceStackGuard Tests ====================
445
446    #[test]
447    fn test_reference_stack_guard() {
448        let mut context = StackSafeContext::new();
449
450        {
451            let _guard = ReferenceStackGuard::new(&mut context, 1, 0).unwrap();
452            // Reference is in active stack while guard is active
453            // Note: Can't check stack length here due to borrow checker constraints
454        }
455
456        // Should auto-pop when guard drops
457        assert_eq!(context.active_stack.len(), 0);
458        assert!(context.completed_refs.contains(&(1, 0)));
459
460        // Can visit again after guard is dropped
461        assert!(context.push_ref(1, 0).is_ok());
462    }
463
464    #[test]
465    fn test_reference_stack_guard_circular_detection() {
466        let mut context = StackSafeContext::new();
467        context.push_ref(5, 0).unwrap();
468
469        // Should fail - circular reference
470        let result = ReferenceStackGuard::new(&mut context, 5, 0);
471        assert!(result.is_err());
472    }
473
474    // ==================== Constants Tests ====================
475
476    #[test]
477    fn test_constants() {
478        assert_eq!(MAX_RECURSION_DEPTH, 1000);
479        assert_eq!(PARSING_TIMEOUT_SECS, 120);
480    }
481
482    // ==================== Debug Tests ====================
483
484    #[test]
485    fn test_stack_safe_context_debug() {
486        let context = StackSafeContext::new();
487        let debug_str = format!("{:?}", context);
488        assert!(debug_str.contains("StackSafeContext"));
489        assert!(debug_str.contains("depth: 0"));
490    }
491
492    // ==================== Integration Tests ====================
493
494    #[test]
495    fn test_deep_recursion_simulation() {
496        let mut context = StackSafeContext::with_limits(100, 60);
497
498        // Simulate deep recursion
499        for i in 0..100 {
500            assert!(context.enter().is_ok(), "Failed at depth {}", i);
501        }
502        assert_eq!(context.depth, 100);
503
504        // Should fail at 101
505        assert!(context.enter().is_err());
506
507        // Unwind
508        for _ in 0..100 {
509            context.exit();
510        }
511        assert_eq!(context.depth, 0);
512    }
513
514    #[test]
515    fn test_complex_reference_scenario() {
516        let mut context = StackSafeContext::new();
517
518        // Simulate traversing a PDF object tree
519        context.push_ref(1, 0).unwrap(); // Enter root
520        context.push_ref(2, 0).unwrap(); // Enter child
521        context.push_ref(3, 0).unwrap(); // Enter grandchild
522
523        // Can't reference grandchild again (circular)
524        assert!(context.push_ref(3, 0).is_err());
525
526        // But can reference a different object
527        context.push_ref(4, 0).unwrap();
528
529        // Pop all
530        context.pop_ref();
531        context.pop_ref();
532        context.pop_ref();
533        context.pop_ref();
534
535        // All should be completed
536        assert!(context.completed_refs.contains(&(1, 0)));
537        assert!(context.completed_refs.contains(&(2, 0)));
538        assert!(context.completed_refs.contains(&(3, 0)));
539        assert!(context.completed_refs.contains(&(4, 0)));
540
541        // Can now revisit any
542        context.push_ref(3, 0).unwrap();
543    }
544}