oxidize_pdf/parser/
stack_safe.rs

1//! Stack-safe parsing utilities
2//!
//! This module provides utilities for parsing deeply nested PDF structures
//! without risking stack overflow. It implements recursion depth limits,
//! circular-reference detection, and a parsing timeout, together with RAII
//! guards that keep this bookkeeping balanced.
6
7use super::{ParseError, ParseResult};
8use std::collections::HashSet;
9
/// Upper bound on the nesting depth allowed while parsing PDF structures.
///
/// [`StackSafeContext::new`] uses this value as its `max_depth`.
pub const MAX_RECURSION_DEPTH: usize = 1_000;

/// Time budget, in seconds, for a long-running parsing operation.
///
/// [`StackSafeContext::new`] uses this value to build its `timeout`.
pub const PARSING_TIMEOUT_SECS: u64 = 30;
15
/// Stack-safe parsing context.
///
/// Bundles the bookkeeping used to keep parsing bounded: the current and
/// maximum recursion depth, the object references currently being resolved
/// (for cycle detection), and a start time plus time budget.
///
/// References are stored as `(object number, generation number)` pairs.
#[derive(Debug, Clone)]
pub struct StackSafeContext {
    /// Current recursion depth
    pub depth: usize,
    /// Maximum allowed depth
    pub max_depth: usize,
    /// Stack of references currently being resolved (used to detect real cycles)
    pub active_stack: Vec<(u32, u16)>,
    /// References that have been fully processed (revisiting them is not a cycle)
    pub completed_refs: HashSet<(u32, u16)>,
    /// Start time for timeout tracking
    pub start_time: std::time::Instant,
    /// Timeout duration
    pub timeout: std::time::Duration,
}
32
33impl Default for StackSafeContext {
34    fn default() -> Self {
35        Self::new()
36    }
37}
38
39impl StackSafeContext {
40    /// Create a new stack-safe context
41    pub fn new() -> Self {
42        Self {
43            depth: 0,
44            max_depth: MAX_RECURSION_DEPTH,
45            active_stack: Vec::new(),
46            completed_refs: HashSet::new(),
47            start_time: std::time::Instant::now(),
48            timeout: std::time::Duration::from_secs(PARSING_TIMEOUT_SECS),
49        }
50    }
51
52    /// Create a new context with custom limits
53    pub fn with_limits(max_depth: usize, timeout_secs: u64) -> Self {
54        Self {
55            depth: 0,
56            max_depth,
57            active_stack: Vec::new(),
58            completed_refs: HashSet::new(),
59            start_time: std::time::Instant::now(),
60            timeout: std::time::Duration::from_secs(timeout_secs),
61        }
62    }
63
64    /// Enter a new recursion level
65    pub fn enter(&mut self) -> ParseResult<()> {
66        if self.depth + 1 > self.max_depth {
67            return Err(ParseError::SyntaxError {
68                position: 0,
69                message: format!(
70                    "Maximum recursion depth exceeded: {} (limit: {})",
71                    self.depth + 1,
72                    self.max_depth
73                ),
74            });
75        }
76        self.depth += 1;
77        self.check_timeout()?;
78        Ok(())
79    }
80
81    /// Exit a recursion level
82    pub fn exit(&mut self) {
83        if self.depth > 0 {
84            self.depth -= 1;
85        }
86    }
87
88    /// Push a reference onto the active stack (for cycle detection)
89    pub fn push_ref(&mut self, obj_num: u32, gen_num: u16) -> ParseResult<()> {
90        let ref_key = (obj_num, gen_num);
91
92        // Check if it's already in the active stack (real circular reference)
93        if self.active_stack.contains(&ref_key) {
94            return Err(ParseError::SyntaxError {
95                position: 0,
96                message: format!("Circular reference detected: {obj_num} {gen_num} R"),
97            });
98        }
99
100        // It's OK if it was already processed completely
101        self.active_stack.push(ref_key);
102        Ok(())
103    }
104
105    /// Pop a reference from the active stack and mark as completed
106    pub fn pop_ref(&mut self) {
107        if let Some(ref_key) = self.active_stack.pop() {
108            self.completed_refs.insert(ref_key);
109        }
110    }
111
112    /// Check if parsing has timed out
113    pub fn check_timeout(&self) -> ParseResult<()> {
114        if self.start_time.elapsed() > self.timeout {
115            return Err(ParseError::SyntaxError {
116                position: 0,
117                message: format!("Parsing timeout exceeded: {}s", self.timeout.as_secs()),
118            });
119        }
120        Ok(())
121    }
122
123    /// Create a child context for nested operations
124    pub fn child(&self) -> Self {
125        Self {
126            depth: self.depth,
127            max_depth: self.max_depth,
128            active_stack: self.active_stack.clone(),
129            completed_refs: self.completed_refs.clone(),
130            start_time: self.start_time,
131            timeout: self.timeout,
132        }
133    }
134}
135
136/// RAII guard for recursion depth tracking
137pub struct RecursionGuard<'a> {
138    context: &'a mut StackSafeContext,
139}
140
141impl<'a> RecursionGuard<'a> {
142    /// Create a new recursion guard
143    pub fn new(context: &'a mut StackSafeContext) -> ParseResult<Self> {
144        context.enter()?;
145        Ok(Self { context })
146    }
147}
148
149impl<'a> Drop for RecursionGuard<'a> {
150    fn drop(&mut self) {
151        self.context.exit();
152    }
153}
154
155/// RAII guard for reference stack tracking
156pub struct ReferenceStackGuard<'a> {
157    context: &'a mut StackSafeContext,
158}
159
160impl<'a> ReferenceStackGuard<'a> {
161    /// Create a new reference stack guard
162    pub fn new(context: &'a mut StackSafeContext, obj_num: u32, gen_num: u16) -> ParseResult<Self> {
163        context.push_ref(obj_num, gen_num)?;
164        Ok(Self { context })
165    }
166}
167
168impl<'a> Drop for ReferenceStackGuard<'a> {
169    fn drop(&mut self) {
170        self.context.pop_ref();
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// `enter` succeeds up to the configured depth and fails beyond it;
    /// `exit` steps back one level.
    #[test]
    fn test_recursion_limits() {
        let mut ctx = StackSafeContext::with_limits(3, 60);

        // Every level up to the limit is accepted.
        for expected_depth in 1..=3 {
            assert!(ctx.enter().is_ok());
            assert_eq!(ctx.depth, expected_depth);
        }

        // One level past the limit is rejected.
        assert!(ctx.enter().is_err());

        // Leaving a level brings the depth back down.
        ctx.exit();
        assert_eq!(ctx.depth, 2);
    }

    /// A reference is rejected only while it is still on the active stack.
    #[test]
    fn test_cycle_detection() {
        let mut ctx = StackSafeContext::new();

        // Fresh reference: accepted.
        let first = ctx.push_ref(1, 0);
        assert!(first.is_ok());

        // Same reference while still active: circular, rejected.
        let repeated = ctx.push_ref(1, 0);
        assert!(repeated.is_err());

        // An unrelated reference is accepted.
        let other = ctx.push_ref(2, 0);
        assert!(other.is_ok());

        // Unwind both active references: first (2, 0), then (1, 0).
        ctx.pop_ref();
        ctx.pop_ref();

        // (1, 0) is no longer active, so it can be visited again.
        let revisited = ctx.push_ref(1, 0);
        assert!(revisited.is_ok());
    }

    /// Dropping a `RecursionGuard` restores the depth it entered.
    #[test]
    fn test_recursion_guard() {
        let mut ctx = StackSafeContext::new();
        assert_eq!(ctx.depth, 0);

        // The guard borrows the context mutably for its whole lifetime, so it
        // has to be gone before the depth can be inspected again.
        let guard = RecursionGuard::new(&mut ctx).unwrap();
        drop(guard);

        assert_eq!(ctx.depth, 0);
    }

    /// Dropping a `ReferenceStackGuard` pops its reference and marks it completed.
    #[test]
    fn test_reference_stack_guard() {
        let mut ctx = StackSafeContext::new();

        // The guard borrows the context mutably for its whole lifetime, so it
        // has to be gone before the stack can be inspected again.
        let guard = ReferenceStackGuard::new(&mut ctx, 1, 0).unwrap();
        drop(guard);

        // The reference was popped and recorded as completed.
        assert!(ctx.active_stack.is_empty());
        assert!(ctx.completed_refs.contains(&(1, 0)));

        // A completed reference may be visited again.
        assert!(ctx.push_ref(1, 0).is_ok());
    }
}
252}