Skip to main content

pdf_engine/
limits.rs

1//! Resource limits for PDF processing.
2//!
3//! These limits protect against adversarial inputs that could cause OOM, stack overflow,
4//! CPU exhaustion, or zip bombs. All limits are enforced with clean error returns — no panics.
5//!
6//! # Default limits
7//!
8//! The defaults cover 99.9% of real-world PDFs while providing strong safety guarantees:
9//!
10//! | Resource | Default |
11//! |---|---|
12//! | PDF file size | 500 MB |
13//! | Single decompressed stream | 256 MB |
14//! | Total memory per document | 1 GB |
15//! | Object reference depth | 100 levels |
16//! | Content stream operators | 10,000,000 |
//! | Image pixel count | 268,435,456 pixels (16384×16384, ≈268 MP) |
18//! | XFA template nesting depth | 50 levels |
19//! | FormCalc recursion depth | 200 levels |
20
/// Resource limits for a single PDF processing operation.
///
/// Construct via [`ProcessingLimits::default()`] for standard limits,
/// or use the builder methods to customize for your use case.
///
/// Two sets of limits compare equal (`==`) when every field matches, which
/// makes it easy to assert on configuration in tests.
///
/// # Examples
///
/// ```rust
/// use pdf_engine::limits::ProcessingLimits;
///
/// // Default limits (recommended for server-side processing):
/// let limits = ProcessingLimits::default();
///
/// // Stricter limits for WASM/browser context:
/// let wasm_limits = ProcessingLimits::wasm();
///
/// // Custom limits:
/// let custom = ProcessingLimits::default()
///     .max_file_bytes(100 * 1024 * 1024)   // 100 MB
///     .max_stream_bytes(64 * 1024 * 1024);  // 64 MB per stream
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcessingLimits {
    /// Maximum PDF file size in bytes. Default: 500 MB.
    pub max_file_bytes: u64,
    /// Maximum decompressed size of any single stream, in bytes. Default: 256 MB.
    /// Prevents zip bombs via FlateDecode or LZWDecode.
    pub max_stream_bytes: u64,
    /// Maximum total memory allocated per document, in bytes. Default: 1 GB.
    pub max_total_memory_bytes: u64,
    /// Maximum object reference depth (prevents stack overflow on recursive refs). Default: 100.
    pub max_object_depth: u32,
    /// Maximum content stream operators per page. Default: 10,000,000.
    pub max_operator_count: u64,
    /// Maximum pixel count per image (width × height). Default: 268,435,456 (16384²).
    pub max_image_pixels: u64,
    /// Maximum XFA template XML nesting depth. Default: 50.
    pub max_xfa_nesting_depth: u32,
    /// Maximum FormCalc recursion depth. Default: 200.
    pub max_formcalc_depth: u32,
}
62
63impl Default for ProcessingLimits {
64    fn default() -> Self {
65        Self {
66            max_file_bytes: 500 * 1024 * 1024,          // 500 MB
67            max_stream_bytes: 256 * 1024 * 1024,        // 256 MB
68            max_total_memory_bytes: 1024 * 1024 * 1024, // 1 GB
69            max_object_depth: 100,
70            max_operator_count: 10_000_000,
71            max_image_pixels: 16384 * 16384, // 268 MP
72            max_xfa_nesting_depth: 50,
73            max_formcalc_depth: 200,
74        }
75    }
76}
77
78impl ProcessingLimits {
79    /// Create a new set of limits with default values.
80    pub fn new() -> Self {
81        Self::default()
82    }
83
84    /// Strict limits for WASM/browser contexts with limited memory.
85    ///
86    /// - Max file: 50 MB
87    /// - Max stream: 32 MB
88    /// - Max total memory: 128 MB
89    /// - Image pixels: 64 MP (8192×8192)
90    pub fn wasm() -> Self {
91        Self {
92            max_file_bytes: 50 * 1024 * 1024,          // 50 MB
93            max_stream_bytes: 32 * 1024 * 1024,        // 32 MB
94            max_total_memory_bytes: 128 * 1024 * 1024, // 128 MB
95            max_object_depth: 50,
96            max_operator_count: 1_000_000,
97            max_image_pixels: 8192 * 8192, // 64 MP
98            max_xfa_nesting_depth: 30,
99            max_formcalc_depth: 100,
100        }
101    }
102
103    /// Unlimited: no resource limits (use only in trusted environments).
104    pub fn unlimited() -> Self {
105        Self {
106            max_file_bytes: u64::MAX,
107            max_stream_bytes: u64::MAX,
108            max_total_memory_bytes: u64::MAX,
109            max_object_depth: u32::MAX,
110            max_operator_count: u64::MAX,
111            max_image_pixels: u64::MAX,
112            max_xfa_nesting_depth: u32::MAX,
113            max_formcalc_depth: u32::MAX,
114        }
115    }
116
117    /// Set maximum PDF file size.
118    pub fn max_file_bytes(mut self, bytes: u64) -> Self {
119        self.max_file_bytes = bytes;
120        self
121    }
122
123    /// Set maximum decompressed stream size.
124    pub fn max_stream_bytes(mut self, bytes: u64) -> Self {
125        self.max_stream_bytes = bytes;
126        self
127    }
128
129    /// Set maximum total memory per document.
130    pub fn max_total_memory_bytes(mut self, bytes: u64) -> Self {
131        self.max_total_memory_bytes = bytes;
132        self
133    }
134
135    /// Set maximum object reference depth.
136    pub fn max_object_depth(mut self, depth: u32) -> Self {
137        self.max_object_depth = depth;
138        self
139    }
140
141    /// Set maximum content stream operator count.
142    pub fn max_operator_count(mut self, count: u64) -> Self {
143        self.max_operator_count = count;
144        self
145    }
146
147    /// Set maximum image pixel count (width × height).
148    pub fn max_image_pixels(mut self, pixels: u64) -> Self {
149        self.max_image_pixels = pixels;
150        self
151    }
152
153    /// Set maximum XFA template nesting depth.
154    pub fn max_xfa_nesting_depth(mut self, depth: u32) -> Self {
155        self.max_xfa_nesting_depth = depth;
156        self
157    }
158
159    /// Set maximum FormCalc recursion depth.
160    pub fn max_formcalc_depth(mut self, depth: u32) -> Self {
161        self.max_formcalc_depth = depth;
162        self
163    }
164
165    /// Check if a file size is within limits. Returns `Err` with a descriptive message if exceeded.
166    pub fn check_file_size(&self, bytes: u64) -> Result<(), LimitError> {
167        if bytes > self.max_file_bytes {
168            Err(LimitError::FileTooLarge {
169                actual_bytes: bytes,
170                limit_bytes: self.max_file_bytes,
171            })
172        } else {
173            Ok(())
174        }
175    }
176
177    /// Check if a decompressed stream size is within limits.
178    pub fn check_stream_size(&self, bytes: u64) -> Result<(), LimitError> {
179        if bytes > self.max_stream_bytes {
180            Err(LimitError::StreamTooLarge {
181                actual_bytes: bytes,
182                limit_bytes: self.max_stream_bytes,
183            })
184        } else {
185            Ok(())
186        }
187    }
188
189    /// Check if image dimensions are within limits.
190    pub fn check_image_pixels(&self, width: u64, height: u64) -> Result<(), LimitError> {
191        let pixels = width.saturating_mul(height);
192        if pixels > self.max_image_pixels {
193            Err(LimitError::ImageTooLarge {
194                width,
195                height,
196                pixels,
197                limit_pixels: self.max_image_pixels,
198            })
199        } else {
200            Ok(())
201        }
202    }
203
204    /// Check if an object depth is within limits.
205    pub fn check_object_depth(&self, depth: u32) -> Result<(), LimitError> {
206        if depth > self.max_object_depth {
207            Err(LimitError::ObjectDepthExceeded {
208                depth,
209                limit: self.max_object_depth,
210            })
211        } else {
212            Ok(())
213        }
214    }
215}
216
/// Reports that an input went past one of the configured resource limits.
///
/// Each variant pairs the value that was observed with the limit it was
/// compared against, so a caller can build a precise message (or decide
/// whether to retry with a larger [`ProcessingLimits`]) without measuring
/// the input a second time.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LimitError {
    /// The PDF file is larger than [`ProcessingLimits::max_file_bytes`].
    FileTooLarge {
        /// Size of the file that was checked, in bytes.
        actual_bytes: u64,
        /// The file-size limit in force, in bytes.
        limit_bytes: u64,
    },
    /// A single decompressed stream is larger than
    /// [`ProcessingLimits::max_stream_bytes`]. This is the guard against
    /// streams (e.g. Flate or LZW) that expand far beyond their
    /// compressed size.
    StreamTooLarge {
        /// Decompressed size of the stream that was checked, in bytes.
        actual_bytes: u64,
        /// The per-stream size limit in force, in bytes.
        limit_bytes: u64,
    },
    /// An image has more pixels than [`ProcessingLimits::max_image_pixels`].
    /// Only the pixel count (`width * height`) matters; color depth is not
    /// part of the comparison.
    ImageTooLarge {
        /// Width of the image, in pixels.
        width: u64,
        /// Height of the image, in pixels.
        height: u64,
        /// Total pixel count that was compared against the limit.
        pixels: u64,
        /// The pixel-count limit in force.
        limit_pixels: u64,
    },
    /// An object reference chain is deeper than
    /// [`ProcessingLimits::max_object_depth`].
    ObjectDepthExceeded {
        /// Depth that was reached.
        depth: u32,
        /// The depth limit in force.
        limit: u32,
    },
    /// A content stream has more operators than
    /// [`ProcessingLimits::max_operator_count`].
    TooManyOperators {
        /// Number of operators that was reached.
        count: u64,
        /// The operator-count limit in force.
        limit: u64,
    },
    /// An XFA template is nested deeper than
    /// [`ProcessingLimits::max_xfa_nesting_depth`].
    XfaNestingTooDeep {
        /// Nesting depth that was reached.
        depth: u32,
        /// The nesting-depth limit in force.
        limit: u32,
    },
    /// FormCalc evaluation recursed deeper than
    /// [`ProcessingLimits::max_formcalc_depth`].
    FormCalcRecursionTooDeep {
        /// Recursion depth that was reached.
        depth: u32,
        /// The recursion-depth limit in force.
        limit: u32,
    },
}
297
298impl std::fmt::Display for LimitError {
299    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
300        match self {
301            Self::FileTooLarge {
302                actual_bytes,
303                limit_bytes,
304            } => write!(
305                f,
306                "PDF file too large: {} MB (limit: {} MB)",
307                actual_bytes / 1024 / 1024,
308                limit_bytes / 1024 / 1024
309            ),
310            Self::StreamTooLarge {
311                actual_bytes,
312                limit_bytes,
313            } => write!(
314                f,
315                "Decompressed stream too large: {} MB (limit: {} MB)",
316                actual_bytes / 1024 / 1024,
317                limit_bytes / 1024 / 1024
318            ),
319            Self::ImageTooLarge {
320                width,
321                height,
322                pixels,
323                limit_pixels,
324            } => write!(
325                f,
326                "Image too large: {}×{} ({} MP, limit: {} MP)",
327                width,
328                height,
329                pixels / 1_000_000,
330                limit_pixels / 1_000_000
331            ),
332            Self::ObjectDepthExceeded { depth, limit } => write!(
333                f,
334                "Object reference depth exceeded: {} (limit: {})",
335                depth, limit
336            ),
337            Self::TooManyOperators { count, limit } => write!(
338                f,
339                "Content stream has too many operators: {} (limit: {})",
340                count, limit
341            ),
342            Self::XfaNestingTooDeep { depth, limit } => write!(
343                f,
344                "XFA template nesting too deep: {} (limit: {})",
345                depth, limit
346            ),
347            Self::FormCalcRecursionTooDeep { depth, limit } => write!(
348                f,
349                "FormCalc recursion too deep: {} (limit: {})",
350                depth, limit
351            ),
352        }
353    }
354}
355
// Empty impl: `LimitError` uses the default `std::error::Error` methods, so it
// reports no underlying `source()`; its message comes from the `Display` impl.
impl std::error::Error for LimitError {}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// Flatten the limits into a tuple so two sets can be compared field by field.
    fn fields(l: &ProcessingLimits) -> (u64, u64, u64, u32, u64, u64, u32, u32) {
        (
            l.max_file_bytes,
            l.max_stream_bytes,
            l.max_total_memory_bytes,
            l.max_object_depth,
            l.max_operator_count,
            l.max_image_pixels,
            l.max_xfa_nesting_depth,
            l.max_formcalc_depth,
        )
    }

    #[test]
    fn test_default_limits() {
        let l = ProcessingLimits::default();
        assert_eq!(l.max_file_bytes, 500 * 1024 * 1024);
        assert_eq!(l.max_stream_bytes, 256 * 1024 * 1024);
        assert_eq!(l.max_total_memory_bytes, 1024 * 1024 * 1024);
        assert_eq!(l.max_object_depth, 100);
        assert_eq!(l.max_operator_count, 10_000_000);
        assert_eq!(l.max_image_pixels, 16384 * 16384);
        assert_eq!(l.max_xfa_nesting_depth, 50);
        assert_eq!(l.max_formcalc_depth, 200);
    }

    #[test]
    fn test_new_matches_default() {
        assert_eq!(
            fields(&ProcessingLimits::new()),
            fields(&ProcessingLimits::default())
        );
    }

    #[test]
    fn test_wasm_limits_stricter_than_default() {
        let wasm = ProcessingLimits::wasm();
        let default = ProcessingLimits::default();
        assert!(wasm.max_file_bytes < default.max_file_bytes);
        assert!(wasm.max_stream_bytes < default.max_stream_bytes);
        assert!(wasm.max_total_memory_bytes < default.max_total_memory_bytes);
        assert!(wasm.max_object_depth < default.max_object_depth);
        assert!(wasm.max_operator_count < default.max_operator_count);
        assert!(wasm.max_image_pixels < default.max_image_pixels);
        assert!(wasm.max_xfa_nesting_depth < default.max_xfa_nesting_depth);
        assert!(wasm.max_formcalc_depth < default.max_formcalc_depth);
    }

    #[test]
    fn test_unlimited_never_rejects() {
        let l = ProcessingLimits::unlimited();
        assert!(l.check_file_size(u64::MAX).is_ok());
        assert!(l.check_stream_size(u64::MAX).is_ok());
        // The saturated product equals the limit, which is still within bounds.
        assert!(l.check_image_pixels(u64::MAX, u64::MAX).is_ok());
        assert!(l.check_object_depth(u32::MAX).is_ok());
    }

    #[test]
    fn test_file_size_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_file_size(100 * 1024 * 1024).is_ok()); // 100 MB: ok
        assert!(l.check_file_size(500 * 1024 * 1024).is_ok()); // 500 MB: ok (at limit)
        assert!(l.check_file_size(501 * 1024 * 1024).is_err()); // 501 MB: exceeded
    }

    #[test]
    fn test_image_pixel_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_image_pixels(1920, 1080).is_ok());
        assert!(l.check_image_pixels(16384, 16384).is_ok()); // at limit
        assert!(l.check_image_pixels(16385, 16384).is_err()); // just over
    }

    #[test]
    fn test_image_pixel_check_saturates_on_overflow() {
        let l = ProcessingLimits::default();
        // width * height would wrap in u64; the check must clamp, not wrap.
        assert_eq!(
            l.check_image_pixels(u64::MAX, 2),
            Err(LimitError::ImageTooLarge {
                width: u64::MAX,
                height: 2,
                pixels: u64::MAX,
                limit_pixels: 16384 * 16384,
            })
        );
    }

    #[test]
    fn test_stream_size_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_stream_size(100 * 1024 * 1024).is_ok());
        assert!(l.check_stream_size(256 * 1024 * 1024).is_ok()); // at limit
        assert!(l.check_stream_size(257 * 1024 * 1024).is_err()); // exceeded
    }

    #[test]
    fn test_object_depth_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_object_depth(0).is_ok());
        assert!(l.check_object_depth(100).is_ok()); // at limit
        assert_eq!(
            l.check_object_depth(101),
            Err(LimitError::ObjectDepthExceeded {
                depth: 101,
                limit: 100,
            })
        );
    }

    #[test]
    fn test_builder_pattern() {
        let l = ProcessingLimits::default()
            .max_file_bytes(10 * 1024 * 1024)
            .max_stream_bytes(5 * 1024 * 1024);
        assert_eq!(l.max_file_bytes, 10 * 1024 * 1024);
        assert_eq!(l.max_stream_bytes, 5 * 1024 * 1024);
    }

    #[test]
    fn test_builder_covers_every_field() {
        let l = ProcessingLimits::default()
            .max_file_bytes(1)
            .max_stream_bytes(2)
            .max_total_memory_bytes(3)
            .max_object_depth(4)
            .max_operator_count(5)
            .max_image_pixels(6)
            .max_xfa_nesting_depth(7)
            .max_formcalc_depth(8);
        assert_eq!(fields(&l), (1, 2, 3, 4, 5, 6, 7, 8));
    }

    #[test]
    fn test_limit_error_display() {
        let err = LimitError::FileTooLarge {
            actual_bytes: 600 * 1024 * 1024,
            limit_bytes: 500 * 1024 * 1024,
        };
        let msg = err.to_string();
        assert!(msg.contains("600 MB"));
        assert!(msg.contains("500 MB"));
    }

    #[test]
    fn test_limit_error_display_depth_and_count() {
        assert_eq!(
            LimitError::ObjectDepthExceeded {
                depth: 101,
                limit: 100,
            }
            .to_string(),
            "Object reference depth exceeded: 101 (limit: 100)"
        );
        assert_eq!(
            LimitError::TooManyOperators {
                count: 11,
                limit: 10,
            }
            .to_string(),
            "Content stream has too many operators: 11 (limit: 10)"
        );
    }
}