pdf_engine/limits.rs
1//! Resource limits for PDF processing.
2//!
3//! These limits protect against adversarial inputs that could cause OOM, stack overflow,
4//! CPU exhaustion, or zip bombs. All limits are enforced with clean error returns — no panics.
5//!
6//! # Default limits
7//!
8//! The defaults cover 99.9% of real-world PDFs while providing strong safety guarantees:
9//!
10//! | Resource | Default |
11//! |---|---|
12//! | PDF file size | 500 MB |
13//! | Single decompressed stream | 256 MB |
14//! | Total memory per document | 1 GB |
15//! | Object reference depth | 100 levels |
//! | Content stream operators (per page) | 10,000,000 |
//! | Image pixel count | 268,435,456 pixels (16384×16384, ≈268 MP) |
18//! | XFA template nesting depth | 50 levels |
19//! | FormCalc recursion depth | 200 levels |
20
/// Resource limits for a single PDF processing operation.
///
/// Each field is an upper bound on one resource. Construct via
/// [`ProcessingLimits::default()`] for standard limits, or use the builder
/// methods to customize for your use case.
///
/// Two limit sets compare equal when every cap is equal, which makes it easy
/// to detect whether a configuration was customized.
///
/// # Examples
///
/// ```rust
/// use pdf_engine::limits::ProcessingLimits;
///
/// // Default limits (recommended for server-side processing):
/// let limits = ProcessingLimits::default();
///
/// // Stricter limits for WASM/browser context:
/// let wasm_limits = ProcessingLimits::wasm();
///
/// // Custom limits:
/// let custom = ProcessingLimits::default()
///     .max_file_bytes(100 * 1024 * 1024) // 100 MB
///     .max_stream_bytes(64 * 1024 * 1024); // 64 MB per stream
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcessingLimits {
    /// Maximum PDF file size in bytes. Default: 500 MB.
    pub max_file_bytes: u64,
    /// Maximum decompressed size of any single stream, in bytes. Default: 256 MB.
    /// Prevents zip bombs via FlateDecode or LZWDecode.
    pub max_stream_bytes: u64,
    /// Maximum total memory allocated per document, in bytes. Default: 1 GB.
    pub max_total_memory_bytes: u64,
    /// Maximum object reference depth (prevents stack overflow on recursive refs). Default: 100.
    pub max_object_depth: u32,
    /// Maximum content stream operators per page. Default: 10,000,000.
    pub max_operator_count: u64,
    /// Maximum pixel count per image (width × height). Default: 268,435,456 (16384²).
    pub max_image_pixels: u64,
    /// Maximum XFA template XML nesting depth. Default: 50.
    pub max_xfa_nesting_depth: u32,
    /// Maximum FormCalc recursion depth. Default: 200.
    pub max_formcalc_depth: u32,
}
62
63impl Default for ProcessingLimits {
64 fn default() -> Self {
65 Self {
66 max_file_bytes: 500 * 1024 * 1024, // 500 MB
67 max_stream_bytes: 256 * 1024 * 1024, // 256 MB
68 max_total_memory_bytes: 1024 * 1024 * 1024, // 1 GB
69 max_object_depth: 100,
70 max_operator_count: 10_000_000,
71 max_image_pixels: 16384 * 16384, // 268 MP
72 max_xfa_nesting_depth: 50,
73 max_formcalc_depth: 200,
74 }
75 }
76}
77
78impl ProcessingLimits {
79 /// Create a new set of limits with default values.
80 pub fn new() -> Self {
81 Self::default()
82 }
83
84 /// Strict limits for WASM/browser contexts with limited memory.
85 ///
86 /// - Max file: 50 MB
87 /// - Max stream: 32 MB
88 /// - Max total memory: 128 MB
89 /// - Image pixels: 64 MP (8192×8192)
90 pub fn wasm() -> Self {
91 Self {
92 max_file_bytes: 50 * 1024 * 1024, // 50 MB
93 max_stream_bytes: 32 * 1024 * 1024, // 32 MB
94 max_total_memory_bytes: 128 * 1024 * 1024, // 128 MB
95 max_object_depth: 50,
96 max_operator_count: 1_000_000,
97 max_image_pixels: 8192 * 8192, // 64 MP
98 max_xfa_nesting_depth: 30,
99 max_formcalc_depth: 100,
100 }
101 }
102
103 /// Unlimited: no resource limits (use only in trusted environments).
104 pub fn unlimited() -> Self {
105 Self {
106 max_file_bytes: u64::MAX,
107 max_stream_bytes: u64::MAX,
108 max_total_memory_bytes: u64::MAX,
109 max_object_depth: u32::MAX,
110 max_operator_count: u64::MAX,
111 max_image_pixels: u64::MAX,
112 max_xfa_nesting_depth: u32::MAX,
113 max_formcalc_depth: u32::MAX,
114 }
115 }
116
117 /// Set maximum PDF file size.
118 pub fn max_file_bytes(mut self, bytes: u64) -> Self {
119 self.max_file_bytes = bytes;
120 self
121 }
122
123 /// Set maximum decompressed stream size.
124 pub fn max_stream_bytes(mut self, bytes: u64) -> Self {
125 self.max_stream_bytes = bytes;
126 self
127 }
128
129 /// Set maximum total memory per document.
130 pub fn max_total_memory_bytes(mut self, bytes: u64) -> Self {
131 self.max_total_memory_bytes = bytes;
132 self
133 }
134
135 /// Set maximum object reference depth.
136 pub fn max_object_depth(mut self, depth: u32) -> Self {
137 self.max_object_depth = depth;
138 self
139 }
140
141 /// Set maximum content stream operator count.
142 pub fn max_operator_count(mut self, count: u64) -> Self {
143 self.max_operator_count = count;
144 self
145 }
146
147 /// Set maximum image pixel count (width × height).
148 pub fn max_image_pixels(mut self, pixels: u64) -> Self {
149 self.max_image_pixels = pixels;
150 self
151 }
152
153 /// Set maximum XFA template nesting depth.
154 pub fn max_xfa_nesting_depth(mut self, depth: u32) -> Self {
155 self.max_xfa_nesting_depth = depth;
156 self
157 }
158
159 /// Set maximum FormCalc recursion depth.
160 pub fn max_formcalc_depth(mut self, depth: u32) -> Self {
161 self.max_formcalc_depth = depth;
162 self
163 }
164
165 /// Check if a file size is within limits. Returns `Err` with a descriptive message if exceeded.
166 pub fn check_file_size(&self, bytes: u64) -> Result<(), LimitError> {
167 if bytes > self.max_file_bytes {
168 Err(LimitError::FileTooLarge {
169 actual_bytes: bytes,
170 limit_bytes: self.max_file_bytes,
171 })
172 } else {
173 Ok(())
174 }
175 }
176
177 /// Check if a decompressed stream size is within limits.
178 pub fn check_stream_size(&self, bytes: u64) -> Result<(), LimitError> {
179 if bytes > self.max_stream_bytes {
180 Err(LimitError::StreamTooLarge {
181 actual_bytes: bytes,
182 limit_bytes: self.max_stream_bytes,
183 })
184 } else {
185 Ok(())
186 }
187 }
188
189 /// Check if image dimensions are within limits.
190 pub fn check_image_pixels(&self, width: u64, height: u64) -> Result<(), LimitError> {
191 let pixels = width.saturating_mul(height);
192 if pixels > self.max_image_pixels {
193 Err(LimitError::ImageTooLarge {
194 width,
195 height,
196 pixels,
197 limit_pixels: self.max_image_pixels,
198 })
199 } else {
200 Ok(())
201 }
202 }
203
204 /// Check if an object depth is within limits.
205 pub fn check_object_depth(&self, depth: u32) -> Result<(), LimitError> {
206 if depth > self.max_object_depth {
207 Err(LimitError::ObjectDepthExceeded {
208 depth,
209 limit: self.max_object_depth,
210 })
211 } else {
212 Ok(())
213 }
214 }
215}
216
/// Failure reported when an input goes past one of the configured caps.
///
/// Treat a `LimitError` as terminal for the operation that produced it:
/// either reject the input or retry with a [`ProcessingLimits`] whose
/// relevant cap has been raised. Each variant records the value that was
/// observed together with the cap it was compared against, so a caller can
/// build a meaningful message without measuring the input again.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LimitError {
    /// The PDF file is larger than [`ProcessingLimits::max_file_bytes`].
    /// Keeps an oversized input from being handed to the parser at all.
    FileTooLarge {
        /// Size of the file that was seen, in bytes.
        actual_bytes: u64,
        /// File-size cap that was in effect, in bytes.
        limit_bytes: u64,
    },
    /// One stream decompressed to more than
    /// [`ProcessingLimits::max_stream_bytes`]. This is the guard against
    /// decompression bombs: streams (for example Flate or LZW encoded)
    /// that grow far beyond their compressed size.
    StreamTooLarge {
        /// Decompressed size that was seen, in bytes.
        actual_bytes: u64,
        /// Per-stream cap that was in effect, in bytes.
        limit_bytes: u64,
    },
    /// An image has more pixels than [`ProcessingLimits::max_image_pixels`].
    /// The count is `width * height`; color depth does not enter into it.
    /// Bounds the memory needed for rasterization and color conversion.
    ImageTooLarge {
        /// Image width that was seen, in pixels.
        width: u64,
        /// Image height that was seen, in pixels.
        height: u64,
        /// Total pixel count that was seen (`width * height`).
        pixels: u64,
        /// Pixel-count cap that was in effect.
        limit_pixels: u64,
    },
    /// A chain of indirect-object references went deeper than
    /// [`ProcessingLimits::max_object_depth`]. Protects the parser stack
    /// from cyclic or extremely nested object graphs.
    ObjectDepthExceeded {
        /// Reference-chain depth that was reached.
        depth: u32,
        /// Reference-depth cap that was in effect.
        limit: u32,
    },
    /// A content stream contained more operators than
    /// [`ProcessingLimits::max_operator_count`]. Bounds the rendering
    /// work per page so a flood of no-op operators cannot monopolize a CPU.
    TooManyOperators {
        /// Number of operators that was seen.
        count: u64,
        /// Operator-count cap that was in effect.
        limit: u64,
    },
    /// XFA template subforms were nested deeper than
    /// [`ProcessingLimits::max_xfa_nesting_depth`]. Protects the XFA
    /// layout engine from extremely nested templates.
    XfaNestingTooDeep {
        /// Subform nesting depth that was reached.
        depth: u32,
        /// Nesting-depth cap that was in effect.
        limit: u32,
    },
    /// A FormCalc expression recursed deeper than
    /// [`ProcessingLimits::max_formcalc_depth`]. Halts unbounded or deeply
    /// mutually-recursive expressions before they exhaust the interpreter
    /// stack.
    FormCalcRecursionTooDeep {
        /// Recursion depth that was reached.
        depth: u32,
        /// Recursion-depth cap that was in effect.
        limit: u32,
    },
}
297
298impl std::fmt::Display for LimitError {
299 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
300 match self {
301 Self::FileTooLarge {
302 actual_bytes,
303 limit_bytes,
304 } => write!(
305 f,
306 "PDF file too large: {} MB (limit: {} MB)",
307 actual_bytes / 1024 / 1024,
308 limit_bytes / 1024 / 1024
309 ),
310 Self::StreamTooLarge {
311 actual_bytes,
312 limit_bytes,
313 } => write!(
314 f,
315 "Decompressed stream too large: {} MB (limit: {} MB)",
316 actual_bytes / 1024 / 1024,
317 limit_bytes / 1024 / 1024
318 ),
319 Self::ImageTooLarge {
320 width,
321 height,
322 pixels,
323 limit_pixels,
324 } => write!(
325 f,
326 "Image too large: {}×{} ({} MP, limit: {} MP)",
327 width,
328 height,
329 pixels / 1_000_000,
330 limit_pixels / 1_000_000
331 ),
332 Self::ObjectDepthExceeded { depth, limit } => write!(
333 f,
334 "Object reference depth exceeded: {} (limit: {})",
335 depth, limit
336 ),
337 Self::TooManyOperators { count, limit } => write!(
338 f,
339 "Content stream has too many operators: {} (limit: {})",
340 count, limit
341 ),
342 Self::XfaNestingTooDeep { depth, limit } => write!(
343 f,
344 "XFA template nesting too deep: {} (limit: {})",
345 depth, limit
346 ),
347 Self::FormCalcRecursionTooDeep { depth, limit } => write!(
348 f,
349 "FormCalc recursion too deep: {} (limit: {})",
350 depth, limit
351 ),
352 }
353 }
354}
355
356impl std::error::Error for LimitError {}
357
#[cfg(test)]
mod tests {
    //! Unit tests for the limit presets, the builder setters, the `check_*`
    //! helpers (including their inclusive boundaries) and error formatting.

    use super::*;

    #[test]
    fn test_default_limits() {
        let l = ProcessingLimits::default();
        assert_eq!(l.max_file_bytes, 500 * 1024 * 1024);
        assert_eq!(l.max_stream_bytes, 256 * 1024 * 1024);
        assert_eq!(l.max_total_memory_bytes, 1024 * 1024 * 1024);
        assert_eq!(l.max_object_depth, 100);
        assert_eq!(l.max_operator_count, 10_000_000);
        assert_eq!(l.max_image_pixels, 16384 * 16384);
        assert_eq!(l.max_xfa_nesting_depth, 50);
        assert_eq!(l.max_formcalc_depth, 200);
    }

    #[test]
    fn test_wasm_limits_stricter_than_default() {
        let wasm = ProcessingLimits::wasm();
        let default = ProcessingLimits::default();
        assert!(wasm.max_file_bytes < default.max_file_bytes);
        assert!(wasm.max_stream_bytes < default.max_stream_bytes);
        assert!(wasm.max_total_memory_bytes < default.max_total_memory_bytes);
        assert!(wasm.max_object_depth < default.max_object_depth);
        assert!(wasm.max_operator_count < default.max_operator_count);
        assert!(wasm.max_image_pixels < default.max_image_pixels);
        assert!(wasm.max_xfa_nesting_depth < default.max_xfa_nesting_depth);
        assert!(wasm.max_formcalc_depth < default.max_formcalc_depth);
    }

    #[test]
    fn test_unlimited_accepts_extremes() {
        let l = ProcessingLimits::unlimited();
        assert!(l.check_file_size(u64::MAX).is_ok());
        assert!(l.check_stream_size(u64::MAX).is_ok());
        // The product saturates at u64::MAX, which still equals the cap.
        assert!(l.check_image_pixels(u64::MAX, u64::MAX).is_ok());
        assert!(l.check_object_depth(u32::MAX).is_ok());
    }

    #[test]
    fn test_file_size_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_file_size(0).is_ok()); // empty file: ok
        assert!(l.check_file_size(100 * 1024 * 1024).is_ok()); // 100 MB: ok
        assert!(l.check_file_size(500 * 1024 * 1024).is_ok()); // 500 MB: ok (at limit)
        assert!(l.check_file_size(500 * 1024 * 1024 + 1).is_err()); // one byte over
        assert!(l.check_file_size(501 * 1024 * 1024).is_err()); // 501 MB: exceeded
    }

    #[test]
    fn test_image_pixel_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_image_pixels(0, 0).is_ok());
        assert!(l.check_image_pixels(1920, 1080).is_ok());
        assert!(l.check_image_pixels(16384, 16384).is_ok()); // at limit
        assert!(l.check_image_pixels(16385, 16384).is_err()); // just over
    }

    #[test]
    fn test_image_pixel_overflow_is_rejected() {
        // width * height overflows u64; the check must saturate, not wrap.
        let l = ProcessingLimits::default();
        assert_eq!(
            l.check_image_pixels(u64::MAX, 2),
            Err(LimitError::ImageTooLarge {
                width: u64::MAX,
                height: 2,
                pixels: u64::MAX,
                limit_pixels: 16384 * 16384,
            })
        );
    }

    #[test]
    fn test_stream_size_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_stream_size(100 * 1024 * 1024).is_ok());
        assert!(l.check_stream_size(256 * 1024 * 1024).is_ok()); // at limit
        assert!(l.check_stream_size(257 * 1024 * 1024).is_err()); // exceeded
    }

    #[test]
    fn test_object_depth_check() {
        let l = ProcessingLimits::default();
        assert!(l.check_object_depth(0).is_ok());
        assert!(l.check_object_depth(100).is_ok()); // at limit
        assert_eq!(
            l.check_object_depth(101),
            Err(LimitError::ObjectDepthExceeded {
                depth: 101,
                limit: 100,
            })
        );
    }

    #[test]
    fn test_builder_pattern() {
        let l = ProcessingLimits::default()
            .max_file_bytes(10 * 1024 * 1024)
            .max_stream_bytes(5 * 1024 * 1024)
            .max_total_memory_bytes(20 * 1024 * 1024)
            .max_object_depth(7)
            .max_operator_count(8)
            .max_image_pixels(9)
            .max_xfa_nesting_depth(10)
            .max_formcalc_depth(11);
        assert_eq!(l.max_file_bytes, 10 * 1024 * 1024);
        assert_eq!(l.max_stream_bytes, 5 * 1024 * 1024);
        assert_eq!(l.max_total_memory_bytes, 20 * 1024 * 1024);
        assert_eq!(l.max_object_depth, 7);
        assert_eq!(l.max_operator_count, 8);
        assert_eq!(l.max_image_pixels, 9);
        assert_eq!(l.max_xfa_nesting_depth, 10);
        assert_eq!(l.max_formcalc_depth, 11);
    }

    #[test]
    fn test_limit_error_display() {
        let err = LimitError::FileTooLarge {
            actual_bytes: 600 * 1024 * 1024,
            limit_bytes: 500 * 1024 * 1024,
        };
        let msg = err.to_string();
        assert!(msg.contains("600 MB"));
        assert!(msg.contains("500 MB"));
    }

    #[test]
    fn test_limit_error_display_depth() {
        let err = LimitError::ObjectDepthExceeded {
            depth: 101,
            limit: 100,
        };
        assert_eq!(
            err.to_string(),
            "Object reference depth exceeded: 101 (limit: 100)"
        );
    }
}
422}