Skip to main content

sanitize_engine/processor/
limits.rs

//! Centralized safety limits for all structured processors.
//!
//! Keeping limits in one place makes it easy to audit, compare, and update
//! them together. Non-default values are documented with the reason they
//! differ from the standard.

// ---------------------------------------------------------------------------
// Input size caps (bytes)
// ---------------------------------------------------------------------------

/// Standard maximum input size, in bytes, for most structured processors
/// (256 MiB).
///
/// Inputs exceeding this are rejected before parsing to prevent OOM (F-04 fix).
pub(crate) const DEFAULT_INPUT_SIZE: usize = 256 * 1024 * 1024;

/// Maximum input size, in bytes, for YAML (64 MiB).
///
/// Smaller than the default because `serde_yaml_ng` fully expands
/// aliases/anchors during deserialization, so a small file can balloon into
/// gigabytes of in-memory nodes (alias/anchor bomb, F-06 fix).
pub(crate) const YAML_INPUT_SIZE: usize = 64 * 1024 * 1024;

// ---------------------------------------------------------------------------
// Recursion / nesting depth caps
// ---------------------------------------------------------------------------

/// Standard maximum recursion depth for tree-walking processors (JSON, YAML,
/// TOML).
///
/// Prevents stack overflow from deeply nested or malicious inputs (R-4 fix).
pub(crate) const DEFAULT_DEPTH: usize = 128;

/// Maximum element nesting depth for the XML processor.
///
/// Higher than the default because deeply nested XML documents are common in
/// practice (e.g. Maven POMs, Android manifests) and this processor handles
/// XML iteratively rather than recursively (R-5 fix).
pub(crate) const XML_DEPTH: usize = 256;

// ---------------------------------------------------------------------------
// YAML-specific limits
// ---------------------------------------------------------------------------

/// Maximum total number of YAML nodes after alias expansion.
///
/// `serde_yaml_ng` expands aliases into full value copies during
/// deserialization; this caps the total node count to prevent exponential
/// growth (F-06 fix).
pub(crate) const YAML_NODE_COUNT: usize = 10_000_000;

// ---------------------------------------------------------------------------
// Archive limits
// ---------------------------------------------------------------------------

/// Maximum size, in bytes, of a single archive entry loaded into memory for
/// structured processing (256 MiB).
///
/// Larger entries are streamed through the scanner instead (M-3 fix).
pub(crate) const STRUCTURED_ENTRY_SIZE: u64 = 256 * 1024 * 1024;

/// Maximum total uncompressed data size, in bytes, across all zip entries
/// before the parallel processing path is disabled (256 MiB).
///
/// Above this threshold the zip processor falls back to sequential entry
/// processing to avoid holding the entire archive in memory at once.
pub(crate) const PARALLEL_ZIP_DATA_SIZE: u64 = 256 * 1024 * 1024;

/// Maximum total buffered data size, in bytes, across all tar entries before
/// parallel processing is disabled (256 MiB).
///
/// Unlike zip, tar has no central directory, so entry sizes cannot be known
/// before reading. Entries are buffered speculatively; if the running total
/// exceeds this cap, the parallel path is abandoned and the remaining entries
/// are processed sequentially from the stream.
pub(crate) const PARALLEL_TAR_DATA_SIZE: u64 = 256 * 1024 * 1024;

/// Default maximum nesting depth for recursive archive processing.
///
/// Depth 0 is the top-level archive. Nested archives at depths 1 through
/// `DEFAULT_ARCHIVE_DEPTH` are recursively extracted and sanitized. Exceeding
/// this limit returns
/// [`SanitizeError::RecursionDepthExceeded`](crate::error::SanitizeError::RecursionDepthExceeded).
pub const DEFAULT_ARCHIVE_DEPTH: u32 = 5;

/// Absolute maximum allowed value for `--max-archive-depth`.
///
/// Each nesting level can buffer up to [`STRUCTURED_ENTRY_SIZE`] bytes, so
/// capping at 10 bounds peak memory to ~2.5 GiB (10 × 256 MiB) in the worst
/// case.
pub(crate) const MAX_ARCHIVE_DEPTH: u32 = 10;

/// Minimum number of file entries in an archive before parallel entry
/// processing is enabled.
///
/// Below this threshold, rayon task overhead exceeds the parallelism benefit.
pub(crate) const PARALLEL_ENTRY_THRESHOLD: usize = 4;