Skip to main content

rust_yaml/
limits.rs

1//! Resource limits for secure YAML processing
2
3use crate::{Error, Result};
4use std::time::Duration;
5
/// Resource limits configuration for YAML processing.
///
/// Every `max_*` field is an inclusive upper bound: the matching
/// `ResourceTracker` check reports an error only once the tracked value
/// becomes strictly greater than the limit.
///
/// The type derives `PartialEq`/`Eq` so two configurations can be compared
/// directly (for example in tests or when detecting configuration changes).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Limits {
    /// Maximum nesting depth for collections
    pub max_depth: usize,
    /// Maximum number of anchors in a document
    pub max_anchors: usize,
    /// Maximum document size in bytes
    pub max_document_size: usize,
    /// Maximum string length in characters
    pub max_string_length: usize,
    /// Maximum alias expansion depth
    pub max_alias_depth: usize,
    /// Maximum number of items in a collection
    pub max_collection_size: usize,
    /// Maximum complexity score (calculated based on structure)
    pub max_complexity_score: usize,
    /// Maximum total number of nodes materialized by alias expansion in one
    /// document. Closes the billion-laughs gap where wide alias fan-out
    /// allocates millions of nodes before `max_complexity_score` fires.
    /// The check runs *before* each alias clone so memory cannot blow up
    /// between the check and the materialization.
    pub max_total_alias_nodes: usize,
    /// Timeout for parsing operations. This module only stores the value;
    /// nothing in `ResourceTracker` reads it.
    pub timeout: Option<Duration>,
}
32
33impl Default for Limits {
34    fn default() -> Self {
35        Self {
36            max_depth: 1000,
37            max_anchors: 10_000,
38            max_document_size: 100 * 1024 * 1024, // 100MB
39            max_string_length: 10 * 1024 * 1024,  // 10MB
40            max_alias_depth: 100,
41            max_collection_size: 1_000_000,
42            max_complexity_score: 1_000_000,
43            max_total_alias_nodes: 100_000,
44            timeout: None,
45        }
46    }
47}
48
49impl Limits {
50    /// Creates strict limits for untrusted input
51    pub fn strict() -> Self {
52        Self {
53            max_depth: 50,
54            max_anchors: 100,
55            max_document_size: 1024 * 1024, // 1MB
56            max_string_length: 64 * 1024,   // 64KB
57            max_alias_depth: 5,
58            max_collection_size: 10_000,
59            max_complexity_score: 10_000,
60            max_total_alias_nodes: 1_000,
61            timeout: Some(Duration::from_secs(5)),
62        }
63    }
64
65    /// Creates permissive limits for trusted input
66    pub fn permissive() -> Self {
67        Self {
68            max_depth: 10_000,
69            max_anchors: 100_000,
70            max_document_size: 1024 * 1024 * 1024, // 1GB
71            max_string_length: 100 * 1024 * 1024,  // 100MB
72            max_alias_depth: 1000,
73            max_collection_size: 10_000_000,
74            max_complexity_score: 100_000_000,
75            max_total_alias_nodes: 10_000_000,
76            timeout: None,
77        }
78    }
79
80    /// Creates unlimited configuration (use with caution)
81    pub fn unlimited() -> Self {
82        Self {
83            max_depth: usize::MAX,
84            max_anchors: usize::MAX,
85            max_document_size: usize::MAX,
86            max_string_length: usize::MAX,
87            max_alias_depth: usize::MAX,
88            max_collection_size: usize::MAX,
89            max_complexity_score: usize::MAX,
90            max_total_alias_nodes: usize::MAX,
91            timeout: None,
92        }
93    }
94}
95
/// Running counters for the resources consumed while parsing.
///
/// All counters start at zero (the derived `Default`) and are updated through
/// the tracker's `check_*` / `add_*` / `enter_alias` / `exit_alias` methods.
#[derive(Clone, Debug, Default)]
pub struct ResourceTracker {
    /// Depth passed to the most recent `check_depth` call.
    current_depth: usize,
    /// Largest depth ever passed to `check_depth`.
    max_depth_seen: usize,
    /// Number of anchors registered via `add_anchor`.
    anchor_count: usize,
    /// Sum of all byte counts reported via `add_bytes`.
    bytes_processed: usize,
    /// Current alias nesting level (`enter_alias` minus `exit_alias`).
    alias_depth: usize,
    /// Accumulated score reported via `add_complexity`.
    complexity_score: usize,
    /// Number of `add_collection_item` calls. Within this file the counter is
    /// cumulative; only a tracker reset clears it.
    collection_items: usize,
    /// Cumulative count of nodes materialized via alias expansion in the
    /// current document. Guards the billion-laughs gap where
    /// `complexity_score` alone trips only *after* substantial allocation.
    total_alias_nodes: usize,
}
111
112impl ResourceTracker {
113    /// Creates a new resource tracker
114    pub fn new() -> Self {
115        Self::default()
116    }
117
118    /// Checks if depth limit is exceeded
119    pub fn check_depth(&mut self, limits: &Limits, depth: usize) -> Result<()> {
120        self.current_depth = depth;
121        self.max_depth_seen = self.max_depth_seen.max(depth);
122
123        if depth > limits.max_depth {
124            return Err(Error::limit_exceeded(format!(
125                "Maximum depth {} exceeded",
126                limits.max_depth
127            )));
128        }
129        Ok(())
130    }
131
132    /// Increments and checks anchor count
133    pub fn add_anchor(&mut self, limits: &Limits) -> Result<()> {
134        self.anchor_count += 1;
135        if self.anchor_count > limits.max_anchors {
136            return Err(Error::limit_exceeded(format!(
137                "Maximum anchor count {} exceeded",
138                limits.max_anchors
139            )));
140        }
141        Ok(())
142    }
143
144    /// Tracks bytes processed
145    pub fn add_bytes(&mut self, limits: &Limits, bytes: usize) -> Result<()> {
146        self.bytes_processed += bytes;
147        if self.bytes_processed > limits.max_document_size {
148            return Err(Error::limit_exceeded(format!(
149                "Maximum document size {} exceeded",
150                limits.max_document_size
151            )));
152        }
153        Ok(())
154    }
155
156    /// Checks string length
157    pub fn check_string_length(&self, limits: &Limits, length: usize) -> Result<()> {
158        if length > limits.max_string_length {
159            return Err(Error::limit_exceeded(format!(
160                "Maximum string length {} exceeded",
161                limits.max_string_length
162            )));
163        }
164        Ok(())
165    }
166
167    /// Tracks alias expansion depth
168    pub fn enter_alias(&mut self, limits: &Limits) -> Result<()> {
169        if self.alias_depth + 1 > limits.max_alias_depth {
170            return Err(Error::limit_exceeded(format!(
171                "Maximum alias depth {} exceeded",
172                limits.max_alias_depth
173            )));
174        }
175        self.alias_depth += 1;
176        Ok(())
177    }
178
179    /// Exits alias expansion
180    pub fn exit_alias(&mut self) {
181        if self.alias_depth > 0 {
182            self.alias_depth -= 1;
183        }
184    }
185
186    /// Tracks collection items
187    pub fn add_collection_item(&mut self, limits: &Limits) -> Result<()> {
188        self.collection_items += 1;
189        if self.collection_items > limits.max_collection_size {
190            return Err(Error::limit_exceeded(format!(
191                "Maximum collection size {} exceeded",
192                limits.max_collection_size
193            )));
194        }
195        Ok(())
196    }
197
198    /// Adds to complexity score
199    pub fn add_complexity(&mut self, limits: &Limits, score: usize) -> Result<()> {
200        self.complexity_score += score;
201        if self.complexity_score > limits.max_complexity_score {
202            return Err(Error::limit_exceeded(format!(
203                "Maximum complexity score {} exceeded",
204                limits.max_complexity_score
205            )));
206        }
207        Ok(())
208    }
209
210    /// Charges an alias-expansion materialization against the cumulative
211    /// node-count budget. Call this *before* cloning the anchored value so
212    /// the check fires before memory is committed.
213    ///
214    /// `nodes` is the node count of the resolved value (e.g.
215    /// `calculate_value_complexity`).
216    ///
217    /// # Errors
218    /// Returns an error if the cumulative materialization would exceed
219    /// `limits.max_total_alias_nodes`.
220    pub fn add_alias_materialization(&mut self, limits: &Limits, nodes: usize) -> Result<()> {
221        self.total_alias_nodes = self.total_alias_nodes.saturating_add(nodes);
222        if self.total_alias_nodes > limits.max_total_alias_nodes {
223            return Err(Error::limit_exceeded(format!(
224                "Maximum cumulative alias materialization {} exceeded \
225                 (attempted to materialize {nodes} more nodes)",
226                limits.max_total_alias_nodes
227            )));
228        }
229        Ok(())
230    }
231
232    /// Resets the tracker for a new document
233    pub fn reset(&mut self) {
234        *self = Self::new();
235    }
236
237    /// Gets current statistics
238    pub fn stats(&self) -> ResourceStats {
239        ResourceStats {
240            max_depth: self.max_depth_seen,
241            anchor_count: self.anchor_count,
242            bytes_processed: self.bytes_processed,
243            complexity_score: self.complexity_score,
244            collection_items: self.collection_items,
245        }
246    }
247}
248
/// Resource usage statistics.
///
/// A plain snapshot of counters, as produced by `ResourceTracker::stats`.
/// The type derives `PartialEq`/`Eq` so a whole snapshot can be compared in
/// one assertion.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResourceStats {
    /// Maximum depth reached during processing
    pub max_depth: usize,
    /// Total number of anchors encountered
    pub anchor_count: usize,
    /// Total bytes processed
    pub bytes_processed: usize,
    /// Total complexity score
    pub complexity_score: usize,
    /// Total collection items processed
    pub collection_items: usize,
}
263
#[cfg(test)]
mod tests {
    use super::*;

    /// The default preset exposes the expected depth and anchor caps.
    #[test]
    fn test_default_limits() {
        let Limits {
            max_depth,
            max_anchors,
            ..
        } = Limits::default();

        assert_eq!(max_depth, 1000);
        assert_eq!(max_anchors, 10_000);
    }

    /// The strict preset exposes the expected caps and carries a timeout.
    #[test]
    fn test_strict_limits() {
        let strict = Limits::strict();

        assert_eq!(strict.max_depth, 50);
        assert_eq!(strict.max_anchors, 100);
        assert!(strict.timeout.is_some());
    }

    /// Depth and anchor checks accept values up to the limit and reject the
    /// first value past it.
    #[test]
    fn test_resource_tracker() {
        let strict = Limits::strict();
        let mut tracker = ResourceTracker::new();

        // Depth: 10 is within the cap of 50, 51 is one past it.
        assert!(tracker.check_depth(&strict, 10).is_ok());
        assert!(tracker.check_depth(&strict, 51).is_err());

        // Anchors: exactly 100 are accepted, the 101st is rejected.
        let all_accepted = (0..100).all(|_| tracker.add_anchor(&strict).is_ok());
        assert!(all_accepted);
        assert!(tracker.add_anchor(&strict).is_err());
    }

    /// Alias depth is capped at the limit and freed again by `exit_alias`.
    #[test]
    fn test_alias_depth_tracking() {
        let strict = Limits::strict();
        let mut tracker = ResourceTracker::new();

        // Five nested aliases fit; the sixth does not.
        let all_entered = (0..5).all(|_| tracker.enter_alias(&strict).is_ok());
        assert!(all_entered);
        assert!(tracker.enter_alias(&strict).is_err());

        // Leaving one level makes room for exactly one more.
        tracker.exit_alias();
        assert!(tracker.enter_alias(&strict).is_ok());
    }
}