feedparser_rs/
limits.rs

1//! Parser limits to prevent `DoS` attacks and excessive memory usage
2
/// Resource ceilings applied while parsing a feed
///
/// Each field caps one dimension of the input (item counts, nesting depth,
/// byte lengths) so that a hostile or broken feed cannot drive excessive
/// memory allocation, deep recursion, or similar resource exhaustion.
///
/// # Examples
///
/// ```
/// use feedparser_rs::ParserLimits;
///
/// let limits = ParserLimits::default();
/// assert_eq!(limits.max_entries, 10_000);
///
/// // Tighter limits for a restricted environment
/// let strict = ParserLimits {
///     max_entries: 1_000,
///     max_feed_size_bytes: 10 * 1024 * 1024, // 10MB
///     ..Default::default()
/// };
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ParserLimits {
    /// Upper bound on the number of entries/items in one feed
    ///
    /// Guards against memory exhaustion from feeds carrying millions of
    /// items. Ordinary feeds hold 10-100 entries; big ones may reach 1000.
    ///
    /// Default: 10,000 entries
    pub max_entries: usize,

    /// Upper bound on feed-level (channel-level) links
    ///
    /// Guards against link-bombing attacks.
    ///
    /// Default: 100 links
    pub max_links_per_feed: usize,

    /// Upper bound on links attached to a single entry
    ///
    /// Guards against link bombing inside individual entries.
    ///
    /// Default: 50 links
    pub max_links_per_entry: usize,

    /// Upper bound on authors for a feed or an entry
    ///
    /// Default: 20 authors
    pub max_authors: usize,

    /// Upper bound on contributors for a feed or an entry
    ///
    /// Default: 20 contributors
    pub max_contributors: usize,

    /// Upper bound on tags/categories for a feed or an entry
    ///
    /// Default: 100 tags
    pub max_tags: usize,

    /// Upper bound on content blocks within one entry
    ///
    /// Atom allows an entry to carry several content elements.
    ///
    /// Default: 10 content blocks
    pub max_content_blocks: usize,

    /// Upper bound on enclosures within one entry
    ///
    /// A podcast episode normally carries a single enclosure.
    ///
    /// Default: 20 enclosures
    pub max_enclosures: usize,

    /// Upper bound on the number of XML namespaces
    ///
    /// Guards against namespace pollution attacks.
    ///
    /// Default: 100 namespaces
    pub max_namespaces: usize,

    /// Upper bound on XML element nesting depth
    ///
    /// Guards against stack overflow caused by deeply nested XML.
    ///
    /// Default: 100 levels
    pub max_nesting_depth: usize,

    /// Upper bound, in bytes, on the length of a single text field
    ///
    /// Guards against huge title/description fields eating memory.
    ///
    /// Default: 10 MB
    pub max_text_length: usize,

    /// Upper bound, in bytes, on the size of the whole feed
    ///
    /// The complete feed has to fit inside this budget.
    ///
    /// Default: 100 MB
    pub max_feed_size_bytes: usize,

    /// Upper bound, in bytes, on the length of one attribute value
    ///
    /// XML attribute values are expected to stay reasonably small.
    ///
    /// Default: 64 KB
    pub max_attribute_length: usize,
}
111
112impl Default for ParserLimits {
113    /// Creates default parser limits suitable for general use
114    ///
115    /// These defaults are conservative and should work for most feeds,
116    /// including large podcast feeds and news aggregators.
117    fn default() -> Self {
118        Self {
119            max_entries: 10_000,
120            max_links_per_feed: 100,
121            max_links_per_entry: 50,
122            max_authors: 20,
123            max_contributors: 20,
124            max_tags: 100,
125            max_content_blocks: 10,
126            max_enclosures: 20,
127            max_namespaces: 100,
128            max_nesting_depth: 100,
129            max_text_length: 10 * 1024 * 1024,      // 10 MB
130            max_feed_size_bytes: 100 * 1024 * 1024, // 100 MB
131            max_attribute_length: 64 * 1024,        // 64 KB
132        }
133    }
134}
135
136impl ParserLimits {
137    /// Creates strict limits for resource-constrained environments
138    ///
139    /// Use this for embedded systems or when parsing untrusted feeds
140    /// with minimal resources.
141    ///
142    /// # Examples
143    ///
144    /// ```
145    /// use feedparser_rs::ParserLimits;
146    ///
147    /// let limits = ParserLimits::strict();
148    /// assert_eq!(limits.max_entries, 1_000);
149    /// ```
150    #[must_use]
151    pub const fn strict() -> Self {
152        Self {
153            max_entries: 1_000,
154            max_links_per_feed: 20,
155            max_links_per_entry: 10,
156            max_authors: 5,
157            max_contributors: 5,
158            max_tags: 20,
159            max_content_blocks: 3,
160            max_enclosures: 5,
161            max_namespaces: 20,
162            max_nesting_depth: 50,
163            max_text_length: 1024 * 1024,          // 1 MB
164            max_feed_size_bytes: 10 * 1024 * 1024, // 10 MB
165            max_attribute_length: 8 * 1024,        // 8 KB
166        }
167    }
168
169    /// Creates permissive limits for trusted feeds
170    ///
171    /// Use this only for feeds from trusted sources where you expect
172    /// large data volumes (e.g., feed archives).
173    ///
174    /// # Examples
175    ///
176    /// ```
177    /// use feedparser_rs::ParserLimits;
178    ///
179    /// let limits = ParserLimits::permissive();
180    /// assert_eq!(limits.max_entries, 100_000);
181    /// ```
182    #[must_use]
183    pub const fn permissive() -> Self {
184        Self {
185            max_entries: 100_000,
186            max_links_per_feed: 500,
187            max_links_per_entry: 200,
188            max_authors: 100,
189            max_contributors: 100,
190            max_tags: 500,
191            max_content_blocks: 50,
192            max_enclosures: 100,
193            max_namespaces: 500,
194            max_nesting_depth: 200,
195            max_text_length: 50 * 1024 * 1024,      // 50 MB
196            max_feed_size_bytes: 500 * 1024 * 1024, // 500 MB
197            max_attribute_length: 256 * 1024,       // 256 KB
198        }
199    }
200
201    /// Validates that a feed size is within limits
202    ///
203    /// Call this before starting to parse a feed.
204    ///
205    /// # Errors
206    ///
207    /// Returns an error if the feed exceeds `max_feed_size_bytes`.
208    pub const fn check_feed_size(&self, size: usize) -> Result<(), LimitError> {
209        if size > self.max_feed_size_bytes {
210            Err(LimitError::FeedTooLarge {
211                size,
212                max: self.max_feed_size_bytes,
213            })
214        } else {
215            Ok(())
216        }
217    }
218
219    /// Validates that a collection size is within limits
220    ///
221    /// Use this during parsing to check collection sizes.
222    ///
223    /// # Errors
224    ///
225    /// Returns an error if the collection size exceeds the specified limit.
226    pub const fn check_collection_size(
227        &self,
228        current: usize,
229        limit: usize,
230        name: &'static str,
231    ) -> Result<(), LimitError> {
232        if current >= limit {
233            Err(LimitError::CollectionTooLarge {
234                name,
235                size: current,
236                max: limit,
237            })
238        } else {
239            Ok(())
240        }
241    }
242
243    /// Validates XML nesting depth
244    ///
245    /// # Errors
246    ///
247    /// Returns an error if nesting depth exceeds `max_nesting_depth`.
248    pub const fn check_nesting_depth(&self, depth: usize) -> Result<(), LimitError> {
249        if depth > self.max_nesting_depth {
250            Err(LimitError::NestingTooDeep {
251                depth,
252                max: self.max_nesting_depth,
253            })
254        } else {
255            Ok(())
256        }
257    }
258
259    /// Validates text field length
260    ///
261    /// # Errors
262    ///
263    /// Returns an error if text length exceeds `max_text_length`.
264    pub const fn check_text_length(&self, length: usize) -> Result<(), LimitError> {
265        if length > self.max_text_length {
266            Err(LimitError::TextTooLong {
267                length,
268                max: self.max_text_length,
269            })
270        } else {
271            Ok(())
272        }
273    }
274}
275
276/// Errors that occur when parser limits are exceeded
277#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
278#[allow(missing_docs)] // Fields are self-explanatory from error messages
279pub enum LimitError {
280    /// Feed size exceeds maximum allowed
281    #[error("Feed size ({size} bytes) exceeds maximum ({max} bytes)")]
282    FeedTooLarge { size: usize, max: usize },
283
284    /// Collection (entries, links, etc.) has too many items
285    #[error("Collection '{name}' has {size} items, exceeds maximum ({max})")]
286    CollectionTooLarge {
287        name: &'static str,
288        size: usize,
289        max: usize,
290    },
291
292    /// XML nesting is too deep
293    #[error("XML nesting depth ({depth}) exceeds maximum ({max})")]
294    NestingTooDeep { depth: usize, max: usize },
295
296    /// Text field is too long
297    #[error("Text field length ({length} bytes) exceeds maximum ({max} bytes)")]
298    TextTooLong { length: usize, max: usize },
299}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// The default preset exposes the documented entry and feed-size ceilings.
    #[test]
    fn test_default_limits() {
        let ParserLimits {
            max_entries,
            max_feed_size_bytes,
            ..
        } = ParserLimits::default();
        assert_eq!(max_entries, 10_000);
        assert_eq!(max_feed_size_bytes, 100 * 1024 * 1024);
    }

    /// The strict preset allows fewer entries than the default one.
    #[test]
    fn test_strict_limits() {
        let strict_entries = ParserLimits::strict().max_entries;
        let default_entries = ParserLimits::default().max_entries;
        assert_eq!(strict_entries, 1_000);
        assert!(strict_entries < default_entries);
    }

    /// The permissive preset allows more entries than the default one.
    #[test]
    fn test_permissive_limits() {
        let permissive_entries = ParserLimits::permissive().max_entries;
        let default_entries = ParserLimits::default().max_entries;
        assert_eq!(permissive_entries, 100_000);
        assert!(permissive_entries > default_entries);
    }

    /// A small feed passes the size check.
    #[test]
    fn test_check_feed_size_ok() {
        let outcome = ParserLimits::default().check_feed_size(1024);
        assert_eq!(outcome, Ok(()));
    }

    /// A feed larger than the default ceiling is rejected as `FeedTooLarge`.
    #[test]
    fn test_check_feed_size_too_large() {
        let outcome = ParserLimits::default().check_feed_size(200 * 1024 * 1024);
        assert!(matches!(outcome, Err(LimitError::FeedTooLarge { .. })));
    }

    /// A collection well under its limit passes.
    #[test]
    fn test_check_collection_size_ok() {
        let limits = ParserLimits::default();
        let outcome = limits.check_collection_size(50, limits.max_entries, "entries");
        assert_eq!(outcome, Ok(()));
    }

    /// A collection past its limit is rejected as `CollectionTooLarge`.
    #[test]
    fn test_check_collection_size_too_large() {
        let limits = ParserLimits::default();
        let outcome = limits.check_collection_size(10_001, limits.max_entries, "entries");
        assert!(matches!(
            outcome,
            Err(LimitError::CollectionTooLarge { .. })
        ));
    }

    /// A moderate nesting depth passes.
    #[test]
    fn test_check_nesting_depth_ok() {
        let outcome = ParserLimits::default().check_nesting_depth(50);
        assert_eq!(outcome, Ok(()));
    }

    /// One level past the default depth is rejected as `NestingTooDeep`.
    #[test]
    fn test_check_nesting_depth_too_deep() {
        let outcome = ParserLimits::default().check_nesting_depth(101);
        assert!(matches!(outcome, Err(LimitError::NestingTooDeep { .. })));
    }

    /// A short text field passes.
    #[test]
    fn test_check_text_length_ok() {
        let outcome = ParserLimits::default().check_text_length(1024);
        assert_eq!(outcome, Ok(()));
    }

    /// A text field beyond the default ceiling is rejected as `TextTooLong`.
    #[test]
    fn test_check_text_length_too_long() {
        let outcome = ParserLimits::default().check_text_length(20 * 1024 * 1024);
        assert!(matches!(outcome, Err(LimitError::TextTooLong { .. })));
    }

    /// The rendered error message mentions both the size and the maximum.
    #[test]
    fn test_limit_error_display() {
        let rendered = LimitError::FeedTooLarge {
            size: 200_000_000,
            max: 100_000_000,
        }
        .to_string();
        for needle in ["200000000", "100000000"] {
            assert!(rendered.contains(needle));
        }
    }
}