// feedparser_rs/limits.rs

//! Parser limits to prevent `DoS` attacks and excessive memory usage

/// Parser limits for protecting against denial-of-service attacks
///
/// These limits prevent malicious or malformed feeds from causing excessive
/// memory allocation, deep recursion, or other resource exhaustion issues.
///
/// All fields are public, so a custom configuration can be written with
/// struct update syntax on top of [`Default::default`],
/// [`ParserLimits::strict`] or [`ParserLimits::permissive`].
///
/// # Examples
///
/// ```
/// use feedparser_rs::ParserLimits;
///
/// let limits = ParserLimits::default();
/// assert_eq!(limits.max_entries, 10_000);
///
/// // Custom limits for restricted environments
/// let strict = ParserLimits {
///     max_entries: 1_000,
///     max_feed_size_bytes: 10 * 1024 * 1024, // 10MB
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParserLimits {
    /// Maximum number of entries/items in a feed
    ///
    /// Prevents memory exhaustion from feeds with millions of items.
    /// Typical feeds have 10-100 entries, large feeds may have up to 1000.
    ///
    /// Default: 10,000 entries
    pub max_entries: usize,

    /// Maximum number of links per feed (channel-level)
    ///
    /// Prevents link bombing attacks.
    ///
    /// Default: 100 links
    pub max_links_per_feed: usize,

    /// Maximum number of links per entry
    ///
    /// Prevents link bombing in individual entries.
    ///
    /// Default: 50 links
    pub max_links_per_entry: usize,

    /// Maximum number of authors per feed or entry
    ///
    /// Default: 20 authors
    pub max_authors: usize,

    /// Maximum number of contributors per feed or entry
    ///
    /// Default: 20 contributors
    pub max_contributors: usize,

    /// Maximum number of tags/categories per feed or entry
    ///
    /// Default: 100 tags
    pub max_tags: usize,

    /// Maximum number of content blocks per entry
    ///
    /// Atom feeds can have multiple content elements.
    ///
    /// Default: 10 content blocks
    pub max_content_blocks: usize,

    /// Maximum number of enclosures per entry
    ///
    /// Podcast feeds typically have 1 enclosure per episode.
    ///
    /// Default: 20 enclosures
    pub max_enclosures: usize,

    /// Maximum number of XML namespaces
    ///
    /// Prevents namespace pollution attacks.
    ///
    /// Default: 100 namespaces
    pub max_namespaces: usize,

    /// Maximum XML nesting depth
    ///
    /// Prevents stack overflow from deeply nested XML.
    ///
    /// Default: 100 levels
    pub max_nesting_depth: usize,

    /// Maximum text field length in bytes
    ///
    /// Prevents excessive memory from huge title/description fields.
    ///
    /// Default: 10 MB
    pub max_text_length: usize,

    /// Maximum total feed size in bytes
    ///
    /// The entire feed must fit within this limit.
    ///
    /// Default: 100 MB
    pub max_feed_size_bytes: usize,

    /// Maximum attribute value length in bytes
    ///
    /// XML attributes should be reasonably sized.
    ///
    /// Default: 64 KB
    pub max_attribute_length: usize,

    /// Maximum number of podcast soundbites per entry
    ///
    /// Podcast 2.0 soundbite elements for shareable clips.
    ///
    /// Default: 10 soundbites
    pub max_podcast_soundbites: usize,

    /// Maximum number of podcast transcripts per entry
    ///
    /// Podcast 2.0 transcript elements.
    ///
    /// Default: 20 transcripts
    pub max_podcast_transcripts: usize,

    /// Maximum number of podcast funding elements per feed
    ///
    /// Podcast 2.0 funding elements for donation links.
    ///
    /// Default: 20 funding elements
    pub max_podcast_funding: usize,

    /// Maximum number of podcast person elements per entry
    ///
    /// Podcast 2.0 person elements for hosts, guests, etc.
    ///
    /// Default: 50 persons
    pub max_podcast_persons: usize,

    /// Maximum number of podcast value recipients per feed
    ///
    /// Podcast 2.0 value recipients for payment splitting.
    /// Prevents `DoS` from feeds with excessive recipient lists.
    ///
    /// Default: 20 recipients
    pub max_value_recipients: usize,
}

148impl Default for ParserLimits {
149    /// Creates default parser limits suitable for general use
150    ///
151    /// These defaults are conservative and should work for most feeds,
152    /// including large podcast feeds and news aggregators.
153    fn default() -> Self {
154        Self {
155            max_entries: 10_000,
156            max_links_per_feed: 100,
157            max_links_per_entry: 50,
158            max_authors: 20,
159            max_contributors: 20,
160            max_tags: 100,
161            max_content_blocks: 10,
162            max_enclosures: 20,
163            max_namespaces: 100,
164            max_nesting_depth: 100,
165            max_text_length: 10 * 1024 * 1024,      // 10 MB
166            max_feed_size_bytes: 100 * 1024 * 1024, // 100 MB
167            max_attribute_length: 64 * 1024,        // 64 KB
168            max_podcast_soundbites: 10,
169            max_podcast_transcripts: 20,
170            max_podcast_funding: 20,
171            max_podcast_persons: 50,
172            max_value_recipients: 20,
173        }
174    }
175}
176
177impl ParserLimits {
178    /// Creates strict limits for resource-constrained environments
179    ///
180    /// Use this for embedded systems or when parsing untrusted feeds
181    /// with minimal resources.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use feedparser_rs::ParserLimits;
187    ///
188    /// let limits = ParserLimits::strict();
189    /// assert_eq!(limits.max_entries, 1_000);
190    /// ```
191    #[must_use]
192    pub const fn strict() -> Self {
193        Self {
194            max_entries: 1_000,
195            max_links_per_feed: 20,
196            max_links_per_entry: 10,
197            max_authors: 5,
198            max_contributors: 5,
199            max_tags: 20,
200            max_content_blocks: 3,
201            max_enclosures: 5,
202            max_namespaces: 20,
203            max_nesting_depth: 50,
204            max_text_length: 1024 * 1024,          // 1 MB
205            max_feed_size_bytes: 10 * 1024 * 1024, // 10 MB
206            max_attribute_length: 8 * 1024,        // 8 KB
207            max_podcast_soundbites: 5,
208            max_podcast_transcripts: 5,
209            max_podcast_funding: 5,
210            max_podcast_persons: 10,
211            max_value_recipients: 5,
212        }
213    }
214
215    /// Creates permissive limits for trusted feeds
216    ///
217    /// Use this only for feeds from trusted sources where you expect
218    /// large data volumes (e.g., feed archives).
219    ///
220    /// # Examples
221    ///
222    /// ```
223    /// use feedparser_rs::ParserLimits;
224    ///
225    /// let limits = ParserLimits::permissive();
226    /// assert_eq!(limits.max_entries, 100_000);
227    /// ```
228    #[must_use]
229    pub const fn permissive() -> Self {
230        Self {
231            max_entries: 100_000,
232            max_links_per_feed: 500,
233            max_links_per_entry: 200,
234            max_authors: 100,
235            max_contributors: 100,
236            max_tags: 500,
237            max_content_blocks: 50,
238            max_enclosures: 100,
239            max_namespaces: 500,
240            max_nesting_depth: 200,
241            max_text_length: 50 * 1024 * 1024,      // 50 MB
242            max_feed_size_bytes: 500 * 1024 * 1024, // 500 MB
243            max_attribute_length: 256 * 1024,       // 256 KB
244            max_podcast_soundbites: 50,
245            max_podcast_transcripts: 100,
246            max_podcast_funding: 50,
247            max_podcast_persons: 200,
248            max_value_recipients: 50,
249        }
250    }
251
252    /// Validates that a feed size is within limits
253    ///
254    /// Call this before starting to parse a feed.
255    ///
256    /// # Errors
257    ///
258    /// Returns an error if the feed exceeds `max_feed_size_bytes`.
259    pub const fn check_feed_size(&self, size: usize) -> Result<(), LimitError> {
260        if size > self.max_feed_size_bytes {
261            Err(LimitError::FeedTooLarge {
262                size,
263                max: self.max_feed_size_bytes,
264            })
265        } else {
266            Ok(())
267        }
268    }
269
270    /// Validates that a collection size is within limits
271    ///
272    /// Use this during parsing to check collection sizes.
273    ///
274    /// # Errors
275    ///
276    /// Returns an error if the collection size exceeds the specified limit.
277    pub const fn check_collection_size(
278        &self,
279        current: usize,
280        limit: usize,
281        name: &'static str,
282    ) -> Result<(), LimitError> {
283        if current >= limit {
284            Err(LimitError::CollectionTooLarge {
285                name,
286                size: current,
287                max: limit,
288            })
289        } else {
290            Ok(())
291        }
292    }
293
294    /// Validates XML nesting depth
295    ///
296    /// # Errors
297    ///
298    /// Returns an error if nesting depth exceeds `max_nesting_depth`.
299    pub const fn check_nesting_depth(&self, depth: usize) -> Result<(), LimitError> {
300        if depth > self.max_nesting_depth {
301            Err(LimitError::NestingTooDeep {
302                depth,
303                max: self.max_nesting_depth,
304            })
305        } else {
306            Ok(())
307        }
308    }
309
310    /// Validates text field length
311    ///
312    /// # Errors
313    ///
314    /// Returns an error if text length exceeds `max_text_length`.
315    pub const fn check_text_length(&self, length: usize) -> Result<(), LimitError> {
316        if length > self.max_text_length {
317            Err(LimitError::TextTooLong {
318                length,
319                max: self.max_text_length,
320            })
321        } else {
322            Ok(())
323        }
324    }
325}
326
327/// Errors that occur when parser limits are exceeded
328#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
329#[allow(missing_docs)] // Fields are self-explanatory from error messages
330pub enum LimitError {
331    /// Feed size exceeds maximum allowed
332    #[error("Feed size ({size} bytes) exceeds maximum ({max} bytes)")]
333    FeedTooLarge { size: usize, max: usize },
334
335    /// Collection (entries, links, etc.) has too many items
336    #[error("Collection '{name}' has {size} items, exceeds maximum ({max})")]
337    CollectionTooLarge {
338        name: &'static str,
339        size: usize,
340        max: usize,
341    },
342
343    /// XML nesting is too deep
344    #[error("XML nesting depth ({depth}) exceeds maximum ({max})")]
345    NestingTooDeep { depth: usize, max: usize },
346
347    /// Text field is too long
348    #[error("Text field length ({length} bytes) exceeds maximum ({max} bytes)")]
349    TextTooLong { length: usize, max: usize },
350}
351
// Unit tests for the presets, the four `check_*` helpers and the error
// messages, including the exact at-limit behaviour of every check.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_limits() {
        let limits = ParserLimits::default();
        assert_eq!(limits.max_entries, 10_000);
        assert_eq!(limits.max_feed_size_bytes, 100 * 1024 * 1024);
    }

    #[test]
    fn test_strict_limits() {
        let limits = ParserLimits::strict();
        assert_eq!(limits.max_entries, 1_000);
        assert!(limits.max_entries < ParserLimits::default().max_entries);
    }

    #[test]
    fn test_permissive_limits() {
        let limits = ParserLimits::permissive();
        assert_eq!(limits.max_entries, 100_000);
        assert!(limits.max_entries > ParserLimits::default().max_entries);
    }

    #[test]
    fn test_presets_usable_in_const_context() {
        // `strict` and `permissive` are `const fn`, so they must evaluate at compile time.
        const STRICT: ParserLimits = ParserLimits::strict();
        const PERMISSIVE: ParserLimits = ParserLimits::permissive();
        assert_eq!(STRICT, ParserLimits::strict());
        assert_eq!(PERMISSIVE, ParserLimits::permissive());
    }

    #[test]
    fn test_check_feed_size_ok() {
        let limits = ParserLimits::default();
        assert!(limits.check_feed_size(1024).is_ok());
    }

    #[test]
    fn test_check_feed_size_too_large() {
        let limits = ParserLimits::default();
        let result = limits.check_feed_size(200 * 1024 * 1024);
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::FeedTooLarge { .. })));
    }

    #[test]
    fn test_check_feed_size_boundary() {
        let limits = ParserLimits::default();
        let max = limits.max_feed_size_bytes;

        // Exactly at the limit is still accepted.
        assert!(limits.check_feed_size(max).is_ok());

        // One byte over is rejected and the error carries both values.
        assert_eq!(
            limits.check_feed_size(max + 1),
            Err(LimitError::FeedTooLarge { size: max + 1, max })
        );
    }

    #[test]
    fn test_check_collection_size_ok() {
        let limits = ParserLimits::default();
        assert!(
            limits
                .check_collection_size(50, limits.max_entries, "entries")
                .is_ok()
        );
    }

    #[test]
    fn test_check_collection_size_too_large() {
        let limits = ParserLimits::default();
        let result = limits.check_collection_size(10_001, limits.max_entries, "entries");
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::CollectionTooLarge { .. })));
    }

    #[test]
    fn test_check_collection_size_error_payload() {
        let limits = ParserLimits::default();
        assert_eq!(
            limits.check_collection_size(10_001, limits.max_entries, "entries"),
            Err(LimitError::CollectionTooLarge {
                name: "entries",
                size: 10_001,
                max: 10_000,
            })
        );
    }

    #[test]
    fn test_check_nesting_depth_ok() {
        let limits = ParserLimits::default();
        assert!(limits.check_nesting_depth(50).is_ok());
    }

    #[test]
    fn test_check_nesting_depth_too_deep() {
        let limits = ParserLimits::default();
        let result = limits.check_nesting_depth(101);
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::NestingTooDeep { .. })));
    }

    #[test]
    fn test_check_nesting_depth_boundary() {
        let limits = ParserLimits::default();
        let max = limits.max_nesting_depth;

        // Exactly at the limit is still accepted.
        assert!(limits.check_nesting_depth(max).is_ok());
        assert_eq!(
            limits.check_nesting_depth(max + 1),
            Err(LimitError::NestingTooDeep {
                depth: max + 1,
                max,
            })
        );
    }

    #[test]
    fn test_check_text_length_ok() {
        let limits = ParserLimits::default();
        assert!(limits.check_text_length(1024).is_ok());
    }

    #[test]
    fn test_check_text_length_too_long() {
        let limits = ParserLimits::default();
        let result = limits.check_text_length(20 * 1024 * 1024);
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::TextTooLong { .. })));
    }

    #[test]
    fn test_check_text_length_boundary() {
        let limits = ParserLimits::default();
        let max = limits.max_text_length;

        // Exactly at the limit is still accepted.
        assert!(limits.check_text_length(max).is_ok());
        assert_eq!(
            limits.check_text_length(max + 1),
            Err(LimitError::TextTooLong {
                length: max + 1,
                max,
            })
        );
    }

    #[test]
    fn test_limit_error_display() {
        let err = LimitError::FeedTooLarge {
            size: 200_000_000,
            max: 100_000_000,
        };
        let msg = err.to_string();
        assert!(msg.contains("200000000"));
        assert!(msg.contains("100000000"));
    }

    #[test]
    fn test_limit_error_display_collection() {
        let err = LimitError::CollectionTooLarge {
            name: "entries",
            size: 3,
            max: 3,
        };
        assert_eq!(
            err.to_string(),
            "Collection 'entries' has 3 items, exceeds maximum (3)"
        );
    }

    #[test]
    fn test_max_value_recipients_default() {
        let limits = ParserLimits::default();
        assert_eq!(limits.max_value_recipients, 20);
    }

    #[test]
    fn test_max_value_recipients_strict() {
        let limits = ParserLimits::strict();
        assert_eq!(limits.max_value_recipients, 5);
        assert!(limits.max_value_recipients < ParserLimits::default().max_value_recipients);
    }

    #[test]
    fn test_max_value_recipients_permissive() {
        let limits = ParserLimits::permissive();
        assert_eq!(limits.max_value_recipients, 50);
        assert!(limits.max_value_recipients > ParserLimits::default().max_value_recipients);
    }

    #[test]
    fn test_value_recipients_limit_enforcement() {
        let limits = ParserLimits::default();

        // Within limit
        assert!(
            limits
                .check_collection_size(19, limits.max_value_recipients, "value_recipients")
                .is_ok()
        );

        // At limit: the collection check is inclusive, so this is already an error
        assert!(
            limits
                .check_collection_size(20, limits.max_value_recipients, "value_recipients")
                .is_err()
        );

        // Exceeds limit
        let result =
            limits.check_collection_size(21, limits.max_value_recipients, "value_recipients");
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::CollectionTooLarge { .. })));
    }
}