// feedparser_rs/limits.rs

//! Parser limits to prevent `DoS` attacks and excessive memory usage

/// Parser limits for protecting against denial-of-service attacks
///
/// These limits prevent malicious or malformed feeds from causing excessive
/// memory allocation, deep recursion, or other resource exhaustion issues.
///
/// Every field is a plain `usize` ceiling; all fields are public, so a custom
/// configuration can be built with struct-update syntax on top of any preset.
///
/// # Examples
///
/// ```
/// use feedparser_rs::ParserLimits;
///
/// let limits = ParserLimits::default();
/// assert_eq!(limits.max_entries, 10_000);
///
/// // Custom limits for restricted environments
/// let strict = ParserLimits {
///     max_entries: 1_000,
///     max_feed_size_bytes: 10 * 1024 * 1024, // 10MB
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParserLimits {
    /// Maximum number of entries/items in a feed
    ///
    /// Prevents memory exhaustion from feeds with millions of items.
    /// Typical feeds have 10-100 entries, large feeds may have up to 1000.
    ///
    /// Default: 10,000 entries
    pub max_entries: usize,

    /// Maximum number of links per feed (channel-level)
    ///
    /// Prevents link bombing attacks.
    ///
    /// Default: 100 links
    pub max_links_per_feed: usize,

    /// Maximum number of links per entry
    ///
    /// Prevents link bombing in individual entries.
    ///
    /// Default: 50 links
    pub max_links_per_entry: usize,

    /// Maximum number of authors per feed or entry
    ///
    /// Default: 20 authors
    pub max_authors: usize,

    /// Maximum number of contributors per feed or entry
    ///
    /// Default: 20 contributors
    pub max_contributors: usize,

    /// Maximum number of tags/categories per feed or entry
    ///
    /// Default: 100 tags
    pub max_tags: usize,

    /// Maximum number of content blocks per entry
    ///
    /// Atom feeds can have multiple content elements.
    ///
    /// Default: 10 content blocks
    pub max_content_blocks: usize,

    /// Maximum number of enclosures per entry
    ///
    /// Podcast feeds typically have 1 enclosure per episode.
    ///
    /// Default: 20 enclosures
    pub max_enclosures: usize,

    /// Maximum number of XML namespaces
    ///
    /// Prevents namespace pollution attacks.
    ///
    /// Default: 100 namespaces
    pub max_namespaces: usize,

    /// Maximum XML nesting depth
    ///
    /// Prevents stack overflow from deeply nested XML.
    ///
    /// Default: 100 levels
    pub max_nesting_depth: usize,

    /// Maximum text field length in bytes
    ///
    /// Prevents excessive memory from huge title/description fields.
    ///
    /// Default: 10 MB
    pub max_text_length: usize,

    /// Maximum total feed size in bytes
    ///
    /// The entire feed must fit within this limit.
    ///
    /// Default: 100 MB
    pub max_feed_size_bytes: usize,

    /// Maximum attribute value length in bytes
    ///
    /// XML attributes should be reasonably sized.
    ///
    /// Default: 64 KB
    pub max_attribute_length: usize,

    /// Maximum number of podcast soundbites per entry
    ///
    /// Podcast 2.0 soundbite elements for shareable clips.
    ///
    /// Default: 10 soundbites
    pub max_podcast_soundbites: usize,

    /// Maximum number of podcast transcripts per entry
    ///
    /// Podcast 2.0 transcript elements.
    ///
    /// Default: 20 transcripts
    pub max_podcast_transcripts: usize,

    /// Maximum number of podcast funding elements per feed
    ///
    /// Podcast 2.0 funding elements for donation links.
    ///
    /// Default: 20 funding elements
    pub max_podcast_funding: usize,

    /// Maximum number of podcast person elements per entry
    ///
    /// Podcast 2.0 person elements for hosts, guests, etc.
    ///
    /// Default: 50 persons
    pub max_podcast_persons: usize,

    /// Maximum number of podcast value recipients per feed
    ///
    /// Podcast 2.0 value recipients for payment splitting.
    /// Prevents `DoS` from feeds with excessive recipient lists.
    ///
    /// Default: 20 recipients
    pub max_value_recipients: usize,

    /// Maximum number of alternate enclosures per entry
    ///
    /// Default: 20 alternate enclosures
    pub max_podcast_alternate_enclosures: usize,

    /// Maximum number of sources per alternate enclosure
    ///
    /// Default: 10 sources
    pub max_podcast_alternate_enclosure_sources: usize,

    /// Maximum number of podroll entries per feed
    ///
    /// Default: 50 podroll entries
    pub max_podcast_podroll: usize,

    /// Maximum number of socialInteract elements per entry
    ///
    /// Default: 20 socialInteract elements
    pub max_podcast_social_interact: usize,

    /// Maximum number of txt records per feed or entry
    ///
    /// Default: 20 txt records
    pub max_podcast_txt: usize,

    /// Maximum number of follow links per feed or entry
    ///
    /// Default: 20 follow links
    pub max_podcast_follow: usize,
}

178impl Default for ParserLimits {
179    /// Creates default parser limits suitable for general use
180    ///
181    /// These defaults are conservative and should work for most feeds,
182    /// including large podcast feeds and news aggregators.
183    fn default() -> Self {
184        Self {
185            max_entries: 10_000,
186            max_links_per_feed: 100,
187            max_links_per_entry: 50,
188            max_authors: 20,
189            max_contributors: 20,
190            max_tags: 100,
191            max_content_blocks: 10,
192            max_enclosures: 20,
193            max_namespaces: 100,
194            max_nesting_depth: 100,
195            max_text_length: 10 * 1024 * 1024,      // 10 MB
196            max_feed_size_bytes: 100 * 1024 * 1024, // 100 MB
197            max_attribute_length: 64 * 1024,        // 64 KB
198            max_podcast_soundbites: 10,
199            max_podcast_transcripts: 20,
200            max_podcast_funding: 20,
201            max_podcast_persons: 50,
202            max_value_recipients: 20,
203            max_podcast_alternate_enclosures: 20,
204            max_podcast_alternate_enclosure_sources: 10,
205            max_podcast_podroll: 50,
206            max_podcast_social_interact: 20,
207            max_podcast_txt: 20,
208            max_podcast_follow: 20,
209        }
210    }
211}
212
213impl ParserLimits {
214    /// Creates strict limits for resource-constrained environments
215    ///
216    /// Use this for embedded systems or when parsing untrusted feeds
217    /// with minimal resources.
218    ///
219    /// # Examples
220    ///
221    /// ```
222    /// use feedparser_rs::ParserLimits;
223    ///
224    /// let limits = ParserLimits::strict();
225    /// assert_eq!(limits.max_entries, 1_000);
226    /// ```
227    #[must_use]
228    pub const fn strict() -> Self {
229        Self {
230            max_entries: 1_000,
231            max_links_per_feed: 20,
232            max_links_per_entry: 10,
233            max_authors: 5,
234            max_contributors: 5,
235            max_tags: 20,
236            max_content_blocks: 3,
237            max_enclosures: 5,
238            max_namespaces: 20,
239            max_nesting_depth: 50,
240            max_text_length: 1024 * 1024,          // 1 MB
241            max_feed_size_bytes: 10 * 1024 * 1024, // 10 MB
242            max_attribute_length: 8 * 1024,        // 8 KB
243            max_podcast_soundbites: 5,
244            max_podcast_transcripts: 5,
245            max_podcast_funding: 5,
246            max_podcast_persons: 10,
247            max_value_recipients: 5,
248            max_podcast_alternate_enclosures: 5,
249            max_podcast_alternate_enclosure_sources: 3,
250            max_podcast_podroll: 10,
251            max_podcast_social_interact: 5,
252            max_podcast_txt: 5,
253            max_podcast_follow: 5,
254        }
255    }
256
257    /// Creates permissive limits for trusted feeds
258    ///
259    /// Use this only for feeds from trusted sources where you expect
260    /// large data volumes (e.g., feed archives).
261    ///
262    /// # Examples
263    ///
264    /// ```
265    /// use feedparser_rs::ParserLimits;
266    ///
267    /// let limits = ParserLimits::permissive();
268    /// assert_eq!(limits.max_entries, 100_000);
269    /// ```
270    #[must_use]
271    pub const fn permissive() -> Self {
272        Self {
273            max_entries: 100_000,
274            max_links_per_feed: 500,
275            max_links_per_entry: 200,
276            max_authors: 100,
277            max_contributors: 100,
278            max_tags: 500,
279            max_content_blocks: 50,
280            max_enclosures: 100,
281            max_namespaces: 500,
282            max_nesting_depth: 200,
283            max_text_length: 50 * 1024 * 1024,      // 50 MB
284            max_feed_size_bytes: 500 * 1024 * 1024, // 500 MB
285            max_attribute_length: 256 * 1024,       // 256 KB
286            max_podcast_soundbites: 50,
287            max_podcast_transcripts: 100,
288            max_podcast_funding: 50,
289            max_podcast_persons: 200,
290            max_value_recipients: 50,
291            max_podcast_alternate_enclosures: 100,
292            max_podcast_alternate_enclosure_sources: 50,
293            max_podcast_podroll: 200,
294            max_podcast_social_interact: 100,
295            max_podcast_txt: 100,
296            max_podcast_follow: 100,
297        }
298    }
299
300    /// Validates that a feed size is within limits
301    ///
302    /// Call this before starting to parse a feed.
303    ///
304    /// # Errors
305    ///
306    /// Returns an error if the feed exceeds `max_feed_size_bytes`.
307    pub const fn check_feed_size(&self, size: usize) -> Result<(), LimitError> {
308        if size > self.max_feed_size_bytes {
309            Err(LimitError::FeedTooLarge {
310                size,
311                max: self.max_feed_size_bytes,
312            })
313        } else {
314            Ok(())
315        }
316    }
317
318    /// Validates that a collection size is within limits
319    ///
320    /// Use this during parsing to check collection sizes.
321    ///
322    /// # Errors
323    ///
324    /// Returns an error if the collection size exceeds the specified limit.
325    pub const fn check_collection_size(
326        &self,
327        current: usize,
328        limit: usize,
329        name: &'static str,
330    ) -> Result<(), LimitError> {
331        if current >= limit {
332            Err(LimitError::CollectionTooLarge {
333                name,
334                size: current,
335                max: limit,
336            })
337        } else {
338            Ok(())
339        }
340    }
341
342    /// Validates XML nesting depth
343    ///
344    /// # Errors
345    ///
346    /// Returns an error if nesting depth exceeds `max_nesting_depth`.
347    pub const fn check_nesting_depth(&self, depth: usize) -> Result<(), LimitError> {
348        if depth > self.max_nesting_depth {
349            Err(LimitError::NestingTooDeep {
350                depth,
351                max: self.max_nesting_depth,
352            })
353        } else {
354            Ok(())
355        }
356    }
357
358    /// Validates text field length
359    ///
360    /// # Errors
361    ///
362    /// Returns an error if text length exceeds `max_text_length`.
363    pub const fn check_text_length(&self, length: usize) -> Result<(), LimitError> {
364        if length > self.max_text_length {
365            Err(LimitError::TextTooLong {
366                length,
367                max: self.max_text_length,
368            })
369        } else {
370            Ok(())
371        }
372    }
373}
374
375/// Errors that occur when parser limits are exceeded
376#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
377#[allow(missing_docs)] // Fields are self-explanatory from error messages
378pub enum LimitError {
379    /// Feed size exceeds maximum allowed
380    #[error("Feed size ({size} bytes) exceeds maximum ({max} bytes)")]
381    FeedTooLarge { size: usize, max: usize },
382
383    /// Collection (entries, links, etc.) has too many items
384    #[error("Collection '{name}' has {size} items, exceeds maximum ({max})")]
385    CollectionTooLarge {
386        name: &'static str,
387        size: usize,
388        max: usize,
389    },
390
391    /// XML nesting is too deep
392    #[error("XML nesting depth ({depth}) exceeds maximum ({max})")]
393    NestingTooDeep { depth: usize, max: usize },
394
395    /// Text field is too long
396    #[error("Text field length ({length} bytes) exceeds maximum ({max} bytes)")]
397    TextTooLong { length: usize, max: usize },
398}
399
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_limits() {
        let limits = ParserLimits::default();
        assert_eq!(limits.max_entries, 10_000);
        assert_eq!(limits.max_feed_size_bytes, 100 * 1024 * 1024);
    }

    #[test]
    fn test_strict_limits() {
        let limits = ParserLimits::strict();
        assert_eq!(limits.max_entries, 1_000);
        assert!(limits.max_entries < ParserLimits::default().max_entries);
    }

    #[test]
    fn test_permissive_limits() {
        let limits = ParserLimits::permissive();
        assert_eq!(limits.max_entries, 100_000);
        assert!(limits.max_entries > ParserLimits::default().max_entries);
    }

    #[test]
    fn test_check_feed_size_ok() {
        let limits = ParserLimits::default();
        assert!(limits.check_feed_size(1024).is_ok());
    }

    #[test]
    fn test_check_feed_size_too_large() {
        let limits = ParserLimits::default();
        let result = limits.check_feed_size(200 * 1024 * 1024);
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::FeedTooLarge { .. })));
    }

    #[test]
    fn test_check_collection_size_ok() {
        let limits = ParserLimits::default();
        assert!(
            limits
                .check_collection_size(50, limits.max_entries, "entries")
                .is_ok()
        );
    }

    #[test]
    fn test_check_collection_size_too_large() {
        let limits = ParserLimits::default();
        let result = limits.check_collection_size(10_001, limits.max_entries, "entries");
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::CollectionTooLarge { .. })));
    }

    #[test]
    fn test_check_nesting_depth_ok() {
        let limits = ParserLimits::default();
        assert!(limits.check_nesting_depth(50).is_ok());
    }

    #[test]
    fn test_check_nesting_depth_too_deep() {
        let limits = ParserLimits::default();
        let result = limits.check_nesting_depth(101);
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::NestingTooDeep { .. })));
    }

    #[test]
    fn test_check_text_length_ok() {
        let limits = ParserLimits::default();
        assert!(limits.check_text_length(1024).is_ok());
    }

    #[test]
    fn test_check_text_length_too_long() {
        let limits = ParserLimits::default();
        let result = limits.check_text_length(20 * 1024 * 1024);
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::TextTooLong { .. })));
    }

    // The size/depth/length checks use `>`, so the exact maximum is accepted
    // and only the next value up is rejected.
    #[test]
    fn test_inclusive_limits_accept_exact_maximum() {
        let limits = ParserLimits::default();

        assert!(limits.check_feed_size(limits.max_feed_size_bytes).is_ok());
        assert!(
            limits
                .check_feed_size(limits.max_feed_size_bytes + 1)
                .is_err()
        );

        assert!(limits.check_nesting_depth(limits.max_nesting_depth).is_ok());
        assert!(
            limits
                .check_nesting_depth(limits.max_nesting_depth + 1)
                .is_err()
        );

        assert!(limits.check_text_length(limits.max_text_length).is_ok());
        assert!(
            limits
                .check_text_length(limits.max_text_length + 1)
                .is_err()
        );
    }

    #[test]
    fn test_limit_error_display() {
        let err = LimitError::FeedTooLarge {
            size: 200_000_000,
            max: 100_000_000,
        };
        let msg = err.to_string();
        assert!(msg.contains("200000000"));
        assert!(msg.contains("100000000"));
    }

    #[test]
    fn test_collection_error_display() {
        let limits = ParserLimits::default();
        let err = limits
            .check_collection_size(10_001, limits.max_entries, "entries")
            .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Collection 'entries' has 10001 items, exceeds maximum (10000)"
        );
    }

    #[test]
    fn test_max_value_recipients_default() {
        let limits = ParserLimits::default();
        assert_eq!(limits.max_value_recipients, 20);
    }

    #[test]
    fn test_max_value_recipients_strict() {
        let limits = ParserLimits::strict();
        assert_eq!(limits.max_value_recipients, 5);
        assert!(limits.max_value_recipients < ParserLimits::default().max_value_recipients);
    }

    #[test]
    fn test_max_value_recipients_permissive() {
        let limits = ParserLimits::permissive();
        assert_eq!(limits.max_value_recipients, 50);
        assert!(limits.max_value_recipients > ParserLimits::default().max_value_recipients);
    }

    #[test]
    fn test_value_recipients_limit_enforcement() {
        let limits = ParserLimits::default();

        // Within limit
        assert!(
            limits
                .check_collection_size(19, limits.max_value_recipients, "value_recipients")
                .is_ok()
        );

        // At limit: rejected, because the collection check uses `>=`
        assert!(
            limits
                .check_collection_size(20, limits.max_value_recipients, "value_recipients")
                .is_err()
        );

        // Exceeds limit
        let result =
            limits.check_collection_size(21, limits.max_value_recipients, "value_recipients");
        assert!(result.is_err());
        assert!(matches!(result, Err(LimitError::CollectionTooLarge { .. })));
    }
}