feedparser_rs/types/
feed.rs

1use super::{
2    common::{Generator, Image, Link, Person, Tag, TextConstruct},
3    entry::Entry,
4    generics::LimitedCollectionExt,
5    podcast::{ItunesFeedMeta, PodcastMeta},
6    version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
14/// Feed metadata
15#[derive(Debug, Clone, Default)]
16pub struct FeedMeta {
17    /// Feed title
18    pub title: Option<String>,
19    /// Detailed title with metadata
20    pub title_detail: Option<TextConstruct>,
21    /// Primary feed link
22    pub link: Option<String>,
23    /// All links associated with this feed
24    pub links: Vec<Link>,
25    /// Feed subtitle/description
26    pub subtitle: Option<String>,
27    /// Detailed subtitle with metadata
28    pub subtitle_detail: Option<TextConstruct>,
29    /// Last update date
30    pub updated: Option<DateTime<Utc>>,
31    /// Initial publication date (RSS pubDate, Atom published)
32    pub published: Option<DateTime<Utc>>,
33    /// Primary author name
34    pub author: Option<String>,
35    /// Detailed author information
36    pub author_detail: Option<Person>,
37    /// All authors
38    pub authors: Vec<Person>,
39    /// Contributors
40    pub contributors: Vec<Person>,
41    /// Publisher name
42    pub publisher: Option<String>,
43    /// Detailed publisher information
44    pub publisher_detail: Option<Person>,
45    /// Feed language (e.g., "en-us")
46    pub language: Option<String>,
47    /// Copyright/rights statement
48    pub rights: Option<String>,
49    /// Detailed rights with metadata
50    pub rights_detail: Option<TextConstruct>,
51    /// Generator name
52    pub generator: Option<String>,
53    /// Detailed generator information
54    pub generator_detail: Option<Generator>,
55    /// Feed image
56    pub image: Option<Image>,
57    /// Icon URL (small image)
58    pub icon: Option<String>,
59    /// Logo URL (larger image)
60    pub logo: Option<String>,
61    /// Feed-level tags/categories
62    pub tags: Vec<Tag>,
63    /// Unique feed identifier
64    pub id: Option<String>,
65    /// Time-to-live (update frequency hint) in minutes
66    pub ttl: Option<u32>,
67    /// iTunes podcast metadata (if present)
68    pub itunes: Option<ItunesFeedMeta>,
69    /// Podcast 2.0 namespace metadata (if present)
70    pub podcast: Option<PodcastMeta>,
71    /// Dublin Core creator (author fallback)
72    pub dc_creator: Option<String>,
73    /// Dublin Core publisher
74    pub dc_publisher: Option<String>,
75    /// Dublin Core rights (copyright)
76    pub dc_rights: Option<String>,
77    /// License URL (Creative Commons, etc.)
78    pub license: Option<String>,
79    /// Syndication module metadata (RSS 1.0)
80    pub syndication: Option<SyndicationMeta>,
81}
82
83/// Parsed feed result
84///
85/// This is the main result type returned by the parser, analogous to
86/// Python feedparser's `FeedParserDict`.
87#[derive(Debug, Clone, Default)]
88pub struct ParsedFeed {
89    /// Feed metadata
90    pub feed: FeedMeta,
91    /// Feed entries/items
92    pub entries: Vec<Entry>,
93    /// True if parsing encountered errors
94    pub bozo: bool,
95    /// Description of parsing error (if bozo is true)
96    pub bozo_exception: Option<String>,
97    /// Detected or declared encoding
98    pub encoding: String,
99    /// Detected feed format version
100    pub version: FeedVersion,
101    /// XML namespaces (prefix -> URI)
102    pub namespaces: HashMap<String, String>,
103    /// HTTP status code (if fetched from URL)
104    pub status: Option<u16>,
105    /// Final URL after redirects (if fetched from URL)
106    pub href: Option<String>,
107    /// `ETag` header from HTTP response
108    pub etag: Option<String>,
109    /// Last-Modified header from HTTP response
110    pub modified: Option<String>,
111    /// HTTP response headers (if fetched from URL)
112    #[cfg(feature = "http")]
113    pub headers: Option<HashMap<String, String>>,
114}
115
116impl ParsedFeed {
117    /// Creates a new `ParsedFeed` with default UTF-8 encoding
118    #[must_use]
119    pub fn new() -> Self {
120        Self {
121            encoding: String::from("utf-8"),
122            ..Default::default()
123        }
124    }
125
126    /// Creates a `ParsedFeed` with pre-allocated capacity for entries
127    ///
128    /// This method pre-allocates space for the expected number of entries,
129    /// reducing memory allocations during parsing.
130    ///
131    /// # Arguments
132    ///
133    /// * `entry_count` - Expected number of entries in the feed
134    ///
135    /// # Examples
136    ///
137    /// ```
138    /// use feedparser_rs::ParsedFeed;
139    ///
140    /// let feed = ParsedFeed::with_capacity(50);
141    /// assert_eq!(feed.encoding, "utf-8");
142    /// ```
143    #[must_use]
144    pub fn with_capacity(entry_count: usize) -> Self {
145        Self {
146            entries: Vec::with_capacity(entry_count),
147            namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
148            encoding: String::from("utf-8"),
149            ..Default::default()
150        }
151    }
152
153    /// Check if entry limit is reached, set bozo flag and skip element if so
154    ///
155    /// This helper consolidates the duplicate entry limit checking logic used in
156    /// RSS and Atom parsers. If the entry limit is reached, it:
157    /// - Sets `bozo` flag to true
158    /// - Sets `bozo_exception` with descriptive error message
159    /// - Skips the entry element
160    /// - Returns `Ok(false)` to signal that the entry should not be processed
161    ///
162    /// # Arguments
163    ///
164    /// * `reader` - XML reader positioned at the entry element
165    /// * `buf` - Buffer for XML event reading
166    /// * `limits` - Parser limits including `max_entries`
167    /// * `depth` - Current nesting depth (will be decremented)
168    ///
169    /// # Returns
170    ///
171    /// * `Ok(true)` - Entry can be processed (limit not reached)
172    /// * `Ok(false)` - Entry limit reached, element was skipped
173    ///
174    /// # Errors
175    ///
176    /// Returns an error if:
177    /// - Skipping the entry element fails (e.g., malformed XML)
178    /// - Nesting depth exceeds limits while skipping
179    ///
180    /// # Examples
181    ///
182    /// ```ignore
183    /// // In parser:
184    /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
185    ///     continue;
186    /// }
187    /// // Process entry...
188    /// ```
189    #[inline]
190    pub fn check_entry_limit(
191        &mut self,
192        reader: &mut Reader<&[u8]>,
193        buf: &mut Vec<u8>,
194        limits: &ParserLimits,
195        depth: &mut usize,
196    ) -> Result<bool> {
197        use crate::parser::skip_element;
198
199        if self.entries.is_at_limit(limits.max_entries) {
200            self.bozo = true;
201            self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
202            skip_element(reader, buf, limits, *depth)?;
203            *depth = depth.saturating_sub(1);
204            Ok(false)
205        } else {
206            Ok(true)
207        }
208    }
209}
210
211impl FeedMeta {
212    /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
213    ///
214    /// Pre-allocates collections based on common RSS 2.0 field usage:
215    /// - 1-2 links (channel link, self link)
216    /// - 1 author (managingEditor)
217    /// - 0-3 tags (categories)
218    ///
219    /// # Examples
220    ///
221    /// ```
222    /// use feedparser_rs::FeedMeta;
223    ///
224    /// let meta = FeedMeta::with_rss_capacity();
225    /// ```
226    #[must_use]
227    pub fn with_rss_capacity() -> Self {
228        Self {
229            links: Vec::with_capacity(2),
230            authors: Vec::with_capacity(1),
231            contributors: Vec::with_capacity(0),
232            tags: Vec::with_capacity(3),
233            ..Default::default()
234        }
235    }
236
237    /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
238    ///
239    /// Pre-allocates collections based on common Atom 1.0 field usage:
240    /// - 3-5 links (alternate, self, related, etc.)
241    /// - 1-2 authors
242    /// - 1 contributor
243    /// - 3-5 tags (categories)
244    ///
245    /// # Examples
246    ///
247    /// ```
248    /// use feedparser_rs::FeedMeta;
249    ///
250    /// let meta = FeedMeta::with_atom_capacity();
251    /// ```
252    #[must_use]
253    pub fn with_atom_capacity() -> Self {
254        Self {
255            links: Vec::with_capacity(4),
256            authors: Vec::with_capacity(2),
257            contributors: Vec::with_capacity(1),
258            tags: Vec::with_capacity(5),
259            ..Default::default()
260        }
261    }
262
263    /// Sets title field with `TextConstruct`, storing both simple and detailed versions
264    ///
265    /// # Examples
266    ///
267    /// ```
268    /// use feedparser_rs::{FeedMeta, TextConstruct};
269    ///
270    /// let mut meta = FeedMeta::default();
271    /// meta.set_title(TextConstruct::text("Example Feed"));
272    /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
273    /// ```
274    #[inline]
275    pub fn set_title(&mut self, mut text: TextConstruct) {
276        self.title = Some(std::mem::take(&mut text.value));
277        self.title_detail = Some(text);
278    }
279
280    /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
281    ///
282    /// # Examples
283    ///
284    /// ```
285    /// use feedparser_rs::{FeedMeta, TextConstruct};
286    ///
287    /// let mut meta = FeedMeta::default();
288    /// meta.set_subtitle(TextConstruct::text("A great feed"));
289    /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
290    /// ```
291    #[inline]
292    pub fn set_subtitle(&mut self, mut text: TextConstruct) {
293        self.subtitle = Some(std::mem::take(&mut text.value));
294        self.subtitle_detail = Some(text);
295    }
296
297    /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
298    ///
299    /// # Examples
300    ///
301    /// ```
302    /// use feedparser_rs::{FeedMeta, TextConstruct};
303    ///
304    /// let mut meta = FeedMeta::default();
305    /// meta.set_rights(TextConstruct::text("© 2025 Example"));
306    /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
307    /// ```
308    #[inline]
309    pub fn set_rights(&mut self, mut text: TextConstruct) {
310        self.rights = Some(std::mem::take(&mut text.value));
311        self.rights_detail = Some(text);
312    }
313
314    /// Sets generator field with `Generator`, storing both simple and detailed versions
315    ///
316    /// # Examples
317    ///
318    /// ```
319    /// use feedparser_rs::{FeedMeta, Generator};
320    ///
321    /// # fn main() {
322    /// let mut meta = FeedMeta::default();
323    /// let generator = Generator {
324    ///     value: "Example Generator".to_string(),
325    ///     uri: None,
326    ///     version: None,
327    /// };
328    /// meta.set_generator(generator);
329    /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
330    /// # }
331    /// ```
332    #[inline]
333    pub fn set_generator(&mut self, mut generator: Generator) {
334        self.generator = Some(std::mem::take(&mut generator.value));
335        self.generator_detail = Some(generator);
336    }
337
338    /// Sets author field with `Person`, storing both simple and detailed versions
339    ///
340    /// # Examples
341    ///
342    /// ```
343    /// use feedparser_rs::{FeedMeta, Person};
344    ///
345    /// let mut meta = FeedMeta::default();
346    /// meta.set_author(Person::from_name("John Doe"));
347    /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
348    /// ```
349    #[inline]
350    pub fn set_author(&mut self, mut person: Person) {
351        self.author = person.name.take();
352        self.author_detail = Some(person);
353    }
354
355    /// Sets publisher field with `Person`, storing both simple and detailed versions
356    ///
357    /// # Examples
358    ///
359    /// ```
360    /// use feedparser_rs::{FeedMeta, Person};
361    ///
362    /// let mut meta = FeedMeta::default();
363    /// meta.set_publisher(Person::from_name("ACME Corp"));
364    /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
365    /// ```
366    #[inline]
367    pub fn set_publisher(&mut self, mut person: Person) {
368        self.publisher = person.name.take();
369        self.publisher_detail = Some(person);
370    }
371
372    /// Sets the primary link and adds it to the links collection
373    ///
374    /// This is a convenience method that:
375    /// 1. Sets the `link` field (if not already set)
376    /// 2. Adds an "alternate" link to the `links` collection
377    ///
378    /// # Examples
379    ///
380    /// ```
381    /// use feedparser_rs::FeedMeta;
382    ///
383    /// let mut meta = FeedMeta::default();
384    /// meta.set_alternate_link("https://example.com".to_string(), 10);
385    /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
386    /// assert_eq!(meta.links.len(), 1);
387    /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
388    /// ```
389    #[inline]
390    pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
391        if self.link.is_none() {
392            self.link = Some(href.clone());
393        }
394        self.links.try_push_limited(
395            Link {
396                href,
397                rel: Some("alternate".to_string()),
398                ..Default::default()
399            },
400            max_links,
401        );
402    }
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// The capacity constructors reserve space but hold no data.
    #[test]
    fn test_feed_meta_capacity_constructors_are_empty() {
        for meta in [FeedMeta::with_rss_capacity(), FeedMeta::with_atom_capacity()] {
            assert!(meta.title.is_none());
            assert!(meta.links.is_empty());
            assert!(meta.authors.is_empty());
            assert!(meta.contributors.is_empty());
            assert!(meta.tags.is_empty());
        }
    }

    /// A default `ParsedFeed` is error-free, versionless and empty.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `new` starts from UTF-8 with the bozo flag cleared.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// `with_capacity` reserves entry storage and keeps the UTF-8 default.
    #[test]
    fn test_parsed_feed_with_capacity() {
        let feed = ParsedFeed::with_capacity(50);
        assert_eq!(feed.encoding, "utf-8");
        assert!(feed.entries.is_empty());
        assert!(feed.entries.capacity() >= 50);
        assert!(!feed.bozo);
    }

    /// Cloning yields an independent value with the same field contents.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        // Actually exercise `Clone`; the original is still used below, so the
        // clone is not redundant.
        let copy = feed.clone();

        assert_eq!(copy.version, FeedVersion::Rss20);
        assert!(copy.bozo);
        assert_eq!(copy.encoding, "utf-8");
        assert_eq!(copy.version, feed.version);
        assert_eq!(copy.bozo, feed.bozo);
        assert_eq!(copy.encoding, feed.encoding);
    }
}
444}