feedparser_rs/types/
feed.rs

1use super::{
2    common::{Generator, Image, Link, Person, Tag, TextConstruct},
3    entry::Entry,
4    generics::LimitedCollectionExt,
5    podcast::{ItunesFeedMeta, PodcastMeta},
6    version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
/// Feed metadata
///
/// Holds the feed-level (channel-level) fields of a parsed feed. The struct
/// derives `Default`, so a fresh value has every `Option` set to `None` and
/// every `Vec` empty.
///
/// Several fields come in pairs: a plain value (e.g. `title`) and a
/// `*_detail` companion carrying the richer type (e.g. `title_detail`).
/// The `set_*` helpers on this type fill both halves of a pair in one call.
#[derive(Debug, Clone, Default)]
pub struct FeedMeta {
    /// Feed title
    pub title: Option<String>,
    /// Detailed title with metadata
    pub title_detail: Option<TextConstruct>,
    /// Primary feed link
    pub link: Option<String>,
    /// All links associated with this feed
    pub links: Vec<Link>,
    /// Feed subtitle/description
    pub subtitle: Option<String>,
    /// Detailed subtitle with metadata
    pub subtitle_detail: Option<TextConstruct>,
    /// Last update date
    pub updated: Option<DateTime<Utc>>,
    /// Initial publication date (RSS pubDate, Atom published)
    pub published: Option<DateTime<Utc>>,
    /// Primary author name (stored inline for names ≤24 bytes)
    pub author: Option<super::common::SmallString>,
    /// Detailed author information
    pub author_detail: Option<Person>,
    /// All authors
    pub authors: Vec<Person>,
    /// Contributors
    pub contributors: Vec<Person>,
    /// Publisher name (stored inline for names ≤24 bytes)
    pub publisher: Option<super::common::SmallString>,
    /// Detailed publisher information
    pub publisher_detail: Option<Person>,
    /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
    pub language: Option<super::common::SmallString>,
    /// Copyright/rights statement
    pub rights: Option<String>,
    /// Detailed rights with metadata
    pub rights_detail: Option<TextConstruct>,
    /// Generator name
    pub generator: Option<String>,
    /// Detailed generator information
    pub generator_detail: Option<Generator>,
    /// Feed image
    pub image: Option<Image>,
    /// Icon URL (small image)
    pub icon: Option<String>,
    /// Logo URL (larger image)
    pub logo: Option<String>,
    /// Feed-level tags/categories
    pub tags: Vec<Tag>,
    /// Unique feed identifier
    pub id: Option<String>,
    /// Time-to-live (update frequency hint) in minutes
    pub ttl: Option<u32>,
    /// iTunes podcast metadata (if present); boxed, so an absent value costs one pointer
    pub itunes: Option<Box<ItunesFeedMeta>>,
    /// Podcast 2.0 namespace metadata (if present); boxed like `itunes`
    pub podcast: Option<Box<PodcastMeta>>,
    /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
    pub dc_creator: Option<super::common::SmallString>,
    /// Dublin Core publisher (stored inline for names ≤24 bytes)
    pub dc_publisher: Option<super::common::SmallString>,
    /// Dublin Core rights (copyright)
    pub dc_rights: Option<String>,
    /// License URL (Creative Commons, etc.)
    pub license: Option<String>,
    /// Syndication module metadata (RSS 1.0); boxed like `itunes`
    pub syndication: Option<Box<SyndicationMeta>>,
    /// Geographic location from `GeoRSS` namespace (feed level); boxed like `itunes`
    pub geo: Option<Box<crate::namespace::georss::GeoLocation>>,
}
84
/// Parsed feed result
///
/// This is the main result type returned by the parser, analogous to
/// Python feedparser's `FeedParserDict`.
///
/// Note that the derived `Default` leaves `encoding` as an empty string;
/// [`ParsedFeed::new`] and [`ParsedFeed::with_capacity`] set it to `"utf-8"`.
#[derive(Debug, Clone, Default)]
pub struct ParsedFeed {
    /// Feed metadata
    pub feed: FeedMeta,
    /// Feed entries/items
    pub entries: Vec<Entry>,
    /// True if parsing encountered errors
    pub bozo: bool,
    /// Description of parsing error (if bozo is true)
    pub bozo_exception: Option<String>,
    /// Detected or declared encoding (empty for a `Default`-constructed value)
    pub encoding: String,
    /// Detected feed format version
    pub version: FeedVersion,
    /// XML namespaces (prefix -> URI)
    pub namespaces: HashMap<String, String>,
    /// HTTP status code (if fetched from URL)
    pub status: Option<u16>,
    /// Final URL after redirects (if fetched from URL)
    pub href: Option<String>,
    /// `ETag` header from HTTP response
    pub etag: Option<String>,
    /// Last-Modified header from HTTP response
    pub modified: Option<String>,
    /// HTTP response headers (if fetched from URL)
    ///
    /// Only present when the crate is built with the `http` feature.
    #[cfg(feature = "http")]
    pub headers: Option<HashMap<String, String>>,
}
117
118impl ParsedFeed {
119    /// Creates a new `ParsedFeed` with default UTF-8 encoding
120    #[must_use]
121    pub fn new() -> Self {
122        Self {
123            encoding: String::from("utf-8"),
124            ..Default::default()
125        }
126    }
127
128    /// Creates a `ParsedFeed` with pre-allocated capacity for entries
129    ///
130    /// This method pre-allocates space for the expected number of entries,
131    /// reducing memory allocations during parsing.
132    ///
133    /// # Arguments
134    ///
135    /// * `entry_count` - Expected number of entries in the feed
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use feedparser_rs::ParsedFeed;
141    ///
142    /// let feed = ParsedFeed::with_capacity(50);
143    /// assert_eq!(feed.encoding, "utf-8");
144    /// ```
145    #[must_use]
146    pub fn with_capacity(entry_count: usize) -> Self {
147        Self {
148            entries: Vec::with_capacity(entry_count),
149            namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
150            encoding: String::from("utf-8"),
151            ..Default::default()
152        }
153    }
154
155    /// Check if entry limit is reached, set bozo flag and skip element if so
156    ///
157    /// This helper consolidates the duplicate entry limit checking logic used in
158    /// RSS and Atom parsers. If the entry limit is reached, it:
159    /// - Sets `bozo` flag to true
160    /// - Sets `bozo_exception` with descriptive error message
161    /// - Skips the entry element
162    /// - Returns `Ok(false)` to signal that the entry should not be processed
163    ///
164    /// # Arguments
165    ///
166    /// * `reader` - XML reader positioned at the entry element
167    /// * `buf` - Buffer for XML event reading
168    /// * `limits` - Parser limits including `max_entries`
169    /// * `depth` - Current nesting depth (will be decremented)
170    ///
171    /// # Returns
172    ///
173    /// * `Ok(true)` - Entry can be processed (limit not reached)
174    /// * `Ok(false)` - Entry limit reached, element was skipped
175    ///
176    /// # Errors
177    ///
178    /// Returns an error if:
179    /// - Skipping the entry element fails (e.g., malformed XML)
180    /// - Nesting depth exceeds limits while skipping
181    ///
182    /// # Examples
183    ///
184    /// ```ignore
185    /// // In parser:
186    /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
187    ///     continue;
188    /// }
189    /// // Process entry...
190    /// ```
191    #[inline]
192    pub fn check_entry_limit(
193        &mut self,
194        reader: &mut Reader<&[u8]>,
195        buf: &mut Vec<u8>,
196        limits: &ParserLimits,
197        depth: &mut usize,
198    ) -> Result<bool> {
199        use crate::parser::skip_element;
200
201        if self.entries.is_at_limit(limits.max_entries) {
202            self.bozo = true;
203            self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
204            skip_element(reader, buf, limits, *depth)?;
205            *depth = depth.saturating_sub(1);
206            Ok(false)
207        } else {
208            Ok(true)
209        }
210    }
211}
212
213impl FeedMeta {
214    /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
215    ///
216    /// Pre-allocates collections based on common RSS 2.0 field usage:
217    /// - 1-2 links (channel link, self link)
218    /// - 1 author (managingEditor)
219    /// - 0-3 tags (categories)
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// use feedparser_rs::FeedMeta;
225    ///
226    /// let meta = FeedMeta::with_rss_capacity();
227    /// ```
228    #[must_use]
229    pub fn with_rss_capacity() -> Self {
230        Self {
231            links: Vec::with_capacity(2),
232            authors: Vec::with_capacity(1),
233            contributors: Vec::with_capacity(0),
234            tags: Vec::with_capacity(3),
235            ..Default::default()
236        }
237    }
238
239    /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
240    ///
241    /// Pre-allocates collections based on common Atom 1.0 field usage:
242    /// - 3-5 links (alternate, self, related, etc.)
243    /// - 1-2 authors
244    /// - 1 contributor
245    /// - 3-5 tags (categories)
246    ///
247    /// # Examples
248    ///
249    /// ```
250    /// use feedparser_rs::FeedMeta;
251    ///
252    /// let meta = FeedMeta::with_atom_capacity();
253    /// ```
254    #[must_use]
255    pub fn with_atom_capacity() -> Self {
256        Self {
257            links: Vec::with_capacity(4),
258            authors: Vec::with_capacity(2),
259            contributors: Vec::with_capacity(1),
260            tags: Vec::with_capacity(5),
261            ..Default::default()
262        }
263    }
264
265    /// Sets title field with `TextConstruct`, storing both simple and detailed versions
266    ///
267    /// # Examples
268    ///
269    /// ```
270    /// use feedparser_rs::{FeedMeta, TextConstruct};
271    ///
272    /// let mut meta = FeedMeta::default();
273    /// meta.set_title(TextConstruct::text("Example Feed"));
274    /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
275    /// ```
276    #[inline]
277    pub fn set_title(&mut self, mut text: TextConstruct) {
278        self.title = Some(std::mem::take(&mut text.value));
279        self.title_detail = Some(text);
280    }
281
282    /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
283    ///
284    /// # Examples
285    ///
286    /// ```
287    /// use feedparser_rs::{FeedMeta, TextConstruct};
288    ///
289    /// let mut meta = FeedMeta::default();
290    /// meta.set_subtitle(TextConstruct::text("A great feed"));
291    /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
292    /// ```
293    #[inline]
294    pub fn set_subtitle(&mut self, mut text: TextConstruct) {
295        self.subtitle = Some(std::mem::take(&mut text.value));
296        self.subtitle_detail = Some(text);
297    }
298
299    /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
300    ///
301    /// # Examples
302    ///
303    /// ```
304    /// use feedparser_rs::{FeedMeta, TextConstruct};
305    ///
306    /// let mut meta = FeedMeta::default();
307    /// meta.set_rights(TextConstruct::text("© 2025 Example"));
308    /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
309    /// ```
310    #[inline]
311    pub fn set_rights(&mut self, mut text: TextConstruct) {
312        self.rights = Some(std::mem::take(&mut text.value));
313        self.rights_detail = Some(text);
314    }
315
316    /// Sets generator field with `Generator`, storing both simple and detailed versions
317    ///
318    /// # Examples
319    ///
320    /// ```
321    /// use feedparser_rs::{FeedMeta, Generator};
322    ///
323    /// # fn main() {
324    /// let mut meta = FeedMeta::default();
325    /// let generator = Generator {
326    ///     value: "Example Generator".to_string(),
327    ///     uri: None,
328    ///     version: None,
329    /// };
330    /// meta.set_generator(generator);
331    /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
332    /// # }
333    /// ```
334    #[inline]
335    pub fn set_generator(&mut self, mut generator: Generator) {
336        self.generator = Some(std::mem::take(&mut generator.value));
337        self.generator_detail = Some(generator);
338    }
339
340    /// Sets author field with `Person`, storing both simple and detailed versions
341    ///
342    /// # Examples
343    ///
344    /// ```
345    /// use feedparser_rs::{FeedMeta, Person};
346    ///
347    /// let mut meta = FeedMeta::default();
348    /// meta.set_author(Person::from_name("John Doe"));
349    /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
350    /// ```
351    #[inline]
352    pub fn set_author(&mut self, mut person: Person) {
353        self.author = person.name.take();
354        self.author_detail = Some(person);
355    }
356
357    /// Sets publisher field with `Person`, storing both simple and detailed versions
358    ///
359    /// # Examples
360    ///
361    /// ```
362    /// use feedparser_rs::{FeedMeta, Person};
363    ///
364    /// let mut meta = FeedMeta::default();
365    /// meta.set_publisher(Person::from_name("ACME Corp"));
366    /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
367    /// ```
368    #[inline]
369    pub fn set_publisher(&mut self, mut person: Person) {
370        self.publisher = person.name.take();
371        self.publisher_detail = Some(person);
372    }
373
374    /// Sets the primary link and adds it to the links collection
375    ///
376    /// This is a convenience method that:
377    /// 1. Sets the `link` field (if not already set)
378    /// 2. Adds an "alternate" link to the `links` collection
379    ///
380    /// # Examples
381    ///
382    /// ```
383    /// use feedparser_rs::FeedMeta;
384    ///
385    /// let mut meta = FeedMeta::default();
386    /// meta.set_alternate_link("https://example.com".to_string(), 10);
387    /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
388    /// assert_eq!(meta.links.len(), 1);
389    /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
390    /// ```
391    #[inline]
392    pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
393        if self.link.is_none() {
394            self.link = Some(href.clone());
395        }
396        self.links.try_push_limited(
397            Link {
398                href: href.into(),
399                rel: Some("alternate".into()),
400                ..Default::default()
401            },
402            max_links,
403        );
404    }
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is error-free, of unknown version, and has no entries.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` sets the UTF-8 encoding and leaves `bozo` cleared.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` carries over the fields set on the original.
    #[test]
    fn test_parsed_feed_clone() {
        let original = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        // Actually exercise `Clone`; asserting on `original` alone would not.
        let copy = original.clone();

        assert_eq!(copy.version, FeedVersion::Rss20);
        assert!(copy.bozo);
        assert_eq!(copy.encoding, original.encoding);
    }
}