Skip to main content

feedparser_rs/types/
feed.rs

1use super::{
2    common::{Generator, Image, Link, Person, Tag, TextConstruct},
3    entry::Entry,
4    generics::LimitedCollectionExt,
5    podcast::{ItunesFeedMeta, PodcastMeta},
6    version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
14/// Feed metadata
15#[derive(Debug, Clone, Default)]
16pub struct FeedMeta {
17    /// Feed title
18    pub title: Option<String>,
19    /// Detailed title with metadata
20    pub title_detail: Option<TextConstruct>,
21    /// Primary feed link
22    pub link: Option<String>,
23    /// All links associated with this feed
24    pub links: Vec<Link>,
25    /// Feed subtitle/description
26    pub subtitle: Option<String>,
27    /// Detailed subtitle with metadata
28    pub subtitle_detail: Option<TextConstruct>,
29    /// Last update date
30    pub updated: Option<DateTime<Utc>>,
31    /// Initial publication date (RSS pubDate, Atom published)
32    pub published: Option<DateTime<Utc>>,
33    /// Primary author name (stored inline for names ≤24 bytes)
34    pub author: Option<super::common::SmallString>,
35    /// Detailed author information
36    pub author_detail: Option<Person>,
37    /// All authors
38    pub authors: Vec<Person>,
39    /// Contributors
40    pub contributors: Vec<Person>,
41    /// Publisher name (stored inline for names ≤24 bytes)
42    pub publisher: Option<super::common::SmallString>,
43    /// Detailed publisher information
44    pub publisher_detail: Option<Person>,
45    /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
46    pub language: Option<super::common::SmallString>,
47    /// Copyright/rights statement
48    pub rights: Option<String>,
49    /// Detailed rights with metadata
50    pub rights_detail: Option<TextConstruct>,
51    /// Generator name
52    pub generator: Option<String>,
53    /// Detailed generator information
54    pub generator_detail: Option<Generator>,
55    /// Feed image
56    pub image: Option<Image>,
57    /// Icon URL (small image)
58    pub icon: Option<String>,
59    /// Logo URL (larger image)
60    pub logo: Option<String>,
61    /// Feed-level tags/categories
62    pub tags: Vec<Tag>,
63    /// Unique feed identifier
64    pub id: Option<String>,
65    /// Time-to-live (update frequency hint) in minutes
66    pub ttl: Option<u32>,
67    /// iTunes podcast metadata (if present)
68    pub itunes: Option<Box<ItunesFeedMeta>>,
69    /// Podcast 2.0 namespace metadata (if present)
70    pub podcast: Option<Box<PodcastMeta>>,
71    /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
72    pub dc_creator: Option<super::common::SmallString>,
73    /// Dublin Core publisher (stored inline for names ≤24 bytes)
74    pub dc_publisher: Option<super::common::SmallString>,
75    /// Dublin Core rights (copyright)
76    pub dc_rights: Option<String>,
77    /// License URL (Creative Commons, etc.)
78    pub license: Option<String>,
79    /// Syndication module metadata (RSS 1.0)
80    pub syndication: Option<Box<SyndicationMeta>>,
81    /// Geographic location from `GeoRSS` namespace (feed level)
82    pub geo: Option<Box<crate::namespace::georss::GeoLocation>>,
83    /// Pagination URL for the next page of results (JSON Feed `next_url`, RFC 5005 `<link rel="next">`)
84    pub next_url: Option<String>,
85}
86
87/// Parsed feed result
88///
89/// This is the main result type returned by the parser, analogous to
90/// Python feedparser's `FeedParserDict`.
91#[derive(Debug, Clone, Default)]
92pub struct ParsedFeed {
93    /// Feed metadata
94    pub feed: FeedMeta,
95    /// Feed entries/items
96    pub entries: Vec<Entry>,
97    /// True if parsing encountered errors
98    pub bozo: bool,
99    /// Description of parsing error (if bozo is true)
100    pub bozo_exception: Option<String>,
101    /// Detected or declared encoding
102    pub encoding: String,
103    /// Detected feed format version
104    pub version: FeedVersion,
105    /// XML namespaces (prefix -> URI)
106    pub namespaces: HashMap<String, String>,
107    /// HTTP status code (if fetched from URL)
108    pub status: Option<u16>,
109    /// Final URL after redirects (if fetched from URL)
110    pub href: Option<String>,
111    /// `ETag` header from HTTP response
112    pub etag: Option<String>,
113    /// Last-Modified header from HTTP response
114    pub modified: Option<String>,
115    /// HTTP response headers (if fetched from URL)
116    #[cfg(feature = "http")]
117    pub headers: Option<HashMap<String, String>>,
118}
119
120impl ParsedFeed {
121    /// Creates a new `ParsedFeed` with default UTF-8 encoding
122    #[must_use]
123    pub fn new() -> Self {
124        Self {
125            encoding: String::from("utf-8"),
126            ..Default::default()
127        }
128    }
129
130    /// Creates a `ParsedFeed` with pre-allocated capacity for entries
131    ///
132    /// This method pre-allocates space for the expected number of entries,
133    /// reducing memory allocations during parsing.
134    ///
135    /// # Arguments
136    ///
137    /// * `entry_count` - Expected number of entries in the feed
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// use feedparser_rs::ParsedFeed;
143    ///
144    /// let feed = ParsedFeed::with_capacity(50);
145    /// assert_eq!(feed.encoding, "utf-8");
146    /// ```
147    #[must_use]
148    pub fn with_capacity(entry_count: usize) -> Self {
149        Self {
150            entries: Vec::with_capacity(entry_count),
151            namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
152            encoding: String::from("utf-8"),
153            ..Default::default()
154        }
155    }
156
157    /// Check if entry limit is reached, set bozo flag and skip element if so
158    ///
159    /// This helper consolidates the duplicate entry limit checking logic used in
160    /// RSS and Atom parsers. If the entry limit is reached, it:
161    /// - Sets `bozo` flag to true
162    /// - Sets `bozo_exception` with descriptive error message
163    /// - Skips the entry element
164    /// - Returns `Ok(false)` to signal that the entry should not be processed
165    ///
166    /// # Arguments
167    ///
168    /// * `reader` - XML reader positioned at the entry element
169    /// * `buf` - Buffer for XML event reading
170    /// * `limits` - Parser limits including `max_entries`
171    /// * `depth` - Current nesting depth (will be decremented)
172    ///
173    /// # Returns
174    ///
175    /// * `Ok(true)` - Entry can be processed (limit not reached)
176    /// * `Ok(false)` - Entry limit reached, element was skipped
177    ///
178    /// # Errors
179    ///
180    /// Returns an error if:
181    /// - Skipping the entry element fails (e.g., malformed XML)
182    /// - Nesting depth exceeds limits while skipping
183    ///
184    /// # Examples
185    ///
186    /// ```ignore
187    /// // In parser:
188    /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
189    ///     continue;
190    /// }
191    /// // Process entry...
192    /// ```
193    #[inline]
194    pub fn check_entry_limit(
195        &mut self,
196        reader: &mut Reader<&[u8]>,
197        buf: &mut Vec<u8>,
198        limits: &ParserLimits,
199        depth: &mut usize,
200    ) -> Result<bool> {
201        use crate::parser::skip_element;
202
203        if self.entries.is_at_limit(limits.max_entries) {
204            self.bozo = true;
205            self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
206            skip_element(reader, buf, limits, *depth)?;
207            *depth = depth.saturating_sub(1);
208            Ok(false)
209        } else {
210            Ok(true)
211        }
212    }
213}
214
215impl FeedMeta {
216    /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
217    ///
218    /// Pre-allocates collections based on common RSS 2.0 field usage:
219    /// - 1-2 links (channel link, self link)
220    /// - 1 author (managingEditor)
221    /// - 0-3 tags (categories)
222    ///
223    /// # Examples
224    ///
225    /// ```
226    /// use feedparser_rs::FeedMeta;
227    ///
228    /// let meta = FeedMeta::with_rss_capacity();
229    /// ```
230    #[must_use]
231    pub fn with_rss_capacity() -> Self {
232        Self {
233            links: Vec::with_capacity(2),
234            authors: Vec::with_capacity(1),
235            contributors: Vec::with_capacity(0),
236            tags: Vec::with_capacity(3),
237            ..Default::default()
238        }
239    }
240
241    /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
242    ///
243    /// Pre-allocates collections based on common Atom 1.0 field usage:
244    /// - 3-5 links (alternate, self, related, etc.)
245    /// - 1-2 authors
246    /// - 1 contributor
247    /// - 3-5 tags (categories)
248    ///
249    /// # Examples
250    ///
251    /// ```
252    /// use feedparser_rs::FeedMeta;
253    ///
254    /// let meta = FeedMeta::with_atom_capacity();
255    /// ```
256    #[must_use]
257    pub fn with_atom_capacity() -> Self {
258        Self {
259            links: Vec::with_capacity(4),
260            authors: Vec::with_capacity(2),
261            contributors: Vec::with_capacity(1),
262            tags: Vec::with_capacity(5),
263            ..Default::default()
264        }
265    }
266
267    /// Sets title field with `TextConstruct`, storing both simple and detailed versions
268    ///
269    /// # Examples
270    ///
271    /// ```
272    /// use feedparser_rs::{FeedMeta, TextConstruct};
273    ///
274    /// let mut meta = FeedMeta::default();
275    /// meta.set_title(TextConstruct::text("Example Feed"));
276    /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
277    /// ```
278    #[inline]
279    pub fn set_title(&mut self, mut text: TextConstruct) {
280        self.title = Some(std::mem::take(&mut text.value));
281        self.title_detail = Some(text);
282    }
283
284    /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
285    ///
286    /// # Examples
287    ///
288    /// ```
289    /// use feedparser_rs::{FeedMeta, TextConstruct};
290    ///
291    /// let mut meta = FeedMeta::default();
292    /// meta.set_subtitle(TextConstruct::text("A great feed"));
293    /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
294    /// ```
295    #[inline]
296    pub fn set_subtitle(&mut self, mut text: TextConstruct) {
297        self.subtitle = Some(std::mem::take(&mut text.value));
298        self.subtitle_detail = Some(text);
299    }
300
301    /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
302    ///
303    /// # Examples
304    ///
305    /// ```
306    /// use feedparser_rs::{FeedMeta, TextConstruct};
307    ///
308    /// let mut meta = FeedMeta::default();
309    /// meta.set_rights(TextConstruct::text("© 2025 Example"));
310    /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
311    /// ```
312    #[inline]
313    pub fn set_rights(&mut self, mut text: TextConstruct) {
314        self.rights = Some(std::mem::take(&mut text.value));
315        self.rights_detail = Some(text);
316    }
317
318    /// Sets generator field with `Generator`, storing both simple and detailed versions
319    ///
320    /// # Examples
321    ///
322    /// ```
323    /// use feedparser_rs::{FeedMeta, Generator};
324    ///
325    /// # fn main() {
326    /// let mut meta = FeedMeta::default();
327    /// let generator = Generator {
328    ///     value: "Example Generator".to_string(),
329    ///     uri: None,
330    ///     version: None,
331    /// };
332    /// meta.set_generator(generator);
333    /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
334    /// # }
335    /// ```
336    #[inline]
337    pub fn set_generator(&mut self, mut generator: Generator) {
338        self.generator = Some(std::mem::take(&mut generator.value));
339        self.generator_detail = Some(generator);
340    }
341
342    /// Sets author field with `Person`, storing both simple and detailed versions
343    ///
344    /// # Examples
345    ///
346    /// ```
347    /// use feedparser_rs::{FeedMeta, Person};
348    ///
349    /// let mut meta = FeedMeta::default();
350    /// meta.set_author(Person::from_name("John Doe"));
351    /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
352    /// ```
353    #[inline]
354    pub fn set_author(&mut self, mut person: Person) {
355        self.author = person.name.take();
356        self.author_detail = Some(person);
357    }
358
359    /// Sets publisher field with `Person`, storing both simple and detailed versions
360    ///
361    /// # Examples
362    ///
363    /// ```
364    /// use feedparser_rs::{FeedMeta, Person};
365    ///
366    /// let mut meta = FeedMeta::default();
367    /// meta.set_publisher(Person::from_name("ACME Corp"));
368    /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
369    /// ```
370    #[inline]
371    pub fn set_publisher(&mut self, mut person: Person) {
372        self.publisher = person.name.take();
373        self.publisher_detail = Some(person);
374    }
375
376    /// Sets the primary link and adds it to the links collection
377    ///
378    /// This is a convenience method that:
379    /// 1. Sets the `link` field (if not already set)
380    /// 2. Adds an "alternate" link to the `links` collection
381    ///
382    /// # Examples
383    ///
384    /// ```
385    /// use feedparser_rs::FeedMeta;
386    ///
387    /// let mut meta = FeedMeta::default();
388    /// meta.set_alternate_link("https://example.com".to_string(), 10);
389    /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
390    /// assert_eq!(meta.links.len(), 1);
391    /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
392    /// ```
393    #[inline]
394    pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
395        if self.link.is_none() {
396            self.link = Some(href.clone());
397        }
398        self.links.try_push_limited(
399            Link {
400                href: href.into(),
401                rel: Some("alternate".into()),
402                ..Default::default()
403            },
404            max_links,
405        );
406    }
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is error-free, of unknown version, and has no entries.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` presets the encoding to UTF-8 and leaves `bozo` unset.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` copies its fields and leaves the source intact.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        // Exercise `Clone` itself; the test previously only inspected `feed`.
        let cloned = feed.clone();

        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, "utf-8");

        // The original must be unaffected by the clone.
        assert_eq!(feed.version, FeedVersion::Rss20);
        assert!(feed.bozo);
        assert_eq!(feed.encoding, "utf-8");
    }
}