Skip to main content

feedparser_rs/types/
feed.rs

1use super::{
2    common::{Generator, Image, Link, Person, Tag, TextConstruct},
3    entry::Entry,
4    generics::LimitedCollectionExt,
5    podcast::{ItunesFeedMeta, PodcastMeta},
6    version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
/// Feed metadata
///
/// Holds the feed-level (as opposed to per-entry) fields. Every scalar field
/// is an `Option` and every collection is a `Vec`, so the derived `Default`
/// produces a value with nothing set.
///
/// Several fields come in pairs: a flat value (`title`, `subtitle`, `rights`,
/// `generator`, `author`, `publisher`) and a `*_detail` companion holding the
/// full structure. The `set_*` helpers on this type fill both members of a
/// pair from the detailed value.
#[derive(Debug, Clone, Default)]
pub struct FeedMeta {
    /// Feed title (flat copy of `title_detail`'s value when set via `set_title`)
    pub title: Option<String>,
    /// Detailed title with metadata
    pub title_detail: Option<TextConstruct>,
    /// Primary feed link
    ///
    /// `set_alternate_link` only writes this field while it is still `None`.
    pub link: Option<String>,
    /// All links associated with this feed
    pub links: Vec<Link>,
    /// Feed subtitle/description (flat copy of `subtitle_detail`'s value when set via `set_subtitle`)
    pub subtitle: Option<String>,
    /// Detailed subtitle with metadata
    pub subtitle_detail: Option<TextConstruct>,
    /// Last update date
    pub updated: Option<DateTime<Utc>>,
    /// Original update date string as found in the feed (timezone preserved)
    pub updated_str: Option<String>,
    /// Initial publication date (RSS pubDate, Atom published)
    pub published: Option<DateTime<Utc>>,
    /// Original publication date string as found in the feed (timezone preserved)
    pub published_str: Option<String>,
    /// Primary author name (stored inline for names ≤24 bytes)
    ///
    /// `set_author` copies this from the `name` of the supplied `Person`.
    pub author: Option<super::common::SmallString>,
    /// Detailed author information
    pub author_detail: Option<Person>,
    /// All authors
    pub authors: Vec<Person>,
    /// Contributors
    pub contributors: Vec<Person>,
    /// Publisher name (stored inline for names ≤24 bytes)
    ///
    /// `set_publisher` copies this from the `name` of the supplied `Person`.
    pub publisher: Option<super::common::SmallString>,
    /// Detailed publisher information
    pub publisher_detail: Option<Person>,
    /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
    pub language: Option<super::common::SmallString>,
    /// Copyright/rights statement (flat copy of `rights_detail`'s value when set via `set_rights`)
    pub rights: Option<String>,
    /// Detailed rights with metadata
    pub rights_detail: Option<TextConstruct>,
    /// Generator name (flat copy of `generator_detail`'s name when set via `set_generator`)
    pub generator: Option<String>,
    /// Detailed generator information
    pub generator_detail: Option<Generator>,
    /// Feed image
    pub image: Option<Image>,
    /// Icon URL (small image)
    pub icon: Option<String>,
    /// Logo URL (larger image)
    pub logo: Option<String>,
    /// Feed-level tags/categories
    pub tags: Vec<Tag>,
    /// Unique feed identifier
    pub id: Option<String>,
    /// Time-to-live (update frequency hint) in minutes (kept as string for API compatibility)
    pub ttl: Option<String>,
    /// URL of documentation for the RSS format used
    pub docs: Option<String>,
    /// iTunes podcast metadata (if present); boxed, so an absent value costs one pointer
    pub itunes: Option<Box<ItunesFeedMeta>>,
    /// Podcast 2.0 namespace metadata (if present); boxed, so an absent value costs one pointer
    pub podcast: Option<Box<PodcastMeta>>,
    /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
    pub dc_creator: Option<super::common::SmallString>,
    /// Dublin Core publisher (stored inline for names ≤24 bytes)
    pub dc_publisher: Option<super::common::SmallString>,
    /// Dublin Core rights (copyright)
    pub dc_rights: Option<String>,
    /// License URL (Creative Commons, etc.)
    pub license: Option<String>,
    /// Syndication module metadata (RSS 1.0)
    pub syndication: Option<Box<SyndicationMeta>>,
    /// Geographic location from `GeoRSS` namespace (feed level, exposed as `where` per Python feedparser API)
    ///
    /// `where` is a Rust keyword, hence the raw identifier `r#where`.
    pub r#where: Option<Box<crate::namespace::georss::GeoLocation>>,
    /// Pagination URL for the next page of results (JSON Feed `next_url`, RFC 5005 `<link rel="next">`)
    pub next_url: Option<String>,
}
92
/// Parsed feed result
///
/// This is the main result type returned by the parser, analogous to
/// Python feedparser's `FeedParserDict`.
///
/// Note that the derived `Default` leaves `encoding` as an empty string;
/// `ParsedFeed::new` and `ParsedFeed::with_capacity` set it to `"utf-8"`.
#[derive(Debug, Clone, Default)]
pub struct ParsedFeed {
    /// Feed metadata
    pub feed: FeedMeta,
    /// Feed entries/items
    pub entries: Vec<Entry>,
    /// True if parsing encountered errors
    ///
    /// Also set by `check_entry_limit` when the entry limit has been reached.
    pub bozo: bool,
    /// Description of parsing error (if bozo is true)
    pub bozo_exception: Option<String>,
    /// Detected or declared encoding (`"utf-8"` when built via `new`/`with_capacity`)
    pub encoding: String,
    /// Detected feed format version
    pub version: FeedVersion,
    /// XML namespaces (prefix -> URI)
    pub namespaces: HashMap<String, String>,
    /// HTTP status code (if fetched from URL)
    pub status: Option<u16>,
    /// Final URL after redirects (if fetched from URL)
    pub href: Option<String>,
    /// `ETag` header from HTTP response
    pub etag: Option<String>,
    /// Last-Modified header from HTTP response
    pub modified: Option<String>,
    /// HTTP response headers (if fetched from URL)
    ///
    /// This field only exists when the crate is built with the `http` feature.
    #[cfg(feature = "http")]
    pub headers: Option<HashMap<String, String>>,
}
125
126impl ParsedFeed {
127    /// Creates a new `ParsedFeed` with default UTF-8 encoding
128    #[must_use]
129    pub fn new() -> Self {
130        Self {
131            encoding: String::from("utf-8"),
132            ..Default::default()
133        }
134    }
135
136    /// Creates a `ParsedFeed` with pre-allocated capacity for entries
137    ///
138    /// This method pre-allocates space for the expected number of entries,
139    /// reducing memory allocations during parsing.
140    ///
141    /// # Arguments
142    ///
143    /// * `entry_count` - Expected number of entries in the feed
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// use feedparser_rs::ParsedFeed;
149    ///
150    /// let feed = ParsedFeed::with_capacity(50);
151    /// assert_eq!(feed.encoding, "utf-8");
152    /// ```
153    #[must_use]
154    pub fn with_capacity(entry_count: usize) -> Self {
155        Self {
156            entries: Vec::with_capacity(entry_count),
157            namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
158            encoding: String::from("utf-8"),
159            ..Default::default()
160        }
161    }
162
163    /// Check if entry limit is reached, set bozo flag and skip element if so
164    ///
165    /// This helper consolidates the duplicate entry limit checking logic used in
166    /// RSS and Atom parsers. If the entry limit is reached, it:
167    /// - Sets `bozo` flag to true
168    /// - Sets `bozo_exception` with descriptive error message
169    /// - Skips the entry element
170    /// - Returns `Ok(false)` to signal that the entry should not be processed
171    ///
172    /// # Arguments
173    ///
174    /// * `reader` - XML reader positioned at the entry element
175    /// * `buf` - Buffer for XML event reading
176    /// * `limits` - Parser limits including `max_entries`
177    /// * `depth` - Current nesting depth (will be decremented)
178    ///
179    /// # Returns
180    ///
181    /// * `Ok(true)` - Entry can be processed (limit not reached)
182    /// * `Ok(false)` - Entry limit reached, element was skipped
183    ///
184    /// # Errors
185    ///
186    /// Returns an error if:
187    /// - Skipping the entry element fails (e.g., malformed XML)
188    /// - Nesting depth exceeds limits while skipping
189    ///
190    /// # Examples
191    ///
192    /// ```ignore
193    /// // In parser:
194    /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
195    ///     continue;
196    /// }
197    /// // Process entry...
198    /// ```
199    #[inline]
200    pub fn check_entry_limit(
201        &mut self,
202        reader: &mut Reader<&[u8]>,
203        buf: &mut Vec<u8>,
204        limits: &ParserLimits,
205        depth: &mut usize,
206    ) -> Result<bool> {
207        use crate::parser::skip_element;
208
209        if self.entries.is_at_limit(limits.max_entries) {
210            self.bozo = true;
211            self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
212            skip_element(reader, buf, limits, *depth)?;
213            *depth = depth.saturating_sub(1);
214            Ok(false)
215        } else {
216            Ok(true)
217        }
218    }
219}
220
221impl FeedMeta {
222    /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
223    ///
224    /// Pre-allocates collections based on common RSS 2.0 field usage:
225    /// - 1-2 links (channel link, self link)
226    /// - 1 author (managingEditor)
227    /// - 0-3 tags (categories)
228    ///
229    /// # Examples
230    ///
231    /// ```
232    /// use feedparser_rs::FeedMeta;
233    ///
234    /// let meta = FeedMeta::with_rss_capacity();
235    /// ```
236    #[must_use]
237    pub fn with_rss_capacity() -> Self {
238        Self {
239            links: Vec::with_capacity(2),
240            authors: Vec::with_capacity(1),
241            contributors: Vec::with_capacity(0),
242            tags: Vec::with_capacity(3),
243            ..Default::default()
244        }
245    }
246
247    /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
248    ///
249    /// Pre-allocates collections based on common Atom 1.0 field usage:
250    /// - 3-5 links (alternate, self, related, etc.)
251    /// - 1-2 authors
252    /// - 1 contributor
253    /// - 3-5 tags (categories)
254    ///
255    /// # Examples
256    ///
257    /// ```
258    /// use feedparser_rs::FeedMeta;
259    ///
260    /// let meta = FeedMeta::with_atom_capacity();
261    /// ```
262    #[must_use]
263    pub fn with_atom_capacity() -> Self {
264        Self {
265            links: Vec::with_capacity(4),
266            authors: Vec::with_capacity(2),
267            contributors: Vec::with_capacity(1),
268            tags: Vec::with_capacity(5),
269            ..Default::default()
270        }
271    }
272
273    /// Sets title field with `TextConstruct`, storing both simple and detailed versions
274    ///
275    /// # Examples
276    ///
277    /// ```
278    /// use feedparser_rs::{FeedMeta, TextConstruct};
279    ///
280    /// let mut meta = FeedMeta::default();
281    /// meta.set_title(TextConstruct::text("Example Feed"));
282    /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
283    /// ```
284    #[inline]
285    pub fn set_title(&mut self, text: TextConstruct) {
286        self.title = Some(text.value.clone());
287        self.title_detail = Some(text);
288    }
289
290    /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// use feedparser_rs::{FeedMeta, TextConstruct};
296    ///
297    /// let mut meta = FeedMeta::default();
298    /// meta.set_subtitle(TextConstruct::text("A great feed"));
299    /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
300    /// ```
301    #[inline]
302    pub fn set_subtitle(&mut self, text: TextConstruct) {
303        self.subtitle = Some(text.value.clone());
304        self.subtitle_detail = Some(text);
305    }
306
307    /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
308    ///
309    /// # Examples
310    ///
311    /// ```
312    /// use feedparser_rs::{FeedMeta, TextConstruct};
313    ///
314    /// let mut meta = FeedMeta::default();
315    /// meta.set_rights(TextConstruct::text("© 2025 Example"));
316    /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
317    /// ```
318    #[inline]
319    pub fn set_rights(&mut self, text: TextConstruct) {
320        self.rights = Some(text.value.clone());
321        self.rights_detail = Some(text);
322    }
323
324    /// Sets generator field with `Generator`, storing both simple and detailed versions
325    ///
326    /// # Examples
327    ///
328    /// ```
329    /// use feedparser_rs::{FeedMeta, Generator};
330    ///
331    /// # fn main() {
332    /// let mut meta = FeedMeta::default();
333    /// let generator = Generator {
334    ///     name: "Example Generator".to_string(),
335    ///     href: None,
336    ///     version: None,
337    /// };
338    /// meta.set_generator(generator);
339    /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
340    /// # }
341    /// ```
342    #[inline]
343    pub fn set_generator(&mut self, generator: Generator) {
344        // Clone the name for the flat `generator` field; the detail struct keeps its own copy.
345        self.generator = Some(generator.name.clone());
346        self.generator_detail = Some(generator);
347    }
348
349    /// Sets author field with `Person`, storing both simple and detailed versions
350    ///
351    /// # Examples
352    ///
353    /// ```
354    /// use feedparser_rs::{FeedMeta, Person};
355    ///
356    /// let mut meta = FeedMeta::default();
357    /// meta.set_author(Person::from_name("John Doe"));
358    /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
359    /// ```
360    #[inline]
361    pub fn set_author(&mut self, person: Person) {
362        self.author.clone_from(&person.name);
363        self.author_detail = Some(person);
364    }
365
366    /// Sets publisher field with `Person`, storing both simple and detailed versions
367    ///
368    /// # Examples
369    ///
370    /// ```
371    /// use feedparser_rs::{FeedMeta, Person};
372    ///
373    /// let mut meta = FeedMeta::default();
374    /// meta.set_publisher(Person::from_name("ACME Corp"));
375    /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
376    /// ```
377    #[inline]
378    pub fn set_publisher(&mut self, person: Person) {
379        self.publisher.clone_from(&person.name);
380        self.publisher_detail = Some(person);
381    }
382
383    /// Sets the primary link and adds it to the links collection
384    ///
385    /// This is a convenience method that:
386    /// 1. Sets the `link` field (if not already set)
387    /// 2. Adds an "alternate" link to the `links` collection
388    ///
389    /// # Examples
390    ///
391    /// ```
392    /// use feedparser_rs::FeedMeta;
393    ///
394    /// let mut meta = FeedMeta::default();
395    /// meta.set_alternate_link("https://example.com".to_string(), 10);
396    /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
397    /// assert_eq!(meta.links.len(), 1);
398    /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
399    /// ```
400    #[inline]
401    pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
402        if self.link.is_none() {
403            self.link = Some(href.clone());
404        }
405        self.links.try_push_limited(
406            Link {
407                href: href.into(),
408                rel: Some("alternate".into()),
409                ..Default::default()
410            },
411            max_links,
412        );
413    }
414}
415
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is not bozo, has no entries and an unknown version.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` presets the encoding to UTF-8.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` carries over the fields that were set on the original.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        // Actually exercise `Clone`; asserting on `feed` alone would only
        // test the struct-update expression above.
        let cloned = feed.clone();

        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, feed.encoding);
        assert_eq!(cloned.entries.len(), feed.entries.len());
    }
}
455}