Skip to main content

feedparser_rs/types/
feed.rs

1use super::{
2    common::{Cloud, Generator, Image, Link, MediaRating, Person, Tag, TextConstruct, TextInput},
3    entry::Entry,
4    generics::LimitedCollectionExt,
5    podcast::{ItunesFeedMeta, PodcastMeta},
6    version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
/// Feed-level metadata.
///
/// Several fields come in pairs: a flat value (for example `title`) and a
/// `*_detail` companion (for example `title_detail`) that carries the full
/// structured form. The `set_*` helpers on this type fill both halves of a
/// pair from a single value.
///
/// The type derives `Default`, so every `Option` starts as `None` and every
/// `Vec` starts empty; a value can be created with `FeedMeta::default()` and
/// filled in incrementally.
#[derive(Debug, Clone, Default)]
pub struct FeedMeta {
    /// Feed title (flat text)
    pub title: Option<String>,
    /// Feed title in its detailed form, including metadata
    pub title_detail: Option<TextConstruct>,
    /// Primary feed link
    pub link: Option<String>,
    /// All links associated with this feed
    pub links: Vec<Link>,
    /// Feed subtitle/description (flat text)
    pub subtitle: Option<String>,
    /// Feed subtitle in its detailed form, including metadata
    pub subtitle_detail: Option<TextConstruct>,
    /// Feed summary (populated from `itunes:summary` when present)
    pub summary: Option<String>,
    /// Feed summary in its detailed form, including metadata
    pub summary_detail: Option<TextConstruct>,
    /// Last update date, normalized to UTC
    pub updated: Option<DateTime<Utc>>,
    /// Original update date string as found in the feed (timezone preserved)
    pub updated_str: Option<String>,
    /// Initial publication date (RSS `pubDate`, Atom `published`), normalized to UTC
    pub published: Option<DateTime<Utc>>,
    /// Original publication date string as found in the feed (timezone preserved)
    pub published_str: Option<String>,
    /// Primary author name (stored inline for names ≤24 bytes)
    pub author: Option<super::common::SmallString>,
    /// Detailed information about the primary author
    pub author_detail: Option<Person>,
    /// All authors
    pub authors: Vec<Person>,
    /// Contributors
    pub contributors: Vec<Person>,
    /// Publisher name (stored inline for names ≤24 bytes)
    pub publisher: Option<super::common::SmallString>,
    /// Detailed publisher information
    pub publisher_detail: Option<Person>,
    /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
    pub language: Option<super::common::SmallString>,
    /// Copyright/rights statement (flat text)
    pub rights: Option<String>,
    /// Rights statement in its detailed form, including metadata
    pub rights_detail: Option<TextConstruct>,
    /// Generator name
    pub generator: Option<String>,
    /// Detailed generator information
    pub generator_detail: Option<Generator>,
    /// Feed image
    pub image: Option<Image>,
    /// Icon URL (small image)
    pub icon: Option<String>,
    /// Logo URL (larger image)
    pub logo: Option<String>,
    /// Feed-level tags/categories
    pub tags: Vec<Tag>,
    /// Unique feed identifier
    pub id: Option<String>,
    /// Time-to-live (update frequency hint) in minutes (kept as string for API compatibility)
    pub ttl: Option<String>,
    /// URL of documentation for the RSS format used
    pub docs: Option<String>,
    /// iTunes podcast metadata (if present); boxed, so an absent value costs one pointer
    pub itunes: Option<Box<ItunesFeedMeta>>,
    /// Podcast 2.0 namespace metadata (if present); boxed, so an absent value costs one pointer
    pub podcast: Option<Box<PodcastMeta>>,
    /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
    pub dc_creator: Option<super::common::SmallString>,
    /// Dublin Core publisher (stored inline for names ≤24 bytes)
    pub dc_publisher: Option<super::common::SmallString>,
    /// Dublin Core rights (copyright)
    pub dc_rights: Option<String>,
    /// License URL (Creative Commons, etc.)
    pub license: Option<String>,
    /// Syndication module metadata (RSS 1.0)
    pub syndication: Option<Box<SyndicationMeta>>,
    /// Geographic location from `GeoRSS` namespace (feed level, exposed as `where` per Python feedparser API)
    ///
    /// `where` is a Rust keyword, hence the raw identifier `r#where`.
    pub r#where: Option<Box<crate::namespace::georss::GeoLocation>>,
    /// W3C Basic Geo latitude (`geo:lat`), kept as the raw string
    pub geo_lat: Option<String>,
    /// W3C Basic Geo longitude (`geo:long`), kept as the raw string
    pub geo_long: Option<String>,
    /// Pagination URL for the next page of results (JSON Feed `next_url`, RFC 5005 `<link rel="next">`)
    pub next_url: Option<String>,
    /// Media RSS thumbnails at feed/channel level
    pub media_thumbnail: Vec<super::common::MediaThumbnail>,
    /// Media RSS content items at feed/channel level
    pub media_content: Vec<super::common::MediaContent>,
    /// Media RSS rating (`media:rating`) at feed level
    pub media_rating: Option<MediaRating>,
    /// Media RSS keywords (`media:keywords`) at feed level, comma-separated string
    pub media_keywords: Option<String>,
    /// RSS 2.0 `<cloud>` element — subscription endpoint for notifications
    pub cloud: Option<Cloud>,
    /// RSS 2.0 `<textInput>` element — text input form associated with the channel
    pub textinput: Option<TextInput>,
    /// RSS 2.0 `<skipHours>` — hours of the day when the channel may be skipped (0–23)
    pub skiphours: Vec<u32>,
    /// RSS 2.0 `<skipDays>` — days of the week when the channel may be skipped
    pub skipdays: Vec<String>,
}
116
/// Parsed feed result
///
/// This is the main result type returned by the parser, analogous to
/// Python feedparser's `FeedParserDict`.
///
/// Note that the derived `Default` leaves `encoding` as an empty string;
/// use `ParsedFeed::new()` or `ParsedFeed::with_capacity()` to get a value
/// whose encoding is preset to `"utf-8"`.
#[derive(Debug, Clone, Default)]
pub struct ParsedFeed {
    /// Feed-level metadata
    pub feed: FeedMeta,
    /// Feed entries/items
    pub entries: Vec<Entry>,
    /// True if parsing encountered errors
    pub bozo: bool,
    /// Description of parsing error (if bozo is true)
    pub bozo_exception: Option<String>,
    /// Detected or declared encoding (empty when built via `Default`)
    pub encoding: String,
    /// Detected feed format version
    pub version: FeedVersion,
    /// XML namespaces (prefix -> URI)
    pub namespaces: HashMap<String, String>,
    /// HTTP status code (if fetched from URL)
    pub status: Option<u16>,
    /// Final URL after redirects (if fetched from URL)
    pub href: Option<String>,
    /// `ETag` header from HTTP response
    pub etag: Option<String>,
    /// Last-Modified header from HTTP response
    pub modified: Option<String>,
    /// HTTP response headers (if fetched from URL)
    ///
    /// Only compiled in when the `http` feature is enabled.
    #[cfg(feature = "http")]
    pub headers: Option<HashMap<String, String>>,
}
149
150impl ParsedFeed {
151    /// Creates a new `ParsedFeed` with default UTF-8 encoding
152    #[must_use]
153    pub fn new() -> Self {
154        Self {
155            encoding: String::from("utf-8"),
156            ..Default::default()
157        }
158    }
159
160    /// Creates a `ParsedFeed` with pre-allocated capacity for entries
161    ///
162    /// This method pre-allocates space for the expected number of entries,
163    /// reducing memory allocations during parsing.
164    ///
165    /// # Arguments
166    ///
167    /// * `entry_count` - Expected number of entries in the feed
168    ///
169    /// # Examples
170    ///
171    /// ```
172    /// use feedparser_rs::ParsedFeed;
173    ///
174    /// let feed = ParsedFeed::with_capacity(50);
175    /// assert_eq!(feed.encoding, "utf-8");
176    /// ```
177    #[must_use]
178    pub fn with_capacity(entry_count: usize) -> Self {
179        Self {
180            entries: Vec::with_capacity(entry_count),
181            namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
182            encoding: String::from("utf-8"),
183            ..Default::default()
184        }
185    }
186
187    /// Check if entry limit is reached, set bozo flag and skip element if so
188    ///
189    /// This helper consolidates the duplicate entry limit checking logic used in
190    /// RSS and Atom parsers. If the entry limit is reached, it:
191    /// - Sets `bozo` flag to true
192    /// - Sets `bozo_exception` with descriptive error message
193    /// - Skips the entry element
194    /// - Returns `Ok(false)` to signal that the entry should not be processed
195    ///
196    /// # Arguments
197    ///
198    /// * `reader` - XML reader positioned at the entry element
199    /// * `buf` - Buffer for XML event reading
200    /// * `limits` - Parser limits including `max_entries`
201    /// * `depth` - Current nesting depth (will be decremented)
202    ///
203    /// # Returns
204    ///
205    /// * `Ok(true)` - Entry can be processed (limit not reached)
206    /// * `Ok(false)` - Entry limit reached, element was skipped
207    ///
208    /// # Errors
209    ///
210    /// Returns an error if:
211    /// - Skipping the entry element fails (e.g., malformed XML)
212    /// - Nesting depth exceeds limits while skipping
213    ///
214    /// # Examples
215    ///
216    /// ```ignore
217    /// // In parser:
218    /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
219    ///     continue;
220    /// }
221    /// // Process entry...
222    /// ```
223    #[inline]
224    pub fn check_entry_limit(
225        &mut self,
226        reader: &mut Reader<&[u8]>,
227        buf: &mut Vec<u8>,
228        limits: &ParserLimits,
229        depth: &mut usize,
230    ) -> Result<bool> {
231        use crate::parser::skip_element;
232
233        if self.entries.is_at_limit(limits.max_entries) {
234            self.bozo = true;
235            self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
236            skip_element(reader, buf, limits, *depth)?;
237            *depth = depth.saturating_sub(1);
238            Ok(false)
239        } else {
240            Ok(true)
241        }
242    }
243}
244
245impl FeedMeta {
246    /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
247    ///
248    /// Pre-allocates collections based on common RSS 2.0 field usage:
249    /// - 1-2 links (channel link, self link)
250    /// - 1 author (managingEditor)
251    /// - 0-3 tags (categories)
252    ///
253    /// # Examples
254    ///
255    /// ```
256    /// use feedparser_rs::FeedMeta;
257    ///
258    /// let meta = FeedMeta::with_rss_capacity();
259    /// ```
260    #[must_use]
261    pub fn with_rss_capacity() -> Self {
262        Self {
263            links: Vec::with_capacity(2),
264            authors: Vec::with_capacity(1),
265            contributors: Vec::with_capacity(0),
266            tags: Vec::with_capacity(3),
267            ..Default::default()
268        }
269    }
270
271    /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
272    ///
273    /// Pre-allocates collections based on common Atom 1.0 field usage:
274    /// - 3-5 links (alternate, self, related, etc.)
275    /// - 1-2 authors
276    /// - 1 contributor
277    /// - 3-5 tags (categories)
278    ///
279    /// # Examples
280    ///
281    /// ```
282    /// use feedparser_rs::FeedMeta;
283    ///
284    /// let meta = FeedMeta::with_atom_capacity();
285    /// ```
286    #[must_use]
287    pub fn with_atom_capacity() -> Self {
288        Self {
289            links: Vec::with_capacity(4),
290            authors: Vec::with_capacity(2),
291            contributors: Vec::with_capacity(1),
292            tags: Vec::with_capacity(5),
293            ..Default::default()
294        }
295    }
296
297    /// Sets title field with `TextConstruct`, storing both simple and detailed versions
298    ///
299    /// # Examples
300    ///
301    /// ```
302    /// use feedparser_rs::{FeedMeta, TextConstruct};
303    ///
304    /// let mut meta = FeedMeta::default();
305    /// meta.set_title(TextConstruct::text("Example Feed"));
306    /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
307    /// ```
308    #[inline]
309    pub fn set_title(&mut self, text: TextConstruct) {
310        self.title = Some(text.value.clone());
311        self.title_detail = Some(text);
312    }
313
314    /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
315    ///
316    /// # Examples
317    ///
318    /// ```
319    /// use feedparser_rs::{FeedMeta, TextConstruct};
320    ///
321    /// let mut meta = FeedMeta::default();
322    /// meta.set_subtitle(TextConstruct::text("A great feed"));
323    /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
324    /// ```
325    #[inline]
326    pub fn set_subtitle(&mut self, text: TextConstruct) {
327        self.subtitle = Some(text.value.clone());
328        self.subtitle_detail = Some(text);
329    }
330
331    /// Sets summary field with `TextConstruct`, storing both simple and detailed versions
332    ///
333    /// # Examples
334    ///
335    /// ```
336    /// use feedparser_rs::{FeedMeta, TextConstruct};
337    ///
338    /// let mut meta = FeedMeta::default();
339    /// meta.set_summary(TextConstruct::text("A detailed description"));
340    /// assert_eq!(meta.summary.as_deref(), Some("A detailed description"));
341    /// ```
342    #[inline]
343    pub fn set_summary(&mut self, text: TextConstruct) {
344        self.summary = Some(text.value.clone());
345        self.summary_detail = Some(text);
346    }
347
348    /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
349    ///
350    /// # Examples
351    ///
352    /// ```
353    /// use feedparser_rs::{FeedMeta, TextConstruct};
354    ///
355    /// let mut meta = FeedMeta::default();
356    /// meta.set_rights(TextConstruct::text("© 2025 Example"));
357    /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
358    /// ```
359    #[inline]
360    pub fn set_rights(&mut self, text: TextConstruct) {
361        self.rights = Some(text.value.clone());
362        self.rights_detail = Some(text);
363    }
364
365    /// Sets generator field with `Generator`, storing both simple and detailed versions
366    ///
367    /// # Examples
368    ///
369    /// ```
370    /// use feedparser_rs::{FeedMeta, Generator};
371    ///
372    /// # fn main() {
373    /// let mut meta = FeedMeta::default();
374    /// let generator = Generator {
375    ///     name: "Example Generator".to_string(),
376    ///     href: None,
377    ///     version: None,
378    /// };
379    /// meta.set_generator(generator);
380    /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
381    /// # }
382    /// ```
383    #[inline]
384    pub fn set_generator(&mut self, generator: Generator) {
385        // Clone the name for the flat `generator` field; the detail struct keeps its own copy.
386        self.generator = Some(generator.name.clone());
387        self.generator_detail = Some(generator);
388    }
389
390    /// Sets author field with `Person`, storing both simple and detailed versions
391    ///
392    /// # Examples
393    ///
394    /// ```
395    /// use feedparser_rs::{FeedMeta, Person};
396    ///
397    /// let mut meta = FeedMeta::default();
398    /// meta.set_author(Person::from_name("John Doe"));
399    /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
400    /// ```
401    #[inline]
402    pub fn set_author(&mut self, person: Person) {
403        self.author = person.flat_string();
404        self.author_detail = Some(person);
405    }
406
407    /// Sets publisher field with `Person`, storing both simple and detailed versions
408    ///
409    /// # Examples
410    ///
411    /// ```
412    /// use feedparser_rs::{FeedMeta, Person};
413    ///
414    /// let mut meta = FeedMeta::default();
415    /// meta.set_publisher(Person::from_name("ACME Corp"));
416    /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
417    /// ```
418    #[inline]
419    pub fn set_publisher(&mut self, person: Person) {
420        self.publisher.clone_from(&person.name);
421        self.publisher_detail = Some(person);
422    }
423
424    /// Sets the primary link and adds it to the links collection
425    ///
426    /// This is a convenience method that:
427    /// 1. Sets the `link` field (if not already set)
428    /// 2. Adds an "alternate" link to the `links` collection
429    ///
430    /// # Examples
431    ///
432    /// ```
433    /// use feedparser_rs::FeedMeta;
434    ///
435    /// let mut meta = FeedMeta::default();
436    /// meta.set_alternate_link("https://example.com".to_string(), 10);
437    /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
438    /// assert_eq!(meta.links.len(), 1);
439    /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
440    /// ```
441    #[inline]
442    pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
443        if self.link.is_none() {
444            self.link = Some(href.clone());
445        }
446        self.links.try_push_limited(
447            Link {
448                href: href.into(),
449                rel: Some("alternate".into()),
450                ..Default::default()
451            },
452            max_links,
453        );
454    }
455}
456
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is clean (not bozo), versionless and empty.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
        // Unlike `new()`, the derived `Default` does not preset the encoding.
        assert!(feed.encoding.is_empty());
    }

    /// `new()` presets the encoding to UTF-8.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// `with_capacity()` reserves entry storage and presets the encoding.
    #[test]
    fn test_parsed_feed_with_capacity() {
        let feed = ParsedFeed::with_capacity(50);
        assert!(feed.entries.is_empty());
        assert!(feed.entries.capacity() >= 50);
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning preserves the fields that were set on the original.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        // Actually exercise `Clone`; asserting on `feed` alone would not.
        let cloned = feed.clone();

        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, feed.encoding);
        assert_eq!(cloned.entries.len(), feed.entries.len());
    }
}