feedparser_rs/types/feed.rs
1use super::{
2 common::{Cloud, Generator, Image, Link, MediaRating, Person, Tag, TextConstruct, TextInput},
3 entry::Entry,
4 generics::LimitedCollectionExt,
5 podcast::{ItunesFeedMeta, PodcastMeta},
6 version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
14/// Feed metadata
15#[derive(Debug, Clone, Default)]
16pub struct FeedMeta {
17 /// Feed title
18 pub title: Option<String>,
19 /// Detailed title with metadata
20 pub title_detail: Option<TextConstruct>,
21 /// Primary feed link
22 pub link: Option<String>,
23 /// All links associated with this feed
24 pub links: Vec<Link>,
25 /// Feed subtitle/description
26 pub subtitle: Option<String>,
27 /// Detailed subtitle with metadata
28 pub subtitle_detail: Option<TextConstruct>,
29 /// Feed summary (populated from itunes:summary when present)
30 pub summary: Option<String>,
31 /// Detailed summary with metadata
32 pub summary_detail: Option<TextConstruct>,
33 /// Last update date
34 pub updated: Option<DateTime<Utc>>,
35 /// Original update date string as found in the feed (timezone preserved)
36 pub updated_str: Option<String>,
37 /// Initial publication date (RSS pubDate, Atom published)
38 pub published: Option<DateTime<Utc>>,
39 /// Original publication date string as found in the feed (timezone preserved)
40 pub published_str: Option<String>,
41 /// Primary author name (stored inline for names ≤24 bytes)
42 pub author: Option<super::common::SmallString>,
43 /// Detailed author information
44 pub author_detail: Option<Person>,
45 /// All authors
46 pub authors: Vec<Person>,
47 /// Contributors
48 pub contributors: Vec<Person>,
49 /// Publisher name (stored inline for names ≤24 bytes)
50 pub publisher: Option<super::common::SmallString>,
51 /// Detailed publisher information
52 pub publisher_detail: Option<Person>,
53 /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
54 pub language: Option<super::common::SmallString>,
55 /// Copyright/rights statement
56 pub rights: Option<String>,
57 /// Detailed rights with metadata
58 pub rights_detail: Option<TextConstruct>,
59 /// Generator name
60 pub generator: Option<String>,
61 /// Detailed generator information
62 pub generator_detail: Option<Generator>,
63 /// Feed image
64 pub image: Option<Image>,
65 /// Icon URL (small image)
66 pub icon: Option<String>,
67 /// Logo URL (larger image)
68 pub logo: Option<String>,
69 /// Feed-level tags/categories
70 pub tags: Vec<Tag>,
71 /// Unique feed identifier
72 pub id: Option<String>,
73 /// Time-to-live (update frequency hint) in minutes (kept as string for API compatibility)
74 pub ttl: Option<String>,
75 /// URL of documentation for the RSS format used
76 pub docs: Option<String>,
77 /// iTunes podcast metadata (if present)
78 pub itunes: Option<Box<ItunesFeedMeta>>,
79 /// Podcast 2.0 namespace metadata (if present)
80 pub podcast: Option<Box<PodcastMeta>>,
81 /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
82 pub dc_creator: Option<super::common::SmallString>,
83 /// Dublin Core publisher (stored inline for names ≤24 bytes)
84 pub dc_publisher: Option<super::common::SmallString>,
85 /// Dublin Core rights (copyright)
86 pub dc_rights: Option<String>,
87 /// License URL (Creative Commons, etc.)
88 pub license: Option<String>,
89 /// Syndication module metadata (RSS 1.0)
90 pub syndication: Option<Box<SyndicationMeta>>,
91 /// Geographic location from `GeoRSS` namespace (feed level, exposed as `where` per Python feedparser API)
92 pub r#where: Option<Box<crate::namespace::georss::GeoLocation>>,
93 /// W3C Basic Geo latitude (`geo:lat`)
94 pub geo_lat: Option<String>,
95 /// W3C Basic Geo longitude (`geo:long`)
96 pub geo_long: Option<String>,
97 /// Pagination URL for the next page of results (JSON Feed `next_url`, RFC 5005 `<link rel="next">`)
98 pub next_url: Option<String>,
99 /// Media RSS thumbnails at feed/channel level
100 pub media_thumbnail: Vec<super::common::MediaThumbnail>,
101 /// Media RSS content items at feed/channel level
102 pub media_content: Vec<super::common::MediaContent>,
103 /// Media RSS rating (`media:rating`) at feed level
104 pub media_rating: Option<MediaRating>,
105 /// Media RSS keywords (`media:keywords`) at feed level, comma-separated string
106 pub media_keywords: Option<String>,
107 /// RSS 2.0 `<cloud>` element — subscription endpoint for notifications
108 pub cloud: Option<Cloud>,
109 /// RSS 2.0 `<textInput>` element — text input form associated with the channel
110 pub textinput: Option<TextInput>,
111 /// RSS 2.0 `<skipHours>` — hours of the day when the channel may be skipped (0–23)
112 pub skiphours: Vec<u32>,
113 /// RSS 2.0 `<skipDays>` — days of the week when the channel may be skipped
114 pub skipdays: Vec<String>,
115}
116
117/// Parsed feed result
118///
119/// This is the main result type returned by the parser, analogous to
120/// Python feedparser's `FeedParserDict`.
121#[derive(Debug, Clone, Default)]
122pub struct ParsedFeed {
123 /// Feed metadata
124 pub feed: FeedMeta,
125 /// Feed entries/items
126 pub entries: Vec<Entry>,
127 /// True if parsing encountered errors
128 pub bozo: bool,
129 /// Description of parsing error (if bozo is true)
130 pub bozo_exception: Option<String>,
131 /// Detected or declared encoding
132 pub encoding: String,
133 /// Detected feed format version
134 pub version: FeedVersion,
135 /// XML namespaces (prefix -> URI)
136 pub namespaces: HashMap<String, String>,
137 /// HTTP status code (if fetched from URL)
138 pub status: Option<u16>,
139 /// Final URL after redirects (if fetched from URL)
140 pub href: Option<String>,
141 /// `ETag` header from HTTP response
142 pub etag: Option<String>,
143 /// Last-Modified header from HTTP response
144 pub modified: Option<String>,
145 /// HTTP response headers (if fetched from URL)
146 #[cfg(feature = "http")]
147 pub headers: Option<HashMap<String, String>>,
148}
149
150impl ParsedFeed {
151 /// Creates a new `ParsedFeed` with default UTF-8 encoding
152 #[must_use]
153 pub fn new() -> Self {
154 Self {
155 encoding: String::from("utf-8"),
156 ..Default::default()
157 }
158 }
159
160 /// Creates a `ParsedFeed` with pre-allocated capacity for entries
161 ///
162 /// This method pre-allocates space for the expected number of entries,
163 /// reducing memory allocations during parsing.
164 ///
165 /// # Arguments
166 ///
167 /// * `entry_count` - Expected number of entries in the feed
168 ///
169 /// # Examples
170 ///
171 /// ```
172 /// use feedparser_rs::ParsedFeed;
173 ///
174 /// let feed = ParsedFeed::with_capacity(50);
175 /// assert_eq!(feed.encoding, "utf-8");
176 /// ```
177 #[must_use]
178 pub fn with_capacity(entry_count: usize) -> Self {
179 Self {
180 entries: Vec::with_capacity(entry_count),
181 namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
182 encoding: String::from("utf-8"),
183 ..Default::default()
184 }
185 }
186
187 /// Check if entry limit is reached, set bozo flag and skip element if so
188 ///
189 /// This helper consolidates the duplicate entry limit checking logic used in
190 /// RSS and Atom parsers. If the entry limit is reached, it:
191 /// - Sets `bozo` flag to true
192 /// - Sets `bozo_exception` with descriptive error message
193 /// - Skips the entry element
194 /// - Returns `Ok(false)` to signal that the entry should not be processed
195 ///
196 /// # Arguments
197 ///
198 /// * `reader` - XML reader positioned at the entry element
199 /// * `buf` - Buffer for XML event reading
200 /// * `limits` - Parser limits including `max_entries`
201 /// * `depth` - Current nesting depth (will be decremented)
202 ///
203 /// # Returns
204 ///
205 /// * `Ok(true)` - Entry can be processed (limit not reached)
206 /// * `Ok(false)` - Entry limit reached, element was skipped
207 ///
208 /// # Errors
209 ///
210 /// Returns an error if:
211 /// - Skipping the entry element fails (e.g., malformed XML)
212 /// - Nesting depth exceeds limits while skipping
213 ///
214 /// # Examples
215 ///
216 /// ```ignore
217 /// // In parser:
218 /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
219 /// continue;
220 /// }
221 /// // Process entry...
222 /// ```
223 #[inline]
224 pub fn check_entry_limit(
225 &mut self,
226 reader: &mut Reader<&[u8]>,
227 buf: &mut Vec<u8>,
228 limits: &ParserLimits,
229 depth: &mut usize,
230 ) -> Result<bool> {
231 use crate::parser::skip_element;
232
233 if self.entries.is_at_limit(limits.max_entries) {
234 self.bozo = true;
235 self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
236 skip_element(reader, buf, limits, *depth)?;
237 *depth = depth.saturating_sub(1);
238 Ok(false)
239 } else {
240 Ok(true)
241 }
242 }
243}
244
245impl FeedMeta {
246 /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
247 ///
248 /// Pre-allocates collections based on common RSS 2.0 field usage:
249 /// - 1-2 links (channel link, self link)
250 /// - 1 author (managingEditor)
251 /// - 0-3 tags (categories)
252 ///
253 /// # Examples
254 ///
255 /// ```
256 /// use feedparser_rs::FeedMeta;
257 ///
258 /// let meta = FeedMeta::with_rss_capacity();
259 /// ```
260 #[must_use]
261 pub fn with_rss_capacity() -> Self {
262 Self {
263 links: Vec::with_capacity(2),
264 authors: Vec::with_capacity(1),
265 contributors: Vec::with_capacity(0),
266 tags: Vec::with_capacity(3),
267 ..Default::default()
268 }
269 }
270
271 /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
272 ///
273 /// Pre-allocates collections based on common Atom 1.0 field usage:
274 /// - 3-5 links (alternate, self, related, etc.)
275 /// - 1-2 authors
276 /// - 1 contributor
277 /// - 3-5 tags (categories)
278 ///
279 /// # Examples
280 ///
281 /// ```
282 /// use feedparser_rs::FeedMeta;
283 ///
284 /// let meta = FeedMeta::with_atom_capacity();
285 /// ```
286 #[must_use]
287 pub fn with_atom_capacity() -> Self {
288 Self {
289 links: Vec::with_capacity(4),
290 authors: Vec::with_capacity(2),
291 contributors: Vec::with_capacity(1),
292 tags: Vec::with_capacity(5),
293 ..Default::default()
294 }
295 }
296
297 /// Sets title field with `TextConstruct`, storing both simple and detailed versions
298 ///
299 /// # Examples
300 ///
301 /// ```
302 /// use feedparser_rs::{FeedMeta, TextConstruct};
303 ///
304 /// let mut meta = FeedMeta::default();
305 /// meta.set_title(TextConstruct::text("Example Feed"));
306 /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
307 /// ```
308 #[inline]
309 pub fn set_title(&mut self, text: TextConstruct) {
310 self.title = Some(text.value.clone());
311 self.title_detail = Some(text);
312 }
313
314 /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
315 ///
316 /// # Examples
317 ///
318 /// ```
319 /// use feedparser_rs::{FeedMeta, TextConstruct};
320 ///
321 /// let mut meta = FeedMeta::default();
322 /// meta.set_subtitle(TextConstruct::text("A great feed"));
323 /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
324 /// ```
325 #[inline]
326 pub fn set_subtitle(&mut self, text: TextConstruct) {
327 self.subtitle = Some(text.value.clone());
328 self.subtitle_detail = Some(text);
329 }
330
331 /// Sets summary field with `TextConstruct`, storing both simple and detailed versions
332 ///
333 /// # Examples
334 ///
335 /// ```
336 /// use feedparser_rs::{FeedMeta, TextConstruct};
337 ///
338 /// let mut meta = FeedMeta::default();
339 /// meta.set_summary(TextConstruct::text("A detailed description"));
340 /// assert_eq!(meta.summary.as_deref(), Some("A detailed description"));
341 /// ```
342 #[inline]
343 pub fn set_summary(&mut self, text: TextConstruct) {
344 self.summary = Some(text.value.clone());
345 self.summary_detail = Some(text);
346 }
347
348 /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
349 ///
350 /// # Examples
351 ///
352 /// ```
353 /// use feedparser_rs::{FeedMeta, TextConstruct};
354 ///
355 /// let mut meta = FeedMeta::default();
356 /// meta.set_rights(TextConstruct::text("© 2025 Example"));
357 /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
358 /// ```
359 #[inline]
360 pub fn set_rights(&mut self, text: TextConstruct) {
361 self.rights = Some(text.value.clone());
362 self.rights_detail = Some(text);
363 }
364
365 /// Sets generator field with `Generator`, storing both simple and detailed versions
366 ///
367 /// # Examples
368 ///
369 /// ```
370 /// use feedparser_rs::{FeedMeta, Generator};
371 ///
372 /// # fn main() {
373 /// let mut meta = FeedMeta::default();
374 /// let generator = Generator {
375 /// name: "Example Generator".to_string(),
376 /// href: None,
377 /// version: None,
378 /// };
379 /// meta.set_generator(generator);
380 /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
381 /// # }
382 /// ```
383 #[inline]
384 pub fn set_generator(&mut self, generator: Generator) {
385 // Clone the name for the flat `generator` field; the detail struct keeps its own copy.
386 self.generator = Some(generator.name.clone());
387 self.generator_detail = Some(generator);
388 }
389
390 /// Sets author field with `Person`, storing both simple and detailed versions
391 ///
392 /// # Examples
393 ///
394 /// ```
395 /// use feedparser_rs::{FeedMeta, Person};
396 ///
397 /// let mut meta = FeedMeta::default();
398 /// meta.set_author(Person::from_name("John Doe"));
399 /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
400 /// ```
401 #[inline]
402 pub fn set_author(&mut self, person: Person) {
403 self.author = person.flat_string();
404 self.author_detail = Some(person);
405 }
406
407 /// Sets publisher field with `Person`, storing both simple and detailed versions
408 ///
409 /// # Examples
410 ///
411 /// ```
412 /// use feedparser_rs::{FeedMeta, Person};
413 ///
414 /// let mut meta = FeedMeta::default();
415 /// meta.set_publisher(Person::from_name("ACME Corp"));
416 /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
417 /// ```
418 #[inline]
419 pub fn set_publisher(&mut self, person: Person) {
420 self.publisher.clone_from(&person.name);
421 self.publisher_detail = Some(person);
422 }
423
424 /// Sets the primary link and adds it to the links collection
425 ///
426 /// This is a convenience method that:
427 /// 1. Sets the `link` field (if not already set)
428 /// 2. Adds an "alternate" link to the `links` collection
429 ///
430 /// # Examples
431 ///
432 /// ```
433 /// use feedparser_rs::FeedMeta;
434 ///
435 /// let mut meta = FeedMeta::default();
436 /// meta.set_alternate_link("https://example.com".to_string(), 10);
437 /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
438 /// assert_eq!(meta.links.len(), 1);
439 /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
440 /// ```
441 #[inline]
442 pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
443 if self.link.is_none() {
444 self.link = Some(href.clone());
445 }
446 self.links.try_push_limited(
447 Link {
448 href: href.into(),
449 rel: Some("alternate".into()),
450 ..Default::default()
451 },
452 max_links,
453 );
454 }
455}
456
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is error-free, of unknown version, and has no entries.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` presets the encoding to UTF-8 and leaves `bozo` unset.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` yields an equal copy and leaves the original intact.
    ///
    /// The previous version of this test built a feed but never called `clone`,
    /// so the derived `Clone` implementation was not exercised at all.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        let cloned = feed.clone();

        // The copy carries over every value that was set on the source.
        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, "utf-8");
        assert!(cloned.entries.is_empty());

        // The source is still usable and unchanged after cloning.
        assert_eq!(feed.version, FeedVersion::Rss20);
        assert!(feed.bozo);
        assert_eq!(feed.encoding, cloned.encoding);
    }
}