feedparser_rs/types/feed.rs
1use super::{
2 common::{Generator, Image, Link, Person, Tag, TextConstruct},
3 entry::Entry,
4 generics::LimitedCollectionExt,
5 podcast::{ItunesFeedMeta, PodcastMeta},
6 version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
14/// Feed metadata
15#[derive(Debug, Clone, Default)]
16pub struct FeedMeta {
17 /// Feed title
18 pub title: Option<String>,
19 /// Detailed title with metadata
20 pub title_detail: Option<TextConstruct>,
21 /// Primary feed link
22 pub link: Option<String>,
23 /// All links associated with this feed
24 pub links: Vec<Link>,
25 /// Feed subtitle/description
26 pub subtitle: Option<String>,
27 /// Detailed subtitle with metadata
28 pub subtitle_detail: Option<TextConstruct>,
29 /// Last update date
30 pub updated: Option<DateTime<Utc>>,
31 /// Original update date string as found in the feed (timezone preserved)
32 pub updated_str: Option<String>,
33 /// Initial publication date (RSS pubDate, Atom published)
34 pub published: Option<DateTime<Utc>>,
35 /// Original publication date string as found in the feed (timezone preserved)
36 pub published_str: Option<String>,
37 /// Primary author name (stored inline for names ≤24 bytes)
38 pub author: Option<super::common::SmallString>,
39 /// Detailed author information
40 pub author_detail: Option<Person>,
41 /// All authors
42 pub authors: Vec<Person>,
43 /// Contributors
44 pub contributors: Vec<Person>,
45 /// Publisher name (stored inline for names ≤24 bytes)
46 pub publisher: Option<super::common::SmallString>,
47 /// Detailed publisher information
48 pub publisher_detail: Option<Person>,
49 /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
50 pub language: Option<super::common::SmallString>,
51 /// Copyright/rights statement
52 pub rights: Option<String>,
53 /// Detailed rights with metadata
54 pub rights_detail: Option<TextConstruct>,
55 /// Generator name
56 pub generator: Option<String>,
57 /// Detailed generator information
58 pub generator_detail: Option<Generator>,
59 /// Feed image
60 pub image: Option<Image>,
61 /// Icon URL (small image)
62 pub icon: Option<String>,
63 /// Logo URL (larger image)
64 pub logo: Option<String>,
65 /// Feed-level tags/categories
66 pub tags: Vec<Tag>,
67 /// Unique feed identifier
68 pub id: Option<String>,
69 /// Time-to-live (update frequency hint) in minutes (kept as string for API compatibility)
70 pub ttl: Option<String>,
71 /// URL of documentation for the RSS format used
72 pub docs: Option<String>,
73 /// iTunes podcast metadata (if present)
74 pub itunes: Option<Box<ItunesFeedMeta>>,
75 /// Podcast 2.0 namespace metadata (if present)
76 pub podcast: Option<Box<PodcastMeta>>,
77 /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
78 pub dc_creator: Option<super::common::SmallString>,
79 /// Dublin Core publisher (stored inline for names ≤24 bytes)
80 pub dc_publisher: Option<super::common::SmallString>,
81 /// Dublin Core rights (copyright)
82 pub dc_rights: Option<String>,
83 /// License URL (Creative Commons, etc.)
84 pub license: Option<String>,
85 /// Syndication module metadata (RSS 1.0)
86 pub syndication: Option<Box<SyndicationMeta>>,
87 /// Geographic location from `GeoRSS` namespace (feed level, exposed as `where` per Python feedparser API)
88 pub r#where: Option<Box<crate::namespace::georss::GeoLocation>>,
89 /// Pagination URL for the next page of results (JSON Feed `next_url`, RFC 5005 `<link rel="next">`)
90 pub next_url: Option<String>,
91}
92
93/// Parsed feed result
94///
95/// This is the main result type returned by the parser, analogous to
96/// Python feedparser's `FeedParserDict`.
97#[derive(Debug, Clone, Default)]
98pub struct ParsedFeed {
99 /// Feed metadata
100 pub feed: FeedMeta,
101 /// Feed entries/items
102 pub entries: Vec<Entry>,
103 /// True if parsing encountered errors
104 pub bozo: bool,
105 /// Description of parsing error (if bozo is true)
106 pub bozo_exception: Option<String>,
107 /// Detected or declared encoding
108 pub encoding: String,
109 /// Detected feed format version
110 pub version: FeedVersion,
111 /// XML namespaces (prefix -> URI)
112 pub namespaces: HashMap<String, String>,
113 /// HTTP status code (if fetched from URL)
114 pub status: Option<u16>,
115 /// Final URL after redirects (if fetched from URL)
116 pub href: Option<String>,
117 /// `ETag` header from HTTP response
118 pub etag: Option<String>,
119 /// Last-Modified header from HTTP response
120 pub modified: Option<String>,
121 /// HTTP response headers (if fetched from URL)
122 #[cfg(feature = "http")]
123 pub headers: Option<HashMap<String, String>>,
124}
125
126impl ParsedFeed {
127 /// Creates a new `ParsedFeed` with default UTF-8 encoding
128 #[must_use]
129 pub fn new() -> Self {
130 Self {
131 encoding: String::from("utf-8"),
132 ..Default::default()
133 }
134 }
135
136 /// Creates a `ParsedFeed` with pre-allocated capacity for entries
137 ///
138 /// This method pre-allocates space for the expected number of entries,
139 /// reducing memory allocations during parsing.
140 ///
141 /// # Arguments
142 ///
143 /// * `entry_count` - Expected number of entries in the feed
144 ///
145 /// # Examples
146 ///
147 /// ```
148 /// use feedparser_rs::ParsedFeed;
149 ///
150 /// let feed = ParsedFeed::with_capacity(50);
151 /// assert_eq!(feed.encoding, "utf-8");
152 /// ```
153 #[must_use]
154 pub fn with_capacity(entry_count: usize) -> Self {
155 Self {
156 entries: Vec::with_capacity(entry_count),
157 namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
158 encoding: String::from("utf-8"),
159 ..Default::default()
160 }
161 }
162
163 /// Check if entry limit is reached, set bozo flag and skip element if so
164 ///
165 /// This helper consolidates the duplicate entry limit checking logic used in
166 /// RSS and Atom parsers. If the entry limit is reached, it:
167 /// - Sets `bozo` flag to true
168 /// - Sets `bozo_exception` with descriptive error message
169 /// - Skips the entry element
170 /// - Returns `Ok(false)` to signal that the entry should not be processed
171 ///
172 /// # Arguments
173 ///
174 /// * `reader` - XML reader positioned at the entry element
175 /// * `buf` - Buffer for XML event reading
176 /// * `limits` - Parser limits including `max_entries`
177 /// * `depth` - Current nesting depth (will be decremented)
178 ///
179 /// # Returns
180 ///
181 /// * `Ok(true)` - Entry can be processed (limit not reached)
182 /// * `Ok(false)` - Entry limit reached, element was skipped
183 ///
184 /// # Errors
185 ///
186 /// Returns an error if:
187 /// - Skipping the entry element fails (e.g., malformed XML)
188 /// - Nesting depth exceeds limits while skipping
189 ///
190 /// # Examples
191 ///
192 /// ```ignore
193 /// // In parser:
194 /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
195 /// continue;
196 /// }
197 /// // Process entry...
198 /// ```
199 #[inline]
200 pub fn check_entry_limit(
201 &mut self,
202 reader: &mut Reader<&[u8]>,
203 buf: &mut Vec<u8>,
204 limits: &ParserLimits,
205 depth: &mut usize,
206 ) -> Result<bool> {
207 use crate::parser::skip_element;
208
209 if self.entries.is_at_limit(limits.max_entries) {
210 self.bozo = true;
211 self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
212 skip_element(reader, buf, limits, *depth)?;
213 *depth = depth.saturating_sub(1);
214 Ok(false)
215 } else {
216 Ok(true)
217 }
218 }
219}
220
221impl FeedMeta {
222 /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
223 ///
224 /// Pre-allocates collections based on common RSS 2.0 field usage:
225 /// - 1-2 links (channel link, self link)
226 /// - 1 author (managingEditor)
227 /// - 0-3 tags (categories)
228 ///
229 /// # Examples
230 ///
231 /// ```
232 /// use feedparser_rs::FeedMeta;
233 ///
234 /// let meta = FeedMeta::with_rss_capacity();
235 /// ```
236 #[must_use]
237 pub fn with_rss_capacity() -> Self {
238 Self {
239 links: Vec::with_capacity(2),
240 authors: Vec::with_capacity(1),
241 contributors: Vec::with_capacity(0),
242 tags: Vec::with_capacity(3),
243 ..Default::default()
244 }
245 }
246
247 /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
248 ///
249 /// Pre-allocates collections based on common Atom 1.0 field usage:
250 /// - 3-5 links (alternate, self, related, etc.)
251 /// - 1-2 authors
252 /// - 1 contributor
253 /// - 3-5 tags (categories)
254 ///
255 /// # Examples
256 ///
257 /// ```
258 /// use feedparser_rs::FeedMeta;
259 ///
260 /// let meta = FeedMeta::with_atom_capacity();
261 /// ```
262 #[must_use]
263 pub fn with_atom_capacity() -> Self {
264 Self {
265 links: Vec::with_capacity(4),
266 authors: Vec::with_capacity(2),
267 contributors: Vec::with_capacity(1),
268 tags: Vec::with_capacity(5),
269 ..Default::default()
270 }
271 }
272
273 /// Sets title field with `TextConstruct`, storing both simple and detailed versions
274 ///
275 /// # Examples
276 ///
277 /// ```
278 /// use feedparser_rs::{FeedMeta, TextConstruct};
279 ///
280 /// let mut meta = FeedMeta::default();
281 /// meta.set_title(TextConstruct::text("Example Feed"));
282 /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
283 /// ```
284 #[inline]
285 pub fn set_title(&mut self, text: TextConstruct) {
286 self.title = Some(text.value.clone());
287 self.title_detail = Some(text);
288 }
289
290 /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// use feedparser_rs::{FeedMeta, TextConstruct};
296 ///
297 /// let mut meta = FeedMeta::default();
298 /// meta.set_subtitle(TextConstruct::text("A great feed"));
299 /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
300 /// ```
301 #[inline]
302 pub fn set_subtitle(&mut self, text: TextConstruct) {
303 self.subtitle = Some(text.value.clone());
304 self.subtitle_detail = Some(text);
305 }
306
307 /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
308 ///
309 /// # Examples
310 ///
311 /// ```
312 /// use feedparser_rs::{FeedMeta, TextConstruct};
313 ///
314 /// let mut meta = FeedMeta::default();
315 /// meta.set_rights(TextConstruct::text("© 2025 Example"));
316 /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
317 /// ```
318 #[inline]
319 pub fn set_rights(&mut self, text: TextConstruct) {
320 self.rights = Some(text.value.clone());
321 self.rights_detail = Some(text);
322 }
323
324 /// Sets generator field with `Generator`, storing both simple and detailed versions
325 ///
326 /// # Examples
327 ///
328 /// ```
329 /// use feedparser_rs::{FeedMeta, Generator};
330 ///
331 /// # fn main() {
332 /// let mut meta = FeedMeta::default();
333 /// let generator = Generator {
334 /// name: "Example Generator".to_string(),
335 /// href: None,
336 /// version: None,
337 /// };
338 /// meta.set_generator(generator);
339 /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
340 /// # }
341 /// ```
342 #[inline]
343 pub fn set_generator(&mut self, generator: Generator) {
344 // Clone the name for the flat `generator` field; the detail struct keeps its own copy.
345 self.generator = Some(generator.name.clone());
346 self.generator_detail = Some(generator);
347 }
348
349 /// Sets author field with `Person`, storing both simple and detailed versions
350 ///
351 /// # Examples
352 ///
353 /// ```
354 /// use feedparser_rs::{FeedMeta, Person};
355 ///
356 /// let mut meta = FeedMeta::default();
357 /// meta.set_author(Person::from_name("John Doe"));
358 /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
359 /// ```
360 #[inline]
361 pub fn set_author(&mut self, person: Person) {
362 self.author.clone_from(&person.name);
363 self.author_detail = Some(person);
364 }
365
366 /// Sets publisher field with `Person`, storing both simple and detailed versions
367 ///
368 /// # Examples
369 ///
370 /// ```
371 /// use feedparser_rs::{FeedMeta, Person};
372 ///
373 /// let mut meta = FeedMeta::default();
374 /// meta.set_publisher(Person::from_name("ACME Corp"));
375 /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
376 /// ```
377 #[inline]
378 pub fn set_publisher(&mut self, person: Person) {
379 self.publisher.clone_from(&person.name);
380 self.publisher_detail = Some(person);
381 }
382
383 /// Sets the primary link and adds it to the links collection
384 ///
385 /// This is a convenience method that:
386 /// 1. Sets the `link` field (if not already set)
387 /// 2. Adds an "alternate" link to the `links` collection
388 ///
389 /// # Examples
390 ///
391 /// ```
392 /// use feedparser_rs::FeedMeta;
393 ///
394 /// let mut meta = FeedMeta::default();
395 /// meta.set_alternate_link("https://example.com".to_string(), 10);
396 /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
397 /// assert_eq!(meta.links.len(), 1);
398 /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
399 /// ```
400 #[inline]
401 pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
402 if self.link.is_none() {
403 self.link = Some(href.clone());
404 }
405 self.links.try_push_limited(
406 Link {
407 href: href.into(),
408 rel: Some("alternate".into()),
409 ..Default::default()
410 },
411 max_links,
412 );
413 }
414}
415
#[cfg(test)]
mod tests {
    use super::*;

    // A default `FeedMeta` carries no title and has empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    // A default `ParsedFeed` reports no error, an unknown version and no entries.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    // `ParsedFeed::new` presets the encoding to UTF-8 and leaves `bozo` unset.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    // Cloning must carry over every field that was set, and must leave the
    // source value intact. The previous version of this test never called
    // `clone()`, so the `Clone` implementation went unexercised.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        let cloned = feed.clone();

        // The copy reflects the customised fields and the preset encoding.
        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, "utf-8");

        // The original is still usable and unchanged after cloning.
        assert_eq!(feed.version, FeedVersion::Rss20);
        assert!(feed.bozo);
        assert_eq!(feed.encoding, cloned.encoding);
    }
}
455}