feedparser_rs/types/feed.rs
1use super::{
2 common::{Generator, Image, Link, Person, Tag, TextConstruct},
3 entry::Entry,
4 generics::LimitedCollectionExt,
5 podcast::{ItunesFeedMeta, PodcastMeta},
6 version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
14/// Feed metadata
15#[derive(Debug, Clone, Default)]
16pub struct FeedMeta {
17 /// Feed title
18 pub title: Option<String>,
19 /// Detailed title with metadata
20 pub title_detail: Option<TextConstruct>,
21 /// Primary feed link
22 pub link: Option<String>,
23 /// All links associated with this feed
24 pub links: Vec<Link>,
25 /// Feed subtitle/description
26 pub subtitle: Option<String>,
27 /// Detailed subtitle with metadata
28 pub subtitle_detail: Option<TextConstruct>,
29 /// Last update date
30 pub updated: Option<DateTime<Utc>>,
31 /// Initial publication date (RSS pubDate, Atom published)
32 pub published: Option<DateTime<Utc>>,
33 /// Primary author name (stored inline for names ≤24 bytes)
34 pub author: Option<super::common::SmallString>,
35 /// Detailed author information
36 pub author_detail: Option<Person>,
37 /// All authors
38 pub authors: Vec<Person>,
39 /// Contributors
40 pub contributors: Vec<Person>,
41 /// Publisher name (stored inline for names ≤24 bytes)
42 pub publisher: Option<super::common::SmallString>,
43 /// Detailed publisher information
44 pub publisher_detail: Option<Person>,
45 /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
46 pub language: Option<super::common::SmallString>,
47 /// Copyright/rights statement
48 pub rights: Option<String>,
49 /// Detailed rights with metadata
50 pub rights_detail: Option<TextConstruct>,
51 /// Generator name
52 pub generator: Option<String>,
53 /// Detailed generator information
54 pub generator_detail: Option<Generator>,
55 /// Feed image
56 pub image: Option<Image>,
57 /// Icon URL (small image)
58 pub icon: Option<String>,
59 /// Logo URL (larger image)
60 pub logo: Option<String>,
61 /// Feed-level tags/categories
62 pub tags: Vec<Tag>,
63 /// Unique feed identifier
64 pub id: Option<String>,
65 /// Time-to-live (update frequency hint) in minutes
66 pub ttl: Option<u32>,
67 /// iTunes podcast metadata (if present)
68 pub itunes: Option<Box<ItunesFeedMeta>>,
69 /// Podcast 2.0 namespace metadata (if present)
70 pub podcast: Option<Box<PodcastMeta>>,
71 /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
72 pub dc_creator: Option<super::common::SmallString>,
73 /// Dublin Core publisher (stored inline for names ≤24 bytes)
74 pub dc_publisher: Option<super::common::SmallString>,
75 /// Dublin Core rights (copyright)
76 pub dc_rights: Option<String>,
77 /// License URL (Creative Commons, etc.)
78 pub license: Option<String>,
79 /// Syndication module metadata (RSS 1.0)
80 pub syndication: Option<Box<SyndicationMeta>>,
81 /// Geographic location from `GeoRSS` namespace (feed level)
82 pub geo: Option<Box<crate::namespace::georss::GeoLocation>>,
83}
84
85/// Parsed feed result
86///
87/// This is the main result type returned by the parser, analogous to
88/// Python feedparser's `FeedParserDict`.
89#[derive(Debug, Clone, Default)]
90pub struct ParsedFeed {
91 /// Feed metadata
92 pub feed: FeedMeta,
93 /// Feed entries/items
94 pub entries: Vec<Entry>,
95 /// True if parsing encountered errors
96 pub bozo: bool,
97 /// Description of parsing error (if bozo is true)
98 pub bozo_exception: Option<String>,
99 /// Detected or declared encoding
100 pub encoding: String,
101 /// Detected feed format version
102 pub version: FeedVersion,
103 /// XML namespaces (prefix -> URI)
104 pub namespaces: HashMap<String, String>,
105 /// HTTP status code (if fetched from URL)
106 pub status: Option<u16>,
107 /// Final URL after redirects (if fetched from URL)
108 pub href: Option<String>,
109 /// `ETag` header from HTTP response
110 pub etag: Option<String>,
111 /// Last-Modified header from HTTP response
112 pub modified: Option<String>,
113 /// HTTP response headers (if fetched from URL)
114 #[cfg(feature = "http")]
115 pub headers: Option<HashMap<String, String>>,
116}
117
118impl ParsedFeed {
119 /// Creates a new `ParsedFeed` with default UTF-8 encoding
120 #[must_use]
121 pub fn new() -> Self {
122 Self {
123 encoding: String::from("utf-8"),
124 ..Default::default()
125 }
126 }
127
128 /// Creates a `ParsedFeed` with pre-allocated capacity for entries
129 ///
130 /// This method pre-allocates space for the expected number of entries,
131 /// reducing memory allocations during parsing.
132 ///
133 /// # Arguments
134 ///
135 /// * `entry_count` - Expected number of entries in the feed
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use feedparser_rs::ParsedFeed;
141 ///
142 /// let feed = ParsedFeed::with_capacity(50);
143 /// assert_eq!(feed.encoding, "utf-8");
144 /// ```
145 #[must_use]
146 pub fn with_capacity(entry_count: usize) -> Self {
147 Self {
148 entries: Vec::with_capacity(entry_count),
149 namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
150 encoding: String::from("utf-8"),
151 ..Default::default()
152 }
153 }
154
155 /// Check if entry limit is reached, set bozo flag and skip element if so
156 ///
157 /// This helper consolidates the duplicate entry limit checking logic used in
158 /// RSS and Atom parsers. If the entry limit is reached, it:
159 /// - Sets `bozo` flag to true
160 /// - Sets `bozo_exception` with descriptive error message
161 /// - Skips the entry element
162 /// - Returns `Ok(false)` to signal that the entry should not be processed
163 ///
164 /// # Arguments
165 ///
166 /// * `reader` - XML reader positioned at the entry element
167 /// * `buf` - Buffer for XML event reading
168 /// * `limits` - Parser limits including `max_entries`
169 /// * `depth` - Current nesting depth (will be decremented)
170 ///
171 /// # Returns
172 ///
173 /// * `Ok(true)` - Entry can be processed (limit not reached)
174 /// * `Ok(false)` - Entry limit reached, element was skipped
175 ///
176 /// # Errors
177 ///
178 /// Returns an error if:
179 /// - Skipping the entry element fails (e.g., malformed XML)
180 /// - Nesting depth exceeds limits while skipping
181 ///
182 /// # Examples
183 ///
184 /// ```ignore
185 /// // In parser:
186 /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
187 /// continue;
188 /// }
189 /// // Process entry...
190 /// ```
191 #[inline]
192 pub fn check_entry_limit(
193 &mut self,
194 reader: &mut Reader<&[u8]>,
195 buf: &mut Vec<u8>,
196 limits: &ParserLimits,
197 depth: &mut usize,
198 ) -> Result<bool> {
199 use crate::parser::skip_element;
200
201 if self.entries.is_at_limit(limits.max_entries) {
202 self.bozo = true;
203 self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
204 skip_element(reader, buf, limits, *depth)?;
205 *depth = depth.saturating_sub(1);
206 Ok(false)
207 } else {
208 Ok(true)
209 }
210 }
211}
212
213impl FeedMeta {
214 /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
215 ///
216 /// Pre-allocates collections based on common RSS 2.0 field usage:
217 /// - 1-2 links (channel link, self link)
218 /// - 1 author (managingEditor)
219 /// - 0-3 tags (categories)
220 ///
221 /// # Examples
222 ///
223 /// ```
224 /// use feedparser_rs::FeedMeta;
225 ///
226 /// let meta = FeedMeta::with_rss_capacity();
227 /// ```
228 #[must_use]
229 pub fn with_rss_capacity() -> Self {
230 Self {
231 links: Vec::with_capacity(2),
232 authors: Vec::with_capacity(1),
233 contributors: Vec::with_capacity(0),
234 tags: Vec::with_capacity(3),
235 ..Default::default()
236 }
237 }
238
239 /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
240 ///
241 /// Pre-allocates collections based on common Atom 1.0 field usage:
242 /// - 3-5 links (alternate, self, related, etc.)
243 /// - 1-2 authors
244 /// - 1 contributor
245 /// - 3-5 tags (categories)
246 ///
247 /// # Examples
248 ///
249 /// ```
250 /// use feedparser_rs::FeedMeta;
251 ///
252 /// let meta = FeedMeta::with_atom_capacity();
253 /// ```
254 #[must_use]
255 pub fn with_atom_capacity() -> Self {
256 Self {
257 links: Vec::with_capacity(4),
258 authors: Vec::with_capacity(2),
259 contributors: Vec::with_capacity(1),
260 tags: Vec::with_capacity(5),
261 ..Default::default()
262 }
263 }
264
265 /// Sets title field with `TextConstruct`, storing both simple and detailed versions
266 ///
267 /// # Examples
268 ///
269 /// ```
270 /// use feedparser_rs::{FeedMeta, TextConstruct};
271 ///
272 /// let mut meta = FeedMeta::default();
273 /// meta.set_title(TextConstruct::text("Example Feed"));
274 /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
275 /// ```
276 #[inline]
277 pub fn set_title(&mut self, mut text: TextConstruct) {
278 self.title = Some(std::mem::take(&mut text.value));
279 self.title_detail = Some(text);
280 }
281
282 /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
283 ///
284 /// # Examples
285 ///
286 /// ```
287 /// use feedparser_rs::{FeedMeta, TextConstruct};
288 ///
289 /// let mut meta = FeedMeta::default();
290 /// meta.set_subtitle(TextConstruct::text("A great feed"));
291 /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
292 /// ```
293 #[inline]
294 pub fn set_subtitle(&mut self, mut text: TextConstruct) {
295 self.subtitle = Some(std::mem::take(&mut text.value));
296 self.subtitle_detail = Some(text);
297 }
298
299 /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
300 ///
301 /// # Examples
302 ///
303 /// ```
304 /// use feedparser_rs::{FeedMeta, TextConstruct};
305 ///
306 /// let mut meta = FeedMeta::default();
307 /// meta.set_rights(TextConstruct::text("© 2025 Example"));
308 /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
309 /// ```
310 #[inline]
311 pub fn set_rights(&mut self, mut text: TextConstruct) {
312 self.rights = Some(std::mem::take(&mut text.value));
313 self.rights_detail = Some(text);
314 }
315
316 /// Sets generator field with `Generator`, storing both simple and detailed versions
317 ///
318 /// # Examples
319 ///
320 /// ```
321 /// use feedparser_rs::{FeedMeta, Generator};
322 ///
323 /// # fn main() {
324 /// let mut meta = FeedMeta::default();
325 /// let generator = Generator {
326 /// value: "Example Generator".to_string(),
327 /// uri: None,
328 /// version: None,
329 /// };
330 /// meta.set_generator(generator);
331 /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
332 /// # }
333 /// ```
334 #[inline]
335 pub fn set_generator(&mut self, mut generator: Generator) {
336 self.generator = Some(std::mem::take(&mut generator.value));
337 self.generator_detail = Some(generator);
338 }
339
340 /// Sets author field with `Person`, storing both simple and detailed versions
341 ///
342 /// # Examples
343 ///
344 /// ```
345 /// use feedparser_rs::{FeedMeta, Person};
346 ///
347 /// let mut meta = FeedMeta::default();
348 /// meta.set_author(Person::from_name("John Doe"));
349 /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
350 /// ```
351 #[inline]
352 pub fn set_author(&mut self, mut person: Person) {
353 self.author = person.name.take();
354 self.author_detail = Some(person);
355 }
356
357 /// Sets publisher field with `Person`, storing both simple and detailed versions
358 ///
359 /// # Examples
360 ///
361 /// ```
362 /// use feedparser_rs::{FeedMeta, Person};
363 ///
364 /// let mut meta = FeedMeta::default();
365 /// meta.set_publisher(Person::from_name("ACME Corp"));
366 /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
367 /// ```
368 #[inline]
369 pub fn set_publisher(&mut self, mut person: Person) {
370 self.publisher = person.name.take();
371 self.publisher_detail = Some(person);
372 }
373
374 /// Sets the primary link and adds it to the links collection
375 ///
376 /// This is a convenience method that:
377 /// 1. Sets the `link` field (if not already set)
378 /// 2. Adds an "alternate" link to the `links` collection
379 ///
380 /// # Examples
381 ///
382 /// ```
383 /// use feedparser_rs::FeedMeta;
384 ///
385 /// let mut meta = FeedMeta::default();
386 /// meta.set_alternate_link("https://example.com".to_string(), 10);
387 /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
388 /// assert_eq!(meta.links.len(), 1);
389 /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
390 /// ```
391 #[inline]
392 pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
393 if self.link.is_none() {
394 self.link = Some(href.clone());
395 }
396 self.links.try_push_limited(
397 Link {
398 href: href.into(),
399 rel: Some("alternate".into()),
400 ..Default::default()
401 },
402 max_links,
403 );
404 }
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` carries no title and has empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is error-free, has an unknown version, and no entries.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` presets the encoding to UTF-8 and leaves `bozo` unset.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` carries over the fields set on the original.
    #[test]
    fn test_parsed_feed_clone() {
        let original = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        // Exercise `Clone` itself: asserting only on `original` would never
        // touch the clone implementation this test is named after.
        let cloned = original.clone();

        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, original.encoding);
    }
}