feedparser_rs/types/feed.rs
1use super::{
2 common::{Generator, Image, Link, Person, Tag, TextConstruct},
3 entry::Entry,
4 generics::LimitedCollectionExt,
5 podcast::{ItunesFeedMeta, PodcastMeta},
6 version::FeedVersion,
7};
8use crate::{ParserLimits, error::Result};
9use chrono::{DateTime, Utc};
10use quick_xml::Reader;
11use std::collections::HashMap;
12
13/// Feed metadata
14#[derive(Debug, Clone, Default)]
15pub struct FeedMeta {
16 /// Feed title
17 pub title: Option<String>,
18 /// Detailed title with metadata
19 pub title_detail: Option<TextConstruct>,
20 /// Primary feed link
21 pub link: Option<String>,
22 /// All links associated with this feed
23 pub links: Vec<Link>,
24 /// Feed subtitle/description
25 pub subtitle: Option<String>,
26 /// Detailed subtitle with metadata
27 pub subtitle_detail: Option<TextConstruct>,
28 /// Last update date
29 pub updated: Option<DateTime<Utc>>,
30 /// Primary author name
31 pub author: Option<String>,
32 /// Detailed author information
33 pub author_detail: Option<Person>,
34 /// All authors
35 pub authors: Vec<Person>,
36 /// Contributors
37 pub contributors: Vec<Person>,
38 /// Publisher name
39 pub publisher: Option<String>,
40 /// Detailed publisher information
41 pub publisher_detail: Option<Person>,
42 /// Feed language (e.g., "en-us")
43 pub language: Option<String>,
44 /// Copyright/rights statement
45 pub rights: Option<String>,
46 /// Detailed rights with metadata
47 pub rights_detail: Option<TextConstruct>,
48 /// Generator name
49 pub generator: Option<String>,
50 /// Detailed generator information
51 pub generator_detail: Option<Generator>,
52 /// Feed image
53 pub image: Option<Image>,
54 /// Icon URL (small image)
55 pub icon: Option<String>,
56 /// Logo URL (larger image)
57 pub logo: Option<String>,
58 /// Feed-level tags/categories
59 pub tags: Vec<Tag>,
60 /// Unique feed identifier
61 pub id: Option<String>,
62 /// Time-to-live (update frequency hint) in minutes
63 pub ttl: Option<u32>,
64 /// iTunes podcast metadata (if present)
65 pub itunes: Option<ItunesFeedMeta>,
66 /// Podcast 2.0 namespace metadata (if present)
67 pub podcast: Option<PodcastMeta>,
68 /// Dublin Core creator (author fallback)
69 pub dc_creator: Option<String>,
70 /// Dublin Core publisher
71 pub dc_publisher: Option<String>,
72 /// Dublin Core rights (copyright)
73 pub dc_rights: Option<String>,
74}
75
76/// Parsed feed result
77///
78/// This is the main result type returned by the parser, analogous to
79/// Python feedparser's `FeedParserDict`.
80#[derive(Debug, Clone, Default)]
81pub struct ParsedFeed {
82 /// Feed metadata
83 pub feed: FeedMeta,
84 /// Feed entries/items
85 pub entries: Vec<Entry>,
86 /// True if parsing encountered errors
87 pub bozo: bool,
88 /// Description of parsing error (if bozo is true)
89 pub bozo_exception: Option<String>,
90 /// Detected or declared encoding
91 pub encoding: String,
92 /// Detected feed format version
93 pub version: FeedVersion,
94 /// XML namespaces (prefix -> URI)
95 pub namespaces: HashMap<String, String>,
96 /// HTTP status code (if fetched from URL)
97 pub status: Option<u16>,
98 /// Final URL after redirects (if fetched from URL)
99 pub href: Option<String>,
100 /// `ETag` header from HTTP response
101 pub etag: Option<String>,
102 /// Last-Modified header from HTTP response
103 pub modified: Option<String>,
104 /// HTTP response headers (if fetched from URL)
105 #[cfg(feature = "http")]
106 pub headers: Option<HashMap<String, String>>,
107}
108
109impl ParsedFeed {
110 /// Creates a new `ParsedFeed` with default UTF-8 encoding
111 #[must_use]
112 pub fn new() -> Self {
113 Self {
114 encoding: String::from("utf-8"),
115 ..Default::default()
116 }
117 }
118
119 /// Creates a `ParsedFeed` with pre-allocated capacity for entries
120 ///
121 /// This method pre-allocates space for the expected number of entries,
122 /// reducing memory allocations during parsing.
123 ///
124 /// # Arguments
125 ///
126 /// * `entry_count` - Expected number of entries in the feed
127 ///
128 /// # Examples
129 ///
130 /// ```
131 /// use feedparser_rs::ParsedFeed;
132 ///
133 /// let feed = ParsedFeed::with_capacity(50);
134 /// assert_eq!(feed.encoding, "utf-8");
135 /// ```
136 #[must_use]
137 pub fn with_capacity(entry_count: usize) -> Self {
138 Self {
139 entries: Vec::with_capacity(entry_count),
140 namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
141 encoding: String::from("utf-8"),
142 ..Default::default()
143 }
144 }
145
146 /// Check if entry limit is reached, set bozo flag and skip element if so
147 ///
148 /// This helper consolidates the duplicate entry limit checking logic used in
149 /// RSS and Atom parsers. If the entry limit is reached, it:
150 /// - Sets `bozo` flag to true
151 /// - Sets `bozo_exception` with descriptive error message
152 /// - Skips the entry element
153 /// - Returns `Ok(false)` to signal that the entry should not be processed
154 ///
155 /// # Arguments
156 ///
157 /// * `reader` - XML reader positioned at the entry element
158 /// * `buf` - Buffer for XML event reading
159 /// * `limits` - Parser limits including `max_entries`
160 /// * `depth` - Current nesting depth (will be decremented)
161 ///
162 /// # Returns
163 ///
164 /// * `Ok(true)` - Entry can be processed (limit not reached)
165 /// * `Ok(false)` - Entry limit reached, element was skipped
166 ///
167 /// # Errors
168 ///
169 /// Returns an error if:
170 /// - Skipping the entry element fails (e.g., malformed XML)
171 /// - Nesting depth exceeds limits while skipping
172 ///
173 /// # Examples
174 ///
175 /// ```ignore
176 /// // In parser:
177 /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
178 /// continue;
179 /// }
180 /// // Process entry...
181 /// ```
182 #[inline]
183 pub fn check_entry_limit(
184 &mut self,
185 reader: &mut Reader<&[u8]>,
186 buf: &mut Vec<u8>,
187 limits: &ParserLimits,
188 depth: &mut usize,
189 ) -> Result<bool> {
190 use crate::parser::skip_element;
191
192 if self.entries.is_at_limit(limits.max_entries) {
193 self.bozo = true;
194 self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
195 skip_element(reader, buf, limits, *depth)?;
196 *depth = depth.saturating_sub(1);
197 Ok(false)
198 } else {
199 Ok(true)
200 }
201 }
202}
203
204impl FeedMeta {
205 /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
206 ///
207 /// Pre-allocates collections based on common RSS 2.0 field usage:
208 /// - 1-2 links (channel link, self link)
209 /// - 1 author (managingEditor)
210 /// - 0-3 tags (categories)
211 ///
212 /// # Examples
213 ///
214 /// ```
215 /// use feedparser_rs::FeedMeta;
216 ///
217 /// let meta = FeedMeta::with_rss_capacity();
218 /// ```
219 #[must_use]
220 pub fn with_rss_capacity() -> Self {
221 Self {
222 links: Vec::with_capacity(2),
223 authors: Vec::with_capacity(1),
224 contributors: Vec::with_capacity(0),
225 tags: Vec::with_capacity(3),
226 ..Default::default()
227 }
228 }
229
230 /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
231 ///
232 /// Pre-allocates collections based on common Atom 1.0 field usage:
233 /// - 3-5 links (alternate, self, related, etc.)
234 /// - 1-2 authors
235 /// - 1 contributor
236 /// - 3-5 tags (categories)
237 ///
238 /// # Examples
239 ///
240 /// ```
241 /// use feedparser_rs::FeedMeta;
242 ///
243 /// let meta = FeedMeta::with_atom_capacity();
244 /// ```
245 #[must_use]
246 pub fn with_atom_capacity() -> Self {
247 Self {
248 links: Vec::with_capacity(4),
249 authors: Vec::with_capacity(2),
250 contributors: Vec::with_capacity(1),
251 tags: Vec::with_capacity(5),
252 ..Default::default()
253 }
254 }
255
256 /// Sets title field with `TextConstruct`, storing both simple and detailed versions
257 ///
258 /// # Examples
259 ///
260 /// ```
261 /// use feedparser_rs::{FeedMeta, TextConstruct};
262 ///
263 /// let mut meta = FeedMeta::default();
264 /// meta.set_title(TextConstruct::text("Example Feed"));
265 /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
266 /// ```
267 #[inline]
268 pub fn set_title(&mut self, mut text: TextConstruct) {
269 self.title = Some(std::mem::take(&mut text.value));
270 self.title_detail = Some(text);
271 }
272
273 /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
274 ///
275 /// # Examples
276 ///
277 /// ```
278 /// use feedparser_rs::{FeedMeta, TextConstruct};
279 ///
280 /// let mut meta = FeedMeta::default();
281 /// meta.set_subtitle(TextConstruct::text("A great feed"));
282 /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
283 /// ```
284 #[inline]
285 pub fn set_subtitle(&mut self, mut text: TextConstruct) {
286 self.subtitle = Some(std::mem::take(&mut text.value));
287 self.subtitle_detail = Some(text);
288 }
289
290 /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// use feedparser_rs::{FeedMeta, TextConstruct};
296 ///
297 /// let mut meta = FeedMeta::default();
298 /// meta.set_rights(TextConstruct::text("© 2025 Example"));
299 /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
300 /// ```
301 #[inline]
302 pub fn set_rights(&mut self, mut text: TextConstruct) {
303 self.rights = Some(std::mem::take(&mut text.value));
304 self.rights_detail = Some(text);
305 }
306
307 /// Sets generator field with `Generator`, storing both simple and detailed versions
308 ///
309 /// # Examples
310 ///
311 /// ```
312 /// use feedparser_rs::{FeedMeta, Generator};
313 ///
314 /// # fn main() {
315 /// let mut meta = FeedMeta::default();
316 /// let generator = Generator {
317 /// value: "Example Generator".to_string(),
318 /// uri: None,
319 /// version: None,
320 /// };
321 /// meta.set_generator(generator);
322 /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
323 /// # }
324 /// ```
325 #[inline]
326 pub fn set_generator(&mut self, mut generator: Generator) {
327 self.generator = Some(std::mem::take(&mut generator.value));
328 self.generator_detail = Some(generator);
329 }
330
331 /// Sets author field with `Person`, storing both simple and detailed versions
332 ///
333 /// # Examples
334 ///
335 /// ```
336 /// use feedparser_rs::{FeedMeta, Person};
337 ///
338 /// let mut meta = FeedMeta::default();
339 /// meta.set_author(Person::from_name("John Doe"));
340 /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
341 /// ```
342 #[inline]
343 pub fn set_author(&mut self, mut person: Person) {
344 self.author = person.name.take();
345 self.author_detail = Some(person);
346 }
347
348 /// Sets publisher field with `Person`, storing both simple and detailed versions
349 ///
350 /// # Examples
351 ///
352 /// ```
353 /// use feedparser_rs::{FeedMeta, Person};
354 ///
355 /// let mut meta = FeedMeta::default();
356 /// meta.set_publisher(Person::from_name("ACME Corp"));
357 /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
358 /// ```
359 #[inline]
360 pub fn set_publisher(&mut self, mut person: Person) {
361 self.publisher = person.name.take();
362 self.publisher_detail = Some(person);
363 }
364
365 /// Sets the primary link and adds it to the links collection
366 ///
367 /// This is a convenience method that:
368 /// 1. Sets the `link` field (if not already set)
369 /// 2. Adds an "alternate" link to the `links` collection
370 ///
371 /// # Examples
372 ///
373 /// ```
374 /// use feedparser_rs::FeedMeta;
375 ///
376 /// let mut meta = FeedMeta::default();
377 /// meta.set_alternate_link("https://example.com".to_string(), 10);
378 /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
379 /// assert_eq!(meta.links.len(), 1);
380 /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
381 /// ```
382 #[inline]
383 pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
384 if self.link.is_none() {
385 self.link = Some(href.clone());
386 }
387 self.links.try_push_limited(
388 Link {
389 href,
390 rel: Some("alternate".to_string()),
391 ..Default::default()
392 },
393 max_links,
394 );
395 }
396}
397
#[cfg(test)]
mod tests {
    use super::*;

    // A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    // A default `ParsedFeed` reports no error, an unknown version and no entries.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    // `ParsedFeed::new` sets the UTF-8 encoding and leaves `bozo` unset.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    // Cloning carries the field values over and leaves the original untouched.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        let cloned = feed.clone();

        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, "utf-8");

        assert_eq!(feed.version, FeedVersion::Rss20);
        assert!(feed.bozo);
        assert_eq!(feed.encoding, "utf-8");
    }
}
437}