feedparser_rs/types/feed.rs
1use super::{
2 common::{Generator, Image, Link, Person, Tag, TextConstruct},
3 entry::Entry,
4 generics::LimitedCollectionExt,
5 podcast::{ItunesFeedMeta, PodcastMeta},
6 version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
14/// Feed metadata
15#[derive(Debug, Clone, Default)]
16pub struct FeedMeta {
17 /// Feed title
18 pub title: Option<String>,
19 /// Detailed title with metadata
20 pub title_detail: Option<TextConstruct>,
21 /// Primary feed link
22 pub link: Option<String>,
23 /// All links associated with this feed
24 pub links: Vec<Link>,
25 /// Feed subtitle/description
26 pub subtitle: Option<String>,
27 /// Detailed subtitle with metadata
28 pub subtitle_detail: Option<TextConstruct>,
29 /// Last update date
30 pub updated: Option<DateTime<Utc>>,
31 /// Initial publication date (RSS pubDate, Atom published)
32 pub published: Option<DateTime<Utc>>,
33 /// Primary author name
34 pub author: Option<String>,
35 /// Detailed author information
36 pub author_detail: Option<Person>,
37 /// All authors
38 pub authors: Vec<Person>,
39 /// Contributors
40 pub contributors: Vec<Person>,
41 /// Publisher name
42 pub publisher: Option<String>,
43 /// Detailed publisher information
44 pub publisher_detail: Option<Person>,
45 /// Feed language (e.g., "en-us")
46 pub language: Option<String>,
47 /// Copyright/rights statement
48 pub rights: Option<String>,
49 /// Detailed rights with metadata
50 pub rights_detail: Option<TextConstruct>,
51 /// Generator name
52 pub generator: Option<String>,
53 /// Detailed generator information
54 pub generator_detail: Option<Generator>,
55 /// Feed image
56 pub image: Option<Image>,
57 /// Icon URL (small image)
58 pub icon: Option<String>,
59 /// Logo URL (larger image)
60 pub logo: Option<String>,
61 /// Feed-level tags/categories
62 pub tags: Vec<Tag>,
63 /// Unique feed identifier
64 pub id: Option<String>,
65 /// Time-to-live (update frequency hint) in minutes
66 pub ttl: Option<u32>,
67 /// iTunes podcast metadata (if present)
68 pub itunes: Option<ItunesFeedMeta>,
69 /// Podcast 2.0 namespace metadata (if present)
70 pub podcast: Option<PodcastMeta>,
71 /// Dublin Core creator (author fallback)
72 pub dc_creator: Option<String>,
73 /// Dublin Core publisher
74 pub dc_publisher: Option<String>,
75 /// Dublin Core rights (copyright)
76 pub dc_rights: Option<String>,
77 /// License URL (Creative Commons, etc.)
78 pub license: Option<String>,
79 /// Syndication module metadata (RSS 1.0)
80 pub syndication: Option<SyndicationMeta>,
81}
82
83/// Parsed feed result
84///
85/// This is the main result type returned by the parser, analogous to
86/// Python feedparser's `FeedParserDict`.
87#[derive(Debug, Clone, Default)]
88pub struct ParsedFeed {
89 /// Feed metadata
90 pub feed: FeedMeta,
91 /// Feed entries/items
92 pub entries: Vec<Entry>,
93 /// True if parsing encountered errors
94 pub bozo: bool,
95 /// Description of parsing error (if bozo is true)
96 pub bozo_exception: Option<String>,
97 /// Detected or declared encoding
98 pub encoding: String,
99 /// Detected feed format version
100 pub version: FeedVersion,
101 /// XML namespaces (prefix -> URI)
102 pub namespaces: HashMap<String, String>,
103 /// HTTP status code (if fetched from URL)
104 pub status: Option<u16>,
105 /// Final URL after redirects (if fetched from URL)
106 pub href: Option<String>,
107 /// `ETag` header from HTTP response
108 pub etag: Option<String>,
109 /// Last-Modified header from HTTP response
110 pub modified: Option<String>,
111 /// HTTP response headers (if fetched from URL)
112 #[cfg(feature = "http")]
113 pub headers: Option<HashMap<String, String>>,
114}
115
116impl ParsedFeed {
117 /// Creates a new `ParsedFeed` with default UTF-8 encoding
118 #[must_use]
119 pub fn new() -> Self {
120 Self {
121 encoding: String::from("utf-8"),
122 ..Default::default()
123 }
124 }
125
126 /// Creates a `ParsedFeed` with pre-allocated capacity for entries
127 ///
128 /// This method pre-allocates space for the expected number of entries,
129 /// reducing memory allocations during parsing.
130 ///
131 /// # Arguments
132 ///
133 /// * `entry_count` - Expected number of entries in the feed
134 ///
135 /// # Examples
136 ///
137 /// ```
138 /// use feedparser_rs::ParsedFeed;
139 ///
140 /// let feed = ParsedFeed::with_capacity(50);
141 /// assert_eq!(feed.encoding, "utf-8");
142 /// ```
143 #[must_use]
144 pub fn with_capacity(entry_count: usize) -> Self {
145 Self {
146 entries: Vec::with_capacity(entry_count),
147 namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
148 encoding: String::from("utf-8"),
149 ..Default::default()
150 }
151 }
152
153 /// Check if entry limit is reached, set bozo flag and skip element if so
154 ///
155 /// This helper consolidates the duplicate entry limit checking logic used in
156 /// RSS and Atom parsers. If the entry limit is reached, it:
157 /// - Sets `bozo` flag to true
158 /// - Sets `bozo_exception` with descriptive error message
159 /// - Skips the entry element
160 /// - Returns `Ok(false)` to signal that the entry should not be processed
161 ///
162 /// # Arguments
163 ///
164 /// * `reader` - XML reader positioned at the entry element
165 /// * `buf` - Buffer for XML event reading
166 /// * `limits` - Parser limits including `max_entries`
167 /// * `depth` - Current nesting depth (will be decremented)
168 ///
169 /// # Returns
170 ///
171 /// * `Ok(true)` - Entry can be processed (limit not reached)
172 /// * `Ok(false)` - Entry limit reached, element was skipped
173 ///
174 /// # Errors
175 ///
176 /// Returns an error if:
177 /// - Skipping the entry element fails (e.g., malformed XML)
178 /// - Nesting depth exceeds limits while skipping
179 ///
180 /// # Examples
181 ///
182 /// ```ignore
183 /// // In parser:
184 /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
185 /// continue;
186 /// }
187 /// // Process entry...
188 /// ```
189 #[inline]
190 pub fn check_entry_limit(
191 &mut self,
192 reader: &mut Reader<&[u8]>,
193 buf: &mut Vec<u8>,
194 limits: &ParserLimits,
195 depth: &mut usize,
196 ) -> Result<bool> {
197 use crate::parser::skip_element;
198
199 if self.entries.is_at_limit(limits.max_entries) {
200 self.bozo = true;
201 self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
202 skip_element(reader, buf, limits, *depth)?;
203 *depth = depth.saturating_sub(1);
204 Ok(false)
205 } else {
206 Ok(true)
207 }
208 }
209}
210
211impl FeedMeta {
212 /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
213 ///
214 /// Pre-allocates collections based on common RSS 2.0 field usage:
215 /// - 1-2 links (channel link, self link)
216 /// - 1 author (managingEditor)
217 /// - 0-3 tags (categories)
218 ///
219 /// # Examples
220 ///
221 /// ```
222 /// use feedparser_rs::FeedMeta;
223 ///
224 /// let meta = FeedMeta::with_rss_capacity();
225 /// ```
226 #[must_use]
227 pub fn with_rss_capacity() -> Self {
228 Self {
229 links: Vec::with_capacity(2),
230 authors: Vec::with_capacity(1),
231 contributors: Vec::with_capacity(0),
232 tags: Vec::with_capacity(3),
233 ..Default::default()
234 }
235 }
236
237 /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
238 ///
239 /// Pre-allocates collections based on common Atom 1.0 field usage:
240 /// - 3-5 links (alternate, self, related, etc.)
241 /// - 1-2 authors
242 /// - 1 contributor
243 /// - 3-5 tags (categories)
244 ///
245 /// # Examples
246 ///
247 /// ```
248 /// use feedparser_rs::FeedMeta;
249 ///
250 /// let meta = FeedMeta::with_atom_capacity();
251 /// ```
252 #[must_use]
253 pub fn with_atom_capacity() -> Self {
254 Self {
255 links: Vec::with_capacity(4),
256 authors: Vec::with_capacity(2),
257 contributors: Vec::with_capacity(1),
258 tags: Vec::with_capacity(5),
259 ..Default::default()
260 }
261 }
262
263 /// Sets title field with `TextConstruct`, storing both simple and detailed versions
264 ///
265 /// # Examples
266 ///
267 /// ```
268 /// use feedparser_rs::{FeedMeta, TextConstruct};
269 ///
270 /// let mut meta = FeedMeta::default();
271 /// meta.set_title(TextConstruct::text("Example Feed"));
272 /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
273 /// ```
274 #[inline]
275 pub fn set_title(&mut self, mut text: TextConstruct) {
276 self.title = Some(std::mem::take(&mut text.value));
277 self.title_detail = Some(text);
278 }
279
280 /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
281 ///
282 /// # Examples
283 ///
284 /// ```
285 /// use feedparser_rs::{FeedMeta, TextConstruct};
286 ///
287 /// let mut meta = FeedMeta::default();
288 /// meta.set_subtitle(TextConstruct::text("A great feed"));
289 /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
290 /// ```
291 #[inline]
292 pub fn set_subtitle(&mut self, mut text: TextConstruct) {
293 self.subtitle = Some(std::mem::take(&mut text.value));
294 self.subtitle_detail = Some(text);
295 }
296
297 /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
298 ///
299 /// # Examples
300 ///
301 /// ```
302 /// use feedparser_rs::{FeedMeta, TextConstruct};
303 ///
304 /// let mut meta = FeedMeta::default();
305 /// meta.set_rights(TextConstruct::text("© 2025 Example"));
306 /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
307 /// ```
308 #[inline]
309 pub fn set_rights(&mut self, mut text: TextConstruct) {
310 self.rights = Some(std::mem::take(&mut text.value));
311 self.rights_detail = Some(text);
312 }
313
314 /// Sets generator field with `Generator`, storing both simple and detailed versions
315 ///
316 /// # Examples
317 ///
318 /// ```
319 /// use feedparser_rs::{FeedMeta, Generator};
320 ///
321 /// # fn main() {
322 /// let mut meta = FeedMeta::default();
323 /// let generator = Generator {
324 /// value: "Example Generator".to_string(),
325 /// uri: None,
326 /// version: None,
327 /// };
328 /// meta.set_generator(generator);
329 /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
330 /// # }
331 /// ```
332 #[inline]
333 pub fn set_generator(&mut self, mut generator: Generator) {
334 self.generator = Some(std::mem::take(&mut generator.value));
335 self.generator_detail = Some(generator);
336 }
337
338 /// Sets author field with `Person`, storing both simple and detailed versions
339 ///
340 /// # Examples
341 ///
342 /// ```
343 /// use feedparser_rs::{FeedMeta, Person};
344 ///
345 /// let mut meta = FeedMeta::default();
346 /// meta.set_author(Person::from_name("John Doe"));
347 /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
348 /// ```
349 #[inline]
350 pub fn set_author(&mut self, mut person: Person) {
351 self.author = person.name.take();
352 self.author_detail = Some(person);
353 }
354
355 /// Sets publisher field with `Person`, storing both simple and detailed versions
356 ///
357 /// # Examples
358 ///
359 /// ```
360 /// use feedparser_rs::{FeedMeta, Person};
361 ///
362 /// let mut meta = FeedMeta::default();
363 /// meta.set_publisher(Person::from_name("ACME Corp"));
364 /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
365 /// ```
366 #[inline]
367 pub fn set_publisher(&mut self, mut person: Person) {
368 self.publisher = person.name.take();
369 self.publisher_detail = Some(person);
370 }
371
372 /// Sets the primary link and adds it to the links collection
373 ///
374 /// This is a convenience method that:
375 /// 1. Sets the `link` field (if not already set)
376 /// 2. Adds an "alternate" link to the `links` collection
377 ///
378 /// # Examples
379 ///
380 /// ```
381 /// use feedparser_rs::FeedMeta;
382 ///
383 /// let mut meta = FeedMeta::default();
384 /// meta.set_alternate_link("https://example.com".to_string(), 10);
385 /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
386 /// assert_eq!(meta.links.len(), 1);
387 /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
388 /// ```
389 #[inline]
390 pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
391 if self.link.is_none() {
392 self.link = Some(href.clone());
393 }
394 self.links.try_push_limited(
395 Link {
396 href,
397 rel: Some("alternate".to_string()),
398 ..Default::default()
399 },
400 max_links,
401 );
402 }
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is error-free, of unknown version, and empty.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` presets the encoding and leaves `bozo` unset.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` carries the customised fields over to the copy.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };
        // Actually exercise `Clone`, which is what this test is named for.
        let cloned = feed.clone();

        assert_eq!(feed.version, FeedVersion::Rss20);
        assert!(feed.bozo);

        assert_eq!(cloned.version, feed.version);
        assert_eq!(cloned.bozo, feed.bozo);
        assert_eq!(cloned.encoding, feed.encoding);
    }
}