feedparser_rs/types/feed.rs
1use super::{
2 common::{Generator, Image, Link, Person, Tag, TextConstruct},
3 entry::Entry,
4 generics::LimitedCollectionExt,
5 podcast::{ItunesFeedMeta, PodcastMeta},
6 version::FeedVersion,
7};
8use crate::namespace::syndication::SyndicationMeta;
9use crate::{ParserLimits, error::Result};
10use chrono::{DateTime, Utc};
11use quick_xml::Reader;
12use std::collections::HashMap;
13
14/// Feed metadata
15#[derive(Debug, Clone, Default)]
16pub struct FeedMeta {
17 /// Feed title
18 pub title: Option<String>,
19 /// Detailed title with metadata
20 pub title_detail: Option<TextConstruct>,
21 /// Primary feed link
22 pub link: Option<String>,
23 /// All links associated with this feed
24 pub links: Vec<Link>,
25 /// Feed subtitle/description
26 pub subtitle: Option<String>,
27 /// Detailed subtitle with metadata
28 pub subtitle_detail: Option<TextConstruct>,
29 /// Last update date
30 pub updated: Option<DateTime<Utc>>,
31 /// Initial publication date (RSS pubDate, Atom published)
32 pub published: Option<DateTime<Utc>>,
33 /// Primary author name (stored inline for names ≤24 bytes)
34 pub author: Option<super::common::SmallString>,
35 /// Detailed author information
36 pub author_detail: Option<Person>,
37 /// All authors
38 pub authors: Vec<Person>,
39 /// Contributors
40 pub contributors: Vec<Person>,
41 /// Publisher name (stored inline for names ≤24 bytes)
42 pub publisher: Option<super::common::SmallString>,
43 /// Detailed publisher information
44 pub publisher_detail: Option<Person>,
45 /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
46 pub language: Option<super::common::SmallString>,
47 /// Copyright/rights statement
48 pub rights: Option<String>,
49 /// Detailed rights with metadata
50 pub rights_detail: Option<TextConstruct>,
51 /// Generator name
52 pub generator: Option<String>,
53 /// Detailed generator information
54 pub generator_detail: Option<Generator>,
55 /// Feed image
56 pub image: Option<Image>,
57 /// Icon URL (small image)
58 pub icon: Option<String>,
59 /// Logo URL (larger image)
60 pub logo: Option<String>,
61 /// Feed-level tags/categories
62 pub tags: Vec<Tag>,
63 /// Unique feed identifier
64 pub id: Option<String>,
65 /// Time-to-live (update frequency hint) in minutes
66 pub ttl: Option<u32>,
67 /// iTunes podcast metadata (if present)
68 pub itunes: Option<Box<ItunesFeedMeta>>,
69 /// Podcast 2.0 namespace metadata (if present)
70 pub podcast: Option<Box<PodcastMeta>>,
71 /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
72 pub dc_creator: Option<super::common::SmallString>,
73 /// Dublin Core publisher (stored inline for names ≤24 bytes)
74 pub dc_publisher: Option<super::common::SmallString>,
75 /// Dublin Core rights (copyright)
76 pub dc_rights: Option<String>,
77 /// License URL (Creative Commons, etc.)
78 pub license: Option<String>,
79 /// Syndication module metadata (RSS 1.0)
80 pub syndication: Option<Box<SyndicationMeta>>,
81 /// Geographic location from `GeoRSS` namespace (feed level)
82 pub geo: Option<Box<crate::namespace::georss::GeoLocation>>,
83 /// Pagination URL for the next page of results (JSON Feed `next_url`, RFC 5005 `<link rel="next">`)
84 pub next_url: Option<String>,
85}
86
87/// Parsed feed result
88///
89/// This is the main result type returned by the parser, analogous to
90/// Python feedparser's `FeedParserDict`.
91#[derive(Debug, Clone, Default)]
92pub struct ParsedFeed {
93 /// Feed metadata
94 pub feed: FeedMeta,
95 /// Feed entries/items
96 pub entries: Vec<Entry>,
97 /// True if parsing encountered errors
98 pub bozo: bool,
99 /// Description of parsing error (if bozo is true)
100 pub bozo_exception: Option<String>,
101 /// Detected or declared encoding
102 pub encoding: String,
103 /// Detected feed format version
104 pub version: FeedVersion,
105 /// XML namespaces (prefix -> URI)
106 pub namespaces: HashMap<String, String>,
107 /// HTTP status code (if fetched from URL)
108 pub status: Option<u16>,
109 /// Final URL after redirects (if fetched from URL)
110 pub href: Option<String>,
111 /// `ETag` header from HTTP response
112 pub etag: Option<String>,
113 /// Last-Modified header from HTTP response
114 pub modified: Option<String>,
115 /// HTTP response headers (if fetched from URL)
116 #[cfg(feature = "http")]
117 pub headers: Option<HashMap<String, String>>,
118}
119
120impl ParsedFeed {
121 /// Creates a new `ParsedFeed` with default UTF-8 encoding
122 #[must_use]
123 pub fn new() -> Self {
124 Self {
125 encoding: String::from("utf-8"),
126 ..Default::default()
127 }
128 }
129
130 /// Creates a `ParsedFeed` with pre-allocated capacity for entries
131 ///
132 /// This method pre-allocates space for the expected number of entries,
133 /// reducing memory allocations during parsing.
134 ///
135 /// # Arguments
136 ///
137 /// * `entry_count` - Expected number of entries in the feed
138 ///
139 /// # Examples
140 ///
141 /// ```
142 /// use feedparser_rs::ParsedFeed;
143 ///
144 /// let feed = ParsedFeed::with_capacity(50);
145 /// assert_eq!(feed.encoding, "utf-8");
146 /// ```
147 #[must_use]
148 pub fn with_capacity(entry_count: usize) -> Self {
149 Self {
150 entries: Vec::with_capacity(entry_count),
151 namespaces: HashMap::with_capacity(8), // Typical feeds have 3-8 namespaces
152 encoding: String::from("utf-8"),
153 ..Default::default()
154 }
155 }
156
157 /// Check if entry limit is reached, set bozo flag and skip element if so
158 ///
159 /// This helper consolidates the duplicate entry limit checking logic used in
160 /// RSS and Atom parsers. If the entry limit is reached, it:
161 /// - Sets `bozo` flag to true
162 /// - Sets `bozo_exception` with descriptive error message
163 /// - Skips the entry element
164 /// - Returns `Ok(false)` to signal that the entry should not be processed
165 ///
166 /// # Arguments
167 ///
168 /// * `reader` - XML reader positioned at the entry element
169 /// * `buf` - Buffer for XML event reading
170 /// * `limits` - Parser limits including `max_entries`
171 /// * `depth` - Current nesting depth (will be decremented)
172 ///
173 /// # Returns
174 ///
175 /// * `Ok(true)` - Entry can be processed (limit not reached)
176 /// * `Ok(false)` - Entry limit reached, element was skipped
177 ///
178 /// # Errors
179 ///
180 /// Returns an error if:
181 /// - Skipping the entry element fails (e.g., malformed XML)
182 /// - Nesting depth exceeds limits while skipping
183 ///
184 /// # Examples
185 ///
186 /// ```ignore
187 /// // In parser:
188 /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
189 /// continue;
190 /// }
191 /// // Process entry...
192 /// ```
193 #[inline]
194 pub fn check_entry_limit(
195 &mut self,
196 reader: &mut Reader<&[u8]>,
197 buf: &mut Vec<u8>,
198 limits: &ParserLimits,
199 depth: &mut usize,
200 ) -> Result<bool> {
201 use crate::parser::skip_element;
202
203 if self.entries.is_at_limit(limits.max_entries) {
204 self.bozo = true;
205 self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));
206 skip_element(reader, buf, limits, *depth)?;
207 *depth = depth.saturating_sub(1);
208 Ok(false)
209 } else {
210 Ok(true)
211 }
212 }
213}
214
215impl FeedMeta {
216 /// Creates `FeedMeta` with capacity hints for typical RSS 2.0 feeds
217 ///
218 /// Pre-allocates collections based on common RSS 2.0 field usage:
219 /// - 1-2 links (channel link, self link)
220 /// - 1 author (managingEditor)
221 /// - 0-3 tags (categories)
222 ///
223 /// # Examples
224 ///
225 /// ```
226 /// use feedparser_rs::FeedMeta;
227 ///
228 /// let meta = FeedMeta::with_rss_capacity();
229 /// ```
230 #[must_use]
231 pub fn with_rss_capacity() -> Self {
232 Self {
233 links: Vec::with_capacity(2),
234 authors: Vec::with_capacity(1),
235 contributors: Vec::with_capacity(0),
236 tags: Vec::with_capacity(3),
237 ..Default::default()
238 }
239 }
240
241 /// Creates `FeedMeta` with capacity hints for typical Atom 1.0 feeds
242 ///
243 /// Pre-allocates collections based on common Atom 1.0 field usage:
244 /// - 3-5 links (alternate, self, related, etc.)
245 /// - 1-2 authors
246 /// - 1 contributor
247 /// - 3-5 tags (categories)
248 ///
249 /// # Examples
250 ///
251 /// ```
252 /// use feedparser_rs::FeedMeta;
253 ///
254 /// let meta = FeedMeta::with_atom_capacity();
255 /// ```
256 #[must_use]
257 pub fn with_atom_capacity() -> Self {
258 Self {
259 links: Vec::with_capacity(4),
260 authors: Vec::with_capacity(2),
261 contributors: Vec::with_capacity(1),
262 tags: Vec::with_capacity(5),
263 ..Default::default()
264 }
265 }
266
267 /// Sets title field with `TextConstruct`, storing both simple and detailed versions
268 ///
269 /// # Examples
270 ///
271 /// ```
272 /// use feedparser_rs::{FeedMeta, TextConstruct};
273 ///
274 /// let mut meta = FeedMeta::default();
275 /// meta.set_title(TextConstruct::text("Example Feed"));
276 /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
277 /// ```
278 #[inline]
279 pub fn set_title(&mut self, mut text: TextConstruct) {
280 self.title = Some(std::mem::take(&mut text.value));
281 self.title_detail = Some(text);
282 }
283
284 /// Sets subtitle field with `TextConstruct`, storing both simple and detailed versions
285 ///
286 /// # Examples
287 ///
288 /// ```
289 /// use feedparser_rs::{FeedMeta, TextConstruct};
290 ///
291 /// let mut meta = FeedMeta::default();
292 /// meta.set_subtitle(TextConstruct::text("A great feed"));
293 /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
294 /// ```
295 #[inline]
296 pub fn set_subtitle(&mut self, mut text: TextConstruct) {
297 self.subtitle = Some(std::mem::take(&mut text.value));
298 self.subtitle_detail = Some(text);
299 }
300
301 /// Sets rights field with `TextConstruct`, storing both simple and detailed versions
302 ///
303 /// # Examples
304 ///
305 /// ```
306 /// use feedparser_rs::{FeedMeta, TextConstruct};
307 ///
308 /// let mut meta = FeedMeta::default();
309 /// meta.set_rights(TextConstruct::text("© 2025 Example"));
310 /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
311 /// ```
312 #[inline]
313 pub fn set_rights(&mut self, mut text: TextConstruct) {
314 self.rights = Some(std::mem::take(&mut text.value));
315 self.rights_detail = Some(text);
316 }
317
318 /// Sets generator field with `Generator`, storing both simple and detailed versions
319 ///
320 /// # Examples
321 ///
322 /// ```
323 /// use feedparser_rs::{FeedMeta, Generator};
324 ///
325 /// # fn main() {
326 /// let mut meta = FeedMeta::default();
327 /// let generator = Generator {
328 /// value: "Example Generator".to_string(),
329 /// uri: None,
330 /// version: None,
331 /// };
332 /// meta.set_generator(generator);
333 /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
334 /// # }
335 /// ```
336 #[inline]
337 pub fn set_generator(&mut self, mut generator: Generator) {
338 self.generator = Some(std::mem::take(&mut generator.value));
339 self.generator_detail = Some(generator);
340 }
341
342 /// Sets author field with `Person`, storing both simple and detailed versions
343 ///
344 /// # Examples
345 ///
346 /// ```
347 /// use feedparser_rs::{FeedMeta, Person};
348 ///
349 /// let mut meta = FeedMeta::default();
350 /// meta.set_author(Person::from_name("John Doe"));
351 /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
352 /// ```
353 #[inline]
354 pub fn set_author(&mut self, mut person: Person) {
355 self.author = person.name.take();
356 self.author_detail = Some(person);
357 }
358
359 /// Sets publisher field with `Person`, storing both simple and detailed versions
360 ///
361 /// # Examples
362 ///
363 /// ```
364 /// use feedparser_rs::{FeedMeta, Person};
365 ///
366 /// let mut meta = FeedMeta::default();
367 /// meta.set_publisher(Person::from_name("ACME Corp"));
368 /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
369 /// ```
370 #[inline]
371 pub fn set_publisher(&mut self, mut person: Person) {
372 self.publisher = person.name.take();
373 self.publisher_detail = Some(person);
374 }
375
376 /// Sets the primary link and adds it to the links collection
377 ///
378 /// This is a convenience method that:
379 /// 1. Sets the `link` field (if not already set)
380 /// 2. Adds an "alternate" link to the `links` collection
381 ///
382 /// # Examples
383 ///
384 /// ```
385 /// use feedparser_rs::FeedMeta;
386 ///
387 /// let mut meta = FeedMeta::default();
388 /// meta.set_alternate_link("https://example.com".to_string(), 10);
389 /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
390 /// assert_eq!(meta.links.len(), 1);
391 /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
392 /// ```
393 #[inline]
394 pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
395 if self.link.is_none() {
396 self.link = Some(href.clone());
397 }
398 self.links.try_push_limited(
399 Link {
400 href: href.into(),
401 rel: Some("alternate".into()),
402 ..Default::default()
403 },
404 max_links,
405 );
406 }
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` has no title and empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is error-free, of unknown version, and empty.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` presets the encoding to UTF-8.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` copies its fields and leaves the source intact.
    #[test]
    fn test_parsed_feed_clone() {
        let feed = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };
        let cloned = feed.clone();

        // The copy carries the same values...
        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, "utf-8");

        // ...and the original is still usable and unchanged.
        assert_eq!(feed.version, FeedVersion::Rss20);
        assert!(feed.bozo);
        assert_eq!(feed.encoding, "utf-8");
    }
}
448}