1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
use super::{
common::{Cloud, Generator, Image, Link, MediaRating, Person, Tag, TextConstruct, TextInput},
entry::Entry,
generics::LimitedCollectionExt,
podcast::{ItunesFeedMeta, PodcastMeta},
version::FeedVersion,
};
use crate::namespace::syndication::SyndicationMeta;
use crate::{ParserLimits, error::Result};
use chrono::{DateTime, Utc};
use quick_xml::Reader;
use std::collections::HashMap;
/// Feed-level metadata: everything that describes the feed (channel) itself
/// rather than one of its entries.
///
/// Every field is either an `Option` or a `Vec`, so the derived `Default`
/// produces a value with nothing set and all collections empty. Several fields
/// come in pairs — a flat value plus a `*_detail` companion — and the `set_*`
/// helpers on this type fill both halves of a pair in one call.
#[derive(Debug, Clone, Default)]
pub struct FeedMeta {
/// Feed title as plain text (copied from `title_detail.value` by `set_title`)
pub title: Option<String>,
/// Detailed title with metadata
pub title_detail: Option<TextConstruct>,
/// Primary feed link (`set_alternate_link` only fills this while it is still `None`)
pub link: Option<String>,
/// All links associated with this feed
pub links: Vec<Link>,
/// Feed subtitle/description as plain text (copied from `subtitle_detail.value` by `set_subtitle`)
pub subtitle: Option<String>,
/// Detailed subtitle with metadata
pub subtitle_detail: Option<TextConstruct>,
/// Feed summary (populated from itunes:summary when present)
pub summary: Option<String>,
/// Detailed summary with metadata
pub summary_detail: Option<TextConstruct>,
/// Last update date, as a UTC timestamp
pub updated: Option<DateTime<Utc>>,
/// Original update date string as found in the feed (timezone preserved)
pub updated_str: Option<String>,
/// Initial publication date (RSS pubDate, Atom published), as a UTC timestamp
pub published: Option<DateTime<Utc>>,
/// Original publication date string as found in the feed (timezone preserved)
pub published_str: Option<String>,
/// Primary author name (stored inline for names ≤24 bytes); `set_author` derives it
/// from `Person::flat_string`
pub author: Option<super::common::SmallString>,
/// Detailed author information
pub author_detail: Option<Person>,
/// All authors
pub authors: Vec<Person>,
/// Contributors
pub contributors: Vec<Person>,
/// Publisher name (stored inline for names ≤24 bytes); `set_publisher` copies it
/// from the person's `name`
pub publisher: Option<super::common::SmallString>,
/// Detailed publisher information
pub publisher_detail: Option<Person>,
/// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes
pub language: Option<super::common::SmallString>,
/// Copyright/rights statement as plain text (copied from `rights_detail.value` by `set_rights`)
pub rights: Option<String>,
/// Detailed rights with metadata
pub rights_detail: Option<TextConstruct>,
/// Generator name (copied from `generator_detail.name` by `set_generator`)
pub generator: Option<String>,
/// Detailed generator information
pub generator_detail: Option<Generator>,
/// Feed image
pub image: Option<Image>,
/// Icon URL (small image)
pub icon: Option<String>,
/// Logo URL (larger image)
pub logo: Option<String>,
/// Feed-level tags/categories
pub tags: Vec<Tag>,
/// Unique feed identifier
pub id: Option<String>,
/// Time-to-live (update frequency hint) in minutes (kept as string for API compatibility)
pub ttl: Option<String>,
/// URL of documentation for the RSS format used
pub docs: Option<String>,
/// iTunes podcast metadata (if present); boxed, so an absent value costs one pointer
pub itunes: Option<Box<ItunesFeedMeta>>,
/// Podcast 2.0 namespace metadata (if present); boxed like `itunes`
pub podcast: Option<Box<PodcastMeta>>,
/// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes
pub dc_creator: Option<super::common::SmallString>,
/// Dublin Core publisher (stored inline for names ≤24 bytes)
pub dc_publisher: Option<super::common::SmallString>,
/// Dublin Core rights (copyright)
pub dc_rights: Option<String>,
/// License URL (Creative Commons, etc.)
pub license: Option<String>,
/// Syndication module metadata (RSS 1.0)
pub syndication: Option<Box<SyndicationMeta>>,
/// Geographic location from `GeoRSS` namespace (feed level, exposed as `where` per Python feedparser API).
/// `where` is a Rust keyword, hence the raw identifier `r#where`.
pub r#where: Option<Box<crate::namespace::georss::GeoLocation>>,
/// W3C Basic Geo latitude (`geo:lat`), kept as the raw string
pub geo_lat: Option<String>,
/// W3C Basic Geo longitude (`geo:long`), kept as the raw string
pub geo_long: Option<String>,
/// Pagination URL for the next page of results (JSON Feed `next_url`, RFC 5005 `<link rel="next">`)
pub next_url: Option<String>,
/// Media RSS thumbnails at feed/channel level
pub media_thumbnail: Vec<super::common::MediaThumbnail>,
/// Media RSS content items at feed/channel level
pub media_content: Vec<super::common::MediaContent>,
/// Media RSS rating (`media:rating`) at feed level
pub media_rating: Option<MediaRating>,
/// Media RSS keywords (`media:keywords`) at feed level, comma-separated string
pub media_keywords: Option<String>,
/// RSS 2.0 `<cloud>` element — subscription endpoint for notifications
pub cloud: Option<Cloud>,
/// RSS 2.0 `<textInput>` element — text input form associated with the channel
pub textinput: Option<TextInput>,
/// RSS 2.0 `<skipHours>` — hours of the day when the channel may be skipped (0–23)
pub skiphours: Vec<u32>,
/// RSS 2.0 `<skipDays>` — days of the week when the channel may be skipped
pub skipdays: Vec<String>,
}
/// Parsed feed result
///
/// This is the main result type returned by the parser, analogous to
/// Python feedparser's `FeedParserDict`.
///
/// Note: the derived `Default` leaves `encoding` as an empty string.
/// `ParsedFeed::new` and `ParsedFeed::with_capacity` are the constructors that
/// preset it to `"utf-8"`.
#[derive(Debug, Clone, Default)]
pub struct ParsedFeed {
/// Feed metadata
pub feed: FeedMeta,
/// Feed entries/items
pub entries: Vec<Entry>,
/// True if parsing encountered errors (also set by `check_entry_limit` when the
/// entry limit is hit)
pub bozo: bool,
/// Description of parsing error (if bozo is true)
pub bozo_exception: Option<String>,
/// Detected or declared encoding (`"utf-8"` when built via `new`/`with_capacity`)
pub encoding: String,
/// Detected feed format version
pub version: FeedVersion,
/// XML namespaces (prefix -> URI)
pub namespaces: HashMap<String, String>,
/// HTTP status code (if fetched from URL)
pub status: Option<u16>,
/// Final URL after redirects (if fetched from URL)
pub href: Option<String>,
/// `ETag` header from HTTP response
pub etag: Option<String>,
/// Last-Modified header from HTTP response
pub modified: Option<String>,
/// HTTP response headers (if fetched from URL).
/// This field only exists when the crate is compiled with the `http` feature.
#[cfg(feature = "http")]
pub headers: Option<HashMap<String, String>>,
}
impl ParsedFeed {
    /// Builds an empty `ParsedFeed` whose `encoding` is preset to `"utf-8"`.
    ///
    /// Every other field takes its `Default` value.
    #[must_use]
    pub fn new() -> Self {
        let encoding = String::from("utf-8");
        Self {
            encoding,
            ..Self::default()
        }
    }

    /// Builds a `ParsedFeed` with storage reserved up front.
    ///
    /// Room for `entry_count` entries is reserved in `entries`, and the
    /// namespace map is given a small fixed capacity hint, so that fewer
    /// reallocations happen while a feed is being parsed. As with
    /// [`ParsedFeed::new`], `encoding` starts out as `"utf-8"`.
    ///
    /// # Arguments
    ///
    /// * `entry_count` - Expected number of entries in the feed
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::ParsedFeed;
    ///
    /// let feed = ParsedFeed::with_capacity(50);
    /// assert_eq!(feed.encoding, "utf-8");
    /// ```
    #[must_use]
    pub fn with_capacity(entry_count: usize) -> Self {
        // Typical feeds have 3-8 namespaces
        const NAMESPACE_CAPACITY_HINT: usize = 8;

        let entries = Vec::with_capacity(entry_count);
        let namespaces = HashMap::with_capacity(NAMESPACE_CAPACITY_HINT);
        Self {
            entries,
            namespaces,
            encoding: String::from("utf-8"),
            ..Self::default()
        }
    }

    /// Gatekeeper for adding another entry: reports whether the entry limit
    /// still allows it, and skips the element when it does not.
    ///
    /// Shared by the RSS and Atom parsers so the limit handling lives in one
    /// place. When `entries` is already at `limits.max_entries`, this method:
    /// - sets `bozo` to `true`
    /// - stores an "Entry limit exceeded" message in `bozo_exception`
    /// - skips the entry element via `skip_element`
    /// - decrements `*depth` by one (saturating at zero)
    /// - returns `Ok(false)` so the caller does not process the entry
    ///
    /// When the limit has not been reached nothing is modified.
    ///
    /// # Arguments
    ///
    /// * `reader` - XML reader positioned at the entry element
    /// * `buf` - Buffer for XML event reading
    /// * `limits` - Parser limits including `max_entries`
    /// * `depth` - Current nesting depth (decremented only when the element is skipped)
    ///
    /// # Returns
    ///
    /// * `Ok(true)` - Entry can be processed (limit not reached)
    /// * `Ok(false)` - Entry limit reached, element was skipped
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `skip_element` while skipping the
    /// entry element (e.g., malformed XML, or nesting depth exceeding limits).
    /// In that case `bozo`/`bozo_exception` have already been set and `*depth`
    /// is left unchanged.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// // In parser:
    /// if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
    ///     continue;
    /// }
    /// // Process entry...
    /// ```
    #[inline]
    pub fn check_entry_limit(
        &mut self,
        reader: &mut Reader<&[u8]>,
        buf: &mut Vec<u8>,
        limits: &ParserLimits,
        depth: &mut usize,
    ) -> Result<bool> {
        use crate::parser::skip_element;

        // Fast path: still below the cap, nothing to record or skip.
        if !self.entries.is_at_limit(limits.max_entries) {
            return Ok(true);
        }

        // Flag the feed as problematic before attempting the skip, so the
        // diagnosis survives even if skipping itself fails.
        self.bozo = true;
        self.bozo_exception = Some(format!("Entry limit exceeded: {}", limits.max_entries));

        skip_element(reader, buf, limits, *depth)?;
        *depth = depth.saturating_sub(1);
        Ok(false)
    }
}
impl FeedMeta {
    /// Common constructor behind the `with_*_capacity` helpers: reserves room
    /// in the four growable collections and leaves every other field at its
    /// default value.
    fn with_collection_capacities(
        links: usize,
        authors: usize,
        contributors: usize,
        tags: usize,
    ) -> Self {
        Self {
            links: Vec::with_capacity(links),
            authors: Vec::with_capacity(authors),
            contributors: Vec::with_capacity(contributors),
            tags: Vec::with_capacity(tags),
            ..Self::default()
        }
    }

    /// Builds a `FeedMeta` sized for a typical RSS 2.0 channel.
    ///
    /// Capacity reserved per collection:
    /// - `links`: 2 (channel link, self link)
    /// - `authors`: 1 (managingEditor)
    /// - `contributors`: 0
    /// - `tags`: 3 (categories)
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::FeedMeta;
    ///
    /// let meta = FeedMeta::with_rss_capacity();
    /// ```
    #[must_use]
    pub fn with_rss_capacity() -> Self {
        Self::with_collection_capacities(2, 1, 0, 3)
    }

    /// Builds a `FeedMeta` sized for a typical Atom 1.0 feed.
    ///
    /// Capacity reserved per collection:
    /// - `links`: 4 (alternate, self, related, etc.)
    /// - `authors`: 2
    /// - `contributors`: 1
    /// - `tags`: 5 (categories)
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::FeedMeta;
    ///
    /// let meta = FeedMeta::with_atom_capacity();
    /// ```
    #[must_use]
    pub fn with_atom_capacity() -> Self {
        Self::with_collection_capacities(4, 2, 1, 5)
    }

    /// Stores a title: `title` receives a copy of the text value and
    /// `title_detail` takes ownership of the full `TextConstruct`.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::{FeedMeta, TextConstruct};
    ///
    /// let mut meta = FeedMeta::default();
    /// meta.set_title(TextConstruct::text("Example Feed"));
    /// assert_eq!(meta.title.as_deref(), Some("Example Feed"));
    /// ```
    #[inline]
    pub fn set_title(&mut self, text: TextConstruct) {
        let plain = text.value.clone();
        self.title = Some(plain);
        self.title_detail = Some(text);
    }

    /// Stores a subtitle: `subtitle` receives a copy of the text value and
    /// `subtitle_detail` takes ownership of the full `TextConstruct`.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::{FeedMeta, TextConstruct};
    ///
    /// let mut meta = FeedMeta::default();
    /// meta.set_subtitle(TextConstruct::text("A great feed"));
    /// assert_eq!(meta.subtitle.as_deref(), Some("A great feed"));
    /// ```
    #[inline]
    pub fn set_subtitle(&mut self, text: TextConstruct) {
        let plain = text.value.clone();
        self.subtitle = Some(plain);
        self.subtitle_detail = Some(text);
    }

    /// Stores a summary: `summary` receives a copy of the text value and
    /// `summary_detail` takes ownership of the full `TextConstruct`.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::{FeedMeta, TextConstruct};
    ///
    /// let mut meta = FeedMeta::default();
    /// meta.set_summary(TextConstruct::text("A detailed description"));
    /// assert_eq!(meta.summary.as_deref(), Some("A detailed description"));
    /// ```
    #[inline]
    pub fn set_summary(&mut self, text: TextConstruct) {
        let plain = text.value.clone();
        self.summary = Some(plain);
        self.summary_detail = Some(text);
    }

    /// Stores a rights statement: `rights` receives a copy of the text value
    /// and `rights_detail` takes ownership of the full `TextConstruct`.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::{FeedMeta, TextConstruct};
    ///
    /// let mut meta = FeedMeta::default();
    /// meta.set_rights(TextConstruct::text("© 2025 Example"));
    /// assert_eq!(meta.rights.as_deref(), Some("© 2025 Example"));
    /// ```
    #[inline]
    pub fn set_rights(&mut self, text: TextConstruct) {
        let plain = text.value.clone();
        self.rights = Some(plain);
        self.rights_detail = Some(text);
    }

    /// Stores generator information: `generator` receives a copy of the
    /// generator's name and `generator_detail` takes ownership of the struct.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::{FeedMeta, Generator};
    ///
    /// # fn main() {
    /// let mut meta = FeedMeta::default();
    /// let generator = Generator {
    ///     name: "Example Generator".to_string(),
    ///     href: None,
    ///     version: None,
    /// };
    /// meta.set_generator(generator);
    /// assert_eq!(meta.generator.as_deref(), Some("Example Generator"));
    /// # }
    /// ```
    #[inline]
    pub fn set_generator(&mut self, generator: Generator) {
        // The flat field needs its own copy of the name because the detail
        // struct is moved into `generator_detail` right after.
        let name = generator.name.clone();
        self.generator = Some(name);
        self.generator_detail = Some(generator);
    }

    /// Stores the primary author: `author` is set to the person's
    /// `flat_string()` and `author_detail` takes ownership of the `Person`.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::{FeedMeta, Person};
    ///
    /// let mut meta = FeedMeta::default();
    /// meta.set_author(Person::from_name("John Doe"));
    /// assert_eq!(meta.author.as_deref(), Some("John Doe"));
    /// ```
    #[inline]
    pub fn set_author(&mut self, person: Person) {
        let flat = person.flat_string();
        self.author = flat;
        self.author_detail = Some(person);
    }

    /// Stores the publisher: `publisher` becomes a copy of the person's `name`
    /// and `publisher_detail` takes ownership of the `Person`.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::{FeedMeta, Person};
    ///
    /// let mut meta = FeedMeta::default();
    /// meta.set_publisher(Person::from_name("ACME Corp"));
    /// assert_eq!(meta.publisher.as_deref(), Some("ACME Corp"));
    /// ```
    #[inline]
    pub fn set_publisher(&mut self, person: Person) {
        // `clone_from` lets an existing `publisher` value reuse its storage.
        self.publisher.clone_from(&person.name);
        self.publisher_detail = Some(person);
    }

    /// Records `href` as an alternate link of the feed.
    ///
    /// Two things happen:
    /// 1. `link` is set to `href`, but only if it is still `None` — an
    ///    earlier primary link is never overwritten.
    /// 2. A `Link` with `rel = "alternate"` is handed to `try_push_limited`
    ///    on `links`, together with `max_links`.
    ///
    /// # Examples
    ///
    /// ```
    /// use feedparser_rs::FeedMeta;
    ///
    /// let mut meta = FeedMeta::default();
    /// meta.set_alternate_link("https://example.com".to_string(), 10);
    /// assert_eq!(meta.link.as_deref(), Some("https://example.com"));
    /// assert_eq!(meta.links.len(), 1);
    /// assert_eq!(meta.links[0].rel.as_deref(), Some("alternate"));
    /// ```
    #[inline]
    pub fn set_alternate_link(&mut self, href: String, max_links: usize) {
        // Copy only when the primary link is actually missing; `href` itself
        // is moved into the `Link` below.
        if self.link.is_none() {
            self.link = Some(href.clone());
        }

        let alternate = Link {
            href: href.into(),
            rel: Some("alternate".into()),
            ..Default::default()
        };
        self.links.try_push_limited(alternate, max_links);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A default `FeedMeta` carries no title and has empty collections.
    #[test]
    fn test_feed_meta_default() {
        let meta = FeedMeta::default();
        assert!(meta.title.is_none());
        assert!(meta.links.is_empty());
        assert!(meta.authors.is_empty());
    }

    /// A default `ParsedFeed` is not bozo, has an unknown version and no entries.
    #[test]
    fn test_parsed_feed_default() {
        let feed = ParsedFeed::default();
        assert!(!feed.bozo);
        assert!(feed.bozo_exception.is_none());
        assert_eq!(feed.version, FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// `ParsedFeed::new` presets the encoding to UTF-8 and leaves `bozo` unset.
    #[test]
    fn test_parsed_feed_new() {
        let feed = ParsedFeed::new();
        assert_eq!(feed.encoding, "utf-8");
        assert!(!feed.bozo);
    }

    /// Cloning a `ParsedFeed` yields a copy with the same field values and
    /// leaves the original usable.
    #[test]
    fn test_parsed_feed_clone() {
        let original = ParsedFeed {
            version: FeedVersion::Rss20,
            bozo: true,
            ..ParsedFeed::new()
        };

        // Exercise the derived `Clone` impl; previously this test only
        // inspected the value it had just constructed.
        let cloned = original.clone();

        assert_eq!(cloned.version, FeedVersion::Rss20);
        assert!(cloned.bozo);
        assert_eq!(cloned.encoding, "utf-8");

        // The source value is untouched by the clone.
        assert_eq!(original.version, FeedVersion::Rss20);
        assert!(original.bozo);
        assert_eq!(original.encoding, cloned.encoding);
    }
}