feedparser_rs/lib.rs
1//! feedparser-rs-core: High-performance RSS/Atom/JSON Feed parser
2//!
3//! This crate provides a pure Rust implementation of feed parsing with
4//! compatibility for Python's feedparser library.
5//!
6//! # Examples
7//!
8//! ```
9//! use feedparser_rs::parse;
10//!
11//! let xml = r#"
12//! <?xml version="1.0"?>
13//! <rss version="2.0">
14//! <channel>
15//! <title>Example Feed</title>
16//! </channel>
17//! </rss>
18//! "#;
19//!
//! // Parse the document and check that the bozo flag was not raised
21//! let feed = parse(xml.as_bytes()).unwrap();
22//! assert!(feed.bozo == false);
23//! ```
24//!
25//! # Features
26//!
27//! - Parse RSS 0.9x, 1.0, 2.0
28//! - Parse Atom 0.3, 1.0
29//! - Parse JSON Feed 1.0, 1.1
30//! - Tolerant parsing with bozo flag
31//! - Multi-format date parsing
32//! - HTML sanitization
33//! - Encoding detection
34//!
35//! # Architecture
36//!
37//! The library provides core data structures like [`ParsedFeed`], [`Entry`], and [`FeedMeta`]
38//! for representing parsed feed data. The main entry point is the [`parse`] function which
39//! automatically detects feed format and returns parsed results.
40
41mod compat;
42mod error;
43#[cfg(feature = "http")]
44/// HTTP client module for fetching feeds from URLs
45pub mod http;
46mod limits;
47/// Namespace handlers for extended feed formats
48pub mod namespace;
49mod options;
50mod parser;
51
52/// Type definitions for feed data structures
53///
54/// This module contains all the data types used to represent parsed feeds,
55/// including the main `ParsedFeed` struct and related types.
56pub mod types;
57
58/// Utility functions for feed parsing
59///
60/// This module provides helper functions for date parsing, HTML sanitization,
61/// and encoding detection that are useful for feed processing.
62pub mod util;
63
64pub use error::{FeedError, Result};
65pub use limits::{LimitError, ParserLimits};
66pub use options::ParseOptions;
67pub use parser::{detect_format, parse, parse_with_limits};
68pub use types::{
69 Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
70 ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, ParsedFeed, Person,
71 PodcastFunding, PodcastMeta, PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct,
72 TextType, parse_duration, parse_explicit,
73};
74
75pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};
76
77#[cfg(feature = "http")]
78pub use http::{FeedHttpClient, FeedHttpResponse};
79
/// Parse feed from HTTP/HTTPS URL
///
/// Fetches the feed from the given URL and parses it. Supports conditional GET
/// using `ETag` and `Last-Modified` headers for bandwidth-efficient caching.
///
/// # Arguments
///
/// * `url` - HTTP or HTTPS URL to fetch
/// * `etag` - Optional `ETag` from previous fetch for conditional GET
/// * `modified` - Optional `Last-Modified` timestamp from previous fetch
/// * `user_agent` - Optional custom User-Agent header
///
/// # Returns
///
/// Returns a `ParsedFeed` with HTTP metadata fields populated:
/// - `status`: HTTP status code (200, 304, etc.)
/// - `href`: Final URL after redirects
/// - `etag`: `ETag` header value (for next request)
/// - `modified`: `Last-Modified` header value (for next request)
/// - `headers`: Full HTTP response headers
///
/// On 304 Not Modified, returns an otherwise default feed with status=304;
/// its `etag` and `modified` fields echo the values passed in by the caller.
///
/// # Errors
///
/// Returns `FeedError::Http` if:
/// - Network error occurs
/// - URL is invalid
/// - HTTP status is 4xx or 5xx (except 304)
///
/// Any error produced while parsing the response body is propagated as well.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::parse_url;
///
/// // First fetch
/// let feed = parse_url("https://example.com/feed.xml", None, None, None).unwrap();
/// println!("Title: {:?}", feed.feed.title);
/// println!("ETag: {:?}", feed.etag);
///
/// // Subsequent fetch with caching
/// let feed2 = parse_url(
///     "https://example.com/feed.xml",
///     feed.etag.as_deref(),
///     feed.modified.as_deref(),
///     None
/// ).unwrap();
///
/// if feed2.status == Some(304) {
///     println!("Feed not modified, use cached version");
/// }
/// ```
#[cfg(feature = "http")]
pub fn parse_url(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
) -> Result<ParsedFeed> {
    // `None` selects the plain `parse` entry point for the response body.
    fetch_and_parse(url, etag, modified, user_agent, None)
}

/// Parse feed from URL with custom parser limits
///
/// Like `parse_url` but allows specifying custom limits for resource control.
/// The limits are forwarded to `parse_with_limits` when the body is parsed.
///
/// # Errors
///
/// Returns `FeedError::Http` if the request fails or `FeedError::Parse` if parsing fails.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::{parse_url_with_limits, ParserLimits};
///
/// let limits = ParserLimits::strict();
/// let feed = parse_url_with_limits(
///     "https://example.com/feed.xml",
///     None,
///     None,
///     None,
///     limits
/// ).unwrap();
/// ```
#[cfg(feature = "http")]
pub fn parse_url_with_limits(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    limits: ParserLimits,
) -> Result<ParsedFeed> {
    fetch_and_parse(url, etag, modified, user_agent, Some(limits))
}

/// Shared implementation behind `parse_url` and `parse_url_with_limits`.
///
/// Fetches `url`, short-circuits on 304 and on error statuses, parses the
/// response body, and then copies the HTTP metadata onto the parsed feed.
///
/// When `limits` is `Some`, the body is parsed with `parse_with_limits`;
/// when it is `None`, the body is parsed with `parse`.
#[cfg(feature = "http")]
fn fetch_and_parse(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    limits: Option<ParserLimits>,
) -> Result<ParsedFeed> {
    use http::FeedHttpClient;

    // Build the client, applying the caller's User-Agent when one is given.
    let mut client = FeedHttpClient::new()?;
    if let Some(agent) = user_agent {
        client = client.with_user_agent(agent.to_string());
    }

    let response = client.get(url, etag, modified, None)?;

    // 304 Not Modified: nothing to parse, so hand back a default feed that
    // carries the validators the caller sent, ready for the next request.
    if response.status == 304 {
        return Ok(ParsedFeed {
            status: Some(304),
            href: Some(response.url),
            etag: etag.map(String::from),
            modified: modified.map(String::from),
            headers: Some(response.headers),
            encoding: String::from("utf-8"),
            ..Default::default()
        });
    }

    // Every status from 400 upwards is reported as an HTTP error.
    if response.status >= 400 {
        return Err(FeedError::Http {
            message: format!("HTTP {} for URL: {}", response.status, response.url),
        });
    }

    let mut feed = match limits {
        Some(limits) => parse_with_limits(&response.body, limits)?,
        None => parse(&response.body)?,
    };

    // Attach HTTP metadata from the response to the parsed feed.
    feed.status = Some(response.status);
    feed.href = Some(response.url);
    feed.etag = response.etag;
    feed.modified = response.last_modified;
    feed.headers = Some(response.headers);

    // An encoding reported by the HTTP layer replaces the value set by the parser.
    if let Some(http_encoding) = response.encoding {
        feed.encoding = http_encoding;
    }

    Ok(feed)
}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal RSS 2.0 document must be accepted by `parse`.
    #[test]
    fn test_parse_basic() {
        let rss_document = r#"
        <?xml version="1.0"?>
        <rss version="2.0">
            <channel>
                <title>Test</title>
            </channel>
        </rss>
        "#;

        assert!(parse(rss_document.as_bytes()).is_ok());
    }

    /// A freshly constructed feed has an unknown version, a clear bozo flag
    /// and "utf-8" as its encoding.
    #[test]
    fn test_parsed_feed_new() {
        let fresh = ParsedFeed::new();

        assert_eq!(fresh.encoding, "utf-8");
        assert!(!fresh.bozo);
        assert_eq!(fresh.version, FeedVersion::Unknown);
    }

    /// `FeedVersion` renders as its short lowercase identifier.
    #[test]
    fn test_feed_version_display() {
        let expectations = [
            (FeedVersion::Rss20, "rss20"),
            (FeedVersion::Atom10, "atom10"),
        ];

        for (version, rendered) in expectations {
            assert_eq!(version.to_string(), rendered);
        }
    }
}
300}