feedparser_rs/lib.rs
1//! feedparser-rs-core: High-performance RSS/Atom/JSON Feed parser
2//!
3//! This crate provides a pure Rust implementation of feed parsing with
4//! compatibility for Python's feedparser library.
5//!
6//! # Examples
7//!
8//! ```
9//! use feedparser_rs::parse;
10//!
11//! let xml = r#"
12//! <?xml version="1.0"?>
13//! <rss version="2.0">
14//! <channel>
15//! <title>Example Feed</title>
16//! </channel>
17//! </rss>
18//! "#;
19//!
20//! // Parsing will be fully implemented in Phase 2
21//! let feed = parse(xml.as_bytes()).unwrap();
22//! assert!(feed.bozo == false);
23//! ```
24//!
25//! # Features
26//!
27//! - Parse RSS 0.9x, 1.0, 2.0
28//! - Parse Atom 0.3, 1.0
29//! - Parse JSON Feed 1.0, 1.1
30//! - Tolerant parsing with bozo flag
31//! - Multi-format date parsing
32//! - HTML sanitization
33//! - Encoding detection
34//!
35//! # Architecture
36//!
37//! The library provides core data structures like [`ParsedFeed`], [`Entry`], and [`FeedMeta`]
38//! for representing parsed feed data. The main entry point is the [`parse`] function which
39//! automatically detects feed format and returns parsed results.
40
41mod compat;
42mod error;
43#[cfg(feature = "http")]
44/// HTTP client module for fetching feeds from URLs
45pub mod http;
46mod limits;
47/// Namespace handlers for extended feed formats
48pub mod namespace;
49mod options;
50mod parser;
51
52/// Type definitions for feed data structures
53///
54/// This module contains all the data types used to represent parsed feeds,
55/// including the main `ParsedFeed` struct and related types.
56pub mod types;
57
58/// Utility functions for feed parsing
59///
60/// This module provides helper functions for date parsing, HTML sanitization,
61/// and encoding detection that are useful for feed processing.
62pub mod util;
63
64pub use error::{FeedError, Result};
65pub use limits::{LimitError, ParserLimits};
66pub use options::ParseOptions;
67pub use parser::{detect_format, parse, parse_with_limits};
68pub use types::{
69 Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
70 ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, ParsedFeed, Person,
71 PodcastFunding, PodcastMeta, PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct,
72 TextType, parse_duration, parse_explicit,
73};
74
75#[cfg(feature = "http")]
76pub use http::{FeedHttpClient, FeedHttpResponse};
77
/// Parse feed from HTTP/HTTPS URL
///
/// Fetches the feed from the given URL and parses the response body with
/// [`parse`]. Supports conditional GET using `ETag` and `Last-Modified`
/// headers for bandwidth-efficient caching.
///
/// # Arguments
///
/// * `url` - HTTP or HTTPS URL to fetch
/// * `etag` - Optional `ETag` from previous fetch for conditional GET
/// * `modified` - Optional `Last-Modified` timestamp from previous fetch
/// * `user_agent` - Optional custom User-Agent header
///
/// # Returns
///
/// Returns a `ParsedFeed` with HTTP metadata fields populated:
/// - `status`: HTTP status code (200, 304, etc.)
/// - `href`: Final URL after redirects
/// - `etag`: `ETag` header value (for next request)
/// - `modified`: `Last-Modified` header value (for next request)
/// - `headers`: Full HTTP response headers
///
/// On 304 Not Modified, returns a feed with empty entries but status=304.
/// In that case `etag` and `modified` carry the validators sent by the server
/// with the 304 response when it supplied them, and otherwise fall back to the
/// values passed in by the caller, so the result can always be fed straight
/// into the next conditional request.
///
/// # Errors
///
/// Returns `FeedError::Http` if:
/// - Network error occurs
/// - URL is invalid
/// - HTTP status is 4xx or 5xx (except 304)
///
/// Any error returned by [`parse`] for the response body is propagated as well.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::parse_url;
///
/// // First fetch
/// let feed = parse_url("https://example.com/feed.xml", None, None, None).unwrap();
/// println!("Title: {:?}", feed.feed.title);
/// println!("ETag: {:?}", feed.etag);
///
/// // Subsequent fetch with caching
/// let feed2 = parse_url(
///     "https://example.com/feed.xml",
///     feed.etag.as_deref(),
///     feed.modified.as_deref(),
///     None
/// ).unwrap();
///
/// if feed2.status == Some(304) {
///     println!("Feed not modified, use cached version");
/// }
/// ```
#[cfg(feature = "http")]
pub fn parse_url(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
) -> Result<ParsedFeed> {
    use http::FeedHttpClient;

    // Create HTTP client, applying the caller's User-Agent when one is given
    let mut client = FeedHttpClient::new()?;
    if let Some(agent) = user_agent {
        client = client.with_user_agent(agent.to_string());
    }

    // Fetch feed (conditional when `etag` / `modified` are supplied)
    let response = client.get(url, etag, modified, None)?;

    // Handle 304 Not Modified: there is no body to parse, so return an empty
    // feed that only carries the HTTP metadata.
    if response.status == 304 {
        return Ok(ParsedFeed {
            status: Some(304),
            href: Some(response.url),
            // Prefer the validators the server sent with the 304; fall back to
            // the ones the caller used for this request when the server
            // omitted them.
            etag: response.etag.or_else(|| etag.map(String::from)),
            modified: response
                .last_modified
                .or_else(|| modified.map(String::from)),
            // No inner `cfg` needed: this whole function is already gated on
            // the `http` feature.
            headers: Some(response.headers),
            encoding: String::from("utf-8"),
            ..Default::default()
        });
    }

    // Handle error status codes
    if response.status >= 400 {
        return Err(FeedError::Http {
            message: format!("HTTP {} for URL: {}", response.status, response.url),
        });
    }

    // Parse feed from response body
    let mut feed = parse(&response.body)?;

    // Add HTTP metadata
    feed.status = Some(response.status);
    feed.href = Some(response.url);
    feed.etag = response.etag;
    feed.modified = response.last_modified;
    feed.headers = Some(response.headers);

    // Override encoding if HTTP header specifies
    if let Some(http_encoding) = response.encoding {
        feed.encoding = http_encoding;
    }

    Ok(feed)
}
189
/// Parse feed from URL with custom parser limits
///
/// Like [`parse_url`] but the response body is parsed with
/// [`parse_with_limits`], allowing custom limits for resource control.
///
/// # Arguments
///
/// * `url` - HTTP or HTTPS URL to fetch
/// * `etag` - Optional `ETag` from previous fetch for conditional GET
/// * `modified` - Optional `Last-Modified` timestamp from previous fetch
/// * `user_agent` - Optional custom User-Agent header
/// * `limits` - Parser limits applied while parsing the response body
///
/// # Returns
///
/// Returns a `ParsedFeed` with the `status`, `href`, `etag`, `modified` and
/// `headers` fields populated from the HTTP response.
///
/// On 304 Not Modified, returns a feed with empty entries but status=304.
/// In that case `etag` and `modified` carry the validators sent by the server
/// with the 304 response when it supplied them, and otherwise fall back to the
/// values passed in by the caller.
///
/// # Errors
///
/// Returns `FeedError::Http` if the request fails or `FeedError::Parse` if parsing fails.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::{parse_url_with_limits, ParserLimits};
///
/// let limits = ParserLimits::strict();
/// let feed = parse_url_with_limits(
///     "https://example.com/feed.xml",
///     None,
///     None,
///     None,
///     limits
/// ).unwrap();
/// ```
#[cfg(feature = "http")]
pub fn parse_url_with_limits(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    limits: ParserLimits,
) -> Result<ParsedFeed> {
    use http::FeedHttpClient;

    // Create HTTP client, applying the caller's User-Agent when one is given
    let mut client = FeedHttpClient::new()?;
    if let Some(agent) = user_agent {
        client = client.with_user_agent(agent.to_string());
    }

    // Fetch feed (conditional when `etag` / `modified` are supplied)
    let response = client.get(url, etag, modified, None)?;

    // Handle 304 Not Modified: there is no body to parse, so return an empty
    // feed that only carries the HTTP metadata.
    if response.status == 304 {
        return Ok(ParsedFeed {
            status: Some(304),
            href: Some(response.url),
            // Prefer the validators the server sent with the 304; fall back to
            // the ones the caller used for this request when the server
            // omitted them.
            etag: response.etag.or_else(|| etag.map(String::from)),
            modified: response
                .last_modified
                .or_else(|| modified.map(String::from)),
            // No inner `cfg` needed: this whole function is already gated on
            // the `http` feature.
            headers: Some(response.headers),
            encoding: String::from("utf-8"),
            ..Default::default()
        });
    }

    // Handle error status codes
    if response.status >= 400 {
        return Err(FeedError::Http {
            message: format!("HTTP {} for URL: {}", response.status, response.url),
        });
    }

    // Parse feed from response body under the caller-supplied limits
    let mut feed = parse_with_limits(&response.body, limits)?;

    // Add HTTP metadata
    feed.status = Some(response.status);
    feed.href = Some(response.url);
    feed.etag = response.etag;
    feed.modified = response.last_modified;
    feed.headers = Some(response.headers);

    // Override encoding if HTTP header specifies
    if let Some(http_encoding) = response.encoding {
        feed.encoding = http_encoding;
    }

    Ok(feed)
}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal RSS 2.0 document is accepted by `parse` without error.
    #[test]
    fn test_parse_basic() {
        let rss_document = r#"
            <?xml version="1.0"?>
            <rss version="2.0">
                <channel>
                    <title>Test</title>
                </channel>
            </rss>
        "#;

        assert!(parse(rss_document.as_bytes()).is_ok());
    }

    /// `ParsedFeed::new()` yields a clean, UTF-8, version-less feed.
    #[test]
    fn test_parsed_feed_new() {
        let fresh = ParsedFeed::new();

        assert_eq!(fresh.encoding, "utf-8");
        assert!(!fresh.bozo);
        assert_eq!(fresh.version, FeedVersion::Unknown);
    }

    /// `FeedVersion` renders as its short lowercase identifier.
    #[test]
    fn test_feed_version_display() {
        let cases = [
            (FeedVersion::Rss20, "rss20"),
            (FeedVersion::Atom10, "atom10"),
        ];

        for (version, expected) in &cases {
            assert_eq!(version.to_string(), *expected);
        }
    }
}
298}