feedparser_rs/lib.rs
1//! feedparser-rs-core: High-performance RSS/Atom/JSON Feed parser
2//!
3//! This crate provides a pure Rust implementation of feed parsing with
4//! compatibility for Python's feedparser library.
5//!
6//! # Examples
7//!
8//! ```
9//! use feedparser_rs::parse;
10//!
11//! let xml = r#"
12//! <?xml version="1.0"?>
13//! <rss version="2.0">
14//! <channel>
15//! <title>Example Feed</title>
16//! </channel>
17//! </rss>
18//! "#;
19//!
20//! // Parsing will be fully implemented in Phase 2
21//! let feed = parse(xml.as_bytes()).unwrap();
22//! assert!(feed.bozo == false);
23//! ```
24//!
25//! # Features
26//!
27//! - Parse RSS 0.9x, 1.0, 2.0
28//! - Parse Atom 0.3, 1.0
29//! - Parse JSON Feed 1.0, 1.1
30//! - Tolerant parsing with bozo flag
31//! - Multi-format date parsing
32//! - HTML sanitization
33//! - Encoding detection
34//!
35//! # Architecture
36//!
37//! The library provides core data structures like [`ParsedFeed`], [`Entry`], and [`FeedMeta`]
38//! for representing parsed feed data. The main entry point is the [`parse`] function which
39//! automatically detects feed format and returns parsed results.
40
41mod compat;
42mod error;
43#[cfg(feature = "http")]
44/// HTTP client module for fetching feeds from URLs
45pub mod http;
46mod limits;
47/// Namespace handlers for extended feed formats
48pub mod namespace;
49mod parser;
50
51/// Type definitions for feed data structures
52///
53/// This module contains all the data types used to represent parsed feeds,
54/// including the main `ParsedFeed` struct and related types.
55pub mod types;
56
57/// Utility functions for feed parsing
58///
59/// This module provides helper functions for date parsing, HTML sanitization,
60/// and encoding detection that are useful for feed processing.
61pub mod util;
62
63pub use error::{FeedError, Result};
64pub use limits::{LimitError, ParserLimits};
65pub use parser::{detect_format, parse, parse_with_limits};
66pub use types::{
67 Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
68 ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, ParsedFeed, Person,
69 PodcastFunding, PodcastMeta, PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct,
70 TextType, parse_duration, parse_explicit,
71};
72
73#[cfg(feature = "http")]
74pub use http::{FeedHttpClient, FeedHttpResponse};
75
/// Parse feed from HTTP/HTTPS URL
///
/// Fetches the feed from the given URL and parses it. Supports conditional GET
/// using `ETag` and `Last-Modified` headers for bandwidth-efficient caching.
///
/// # Arguments
///
/// * `url` - HTTP or HTTPS URL to fetch
/// * `etag` - Optional `ETag` from previous fetch for conditional GET
/// * `modified` - Optional `Last-Modified` timestamp from previous fetch
/// * `user_agent` - Optional custom User-Agent header
///
/// # Returns
///
/// Returns a `ParsedFeed` with HTTP metadata fields populated:
/// - `status`: HTTP status code (200, 304, etc.)
/// - `href`: Final URL after redirects
/// - `etag`: `ETag` header value (for next request)
/// - `modified`: `Last-Modified` header value (for next request)
/// - `headers`: Full HTTP response headers
///
/// On 304 Not Modified, returns a feed with empty entries but status=304.
/// Its `etag` and `modified` fields carry the validators from the 304
/// response when the server sent them, and otherwise the values passed in,
/// so they can always be reused for the next conditional request.
///
/// # Errors
///
/// Returns `FeedError::Http` if:
/// - Network error occurs
/// - URL is invalid
/// - HTTP status is 400 or above
///
/// Any error returned while parsing the response body is propagated unchanged.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::parse_url;
///
/// // First fetch
/// let feed = parse_url("https://example.com/feed.xml", None, None, None).unwrap();
/// println!("Title: {:?}", feed.feed.title);
/// println!("ETag: {:?}", feed.etag);
///
/// // Subsequent fetch with caching
/// let feed2 = parse_url(
///     "https://example.com/feed.xml",
///     feed.etag.as_deref(),
///     feed.modified.as_deref(),
///     None
/// ).unwrap();
///
/// if feed2.status == Some(304) {
///     println!("Feed not modified, use cached version");
/// }
/// ```
#[cfg(feature = "http")]
pub fn parse_url(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
) -> Result<ParsedFeed> {
    fetch_and_parse(url, etag, modified, user_agent, parse)
}

/// Shared implementation behind `parse_url` and `parse_url_with_limits`.
///
/// Performs the (optionally conditional) GET, maps the HTTP status to either
/// an early result or an error, runs `parse_body` on the response body, and
/// finally copies the HTTP metadata onto the parsed feed.
///
/// `parse_body` is the only step that differs between the two public entry
/// points, so it is injected as a closure instead of duplicating the whole
/// request/response handling.
#[cfg(feature = "http")]
fn fetch_and_parse<F>(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    parse_body: F,
) -> Result<ParsedFeed>
where
    F: FnOnce(&[u8]) -> Result<ParsedFeed>,
{
    use http::FeedHttpClient;

    // Create HTTP client, applying the caller's User-Agent when provided
    let mut client = FeedHttpClient::new()?;
    if let Some(agent) = user_agent {
        client = client.with_user_agent(agent.to_string());
    }

    // Fetch feed
    let response = client.get(url, etag, modified, None)?;

    // Handle 304 Not Modified: there is no body to parse, so return an
    // otherwise-default feed that only carries the HTTP metadata.
    if response.status == 304 {
        return Ok(ParsedFeed {
            status: Some(304),
            href: Some(response.url),
            // Prefer validators sent with the 304 itself (the server's current
            // values); fall back to the ones the caller supplied so the result
            // is still usable for the next conditional request.
            etag: response.etag.or_else(|| etag.map(String::from)),
            modified: response
                .last_modified
                .or_else(|| modified.map(String::from)),
            headers: Some(response.headers),
            encoding: String::from("utf-8"),
            ..Default::default()
        });
    }

    // Handle error status codes
    if response.status >= 400 {
        return Err(FeedError::Http {
            message: format!("HTTP {} for URL: {}", response.status, response.url),
        });
    }

    // Parse feed from response body
    let mut feed = parse_body(&response.body)?;

    // Add HTTP metadata
    feed.status = Some(response.status);
    feed.href = Some(response.url);
    feed.etag = response.etag;
    feed.modified = response.last_modified;
    feed.headers = Some(response.headers);

    // Override encoding if HTTP header specifies
    if let Some(http_encoding) = response.encoding {
        feed.encoding = http_encoding;
    }

    Ok(feed)
}

/// Parse feed from URL with custom parser limits
///
/// Like `parse_url` but allows specifying custom limits for resource control.
/// The HTTP handling (conditional GET, 304 handling, status checks, metadata
/// population) is identical; only the body is parsed with `parse_with_limits`
/// using the supplied `limits`.
///
/// # Errors
///
/// Returns `FeedError::Http` if the request fails or the HTTP status is 400
/// or above. Any error returned by `parse_with_limits` while parsing the
/// response body is propagated unchanged.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::{parse_url_with_limits, ParserLimits};
///
/// let limits = ParserLimits::strict();
/// let feed = parse_url_with_limits(
///     "https://example.com/feed.xml",
///     None,
///     None,
///     None,
///     limits
/// ).unwrap();
/// ```
#[cfg(feature = "http")]
pub fn parse_url_with_limits(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    limits: ParserLimits,
) -> Result<ParsedFeed> {
    fetch_and_parse(url, etag, modified, user_agent, |body| {
        parse_with_limits(body, limits)
    })
}
263
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal RSS 2.0 document must be accepted by `parse`.
    #[test]
    fn test_parse_basic() {
        let document = r#"
            <?xml version="1.0"?>
            <rss version="2.0">
                <channel>
                    <title>Test</title>
                </channel>
            </rss>
        "#;

        assert!(parse(document.as_bytes()).is_ok());
    }

    /// `ParsedFeed::new` starts out non-bozo, with unknown version and UTF-8 encoding.
    #[test]
    fn test_parsed_feed_new() {
        let fresh = ParsedFeed::new();
        assert!(!fresh.bozo);
        assert_eq!(fresh.version, FeedVersion::Unknown);
        assert_eq!(fresh.encoding, "utf-8");
    }

    /// `FeedVersion`'s string form matches the expected short identifiers.
    #[test]
    fn test_feed_version_display() {
        let cases = [
            (FeedVersion::Rss20, "rss20"),
            (FeedVersion::Atom10, "atom10"),
        ];
        for (version, expected) in cases {
            assert_eq!(version.to_string(), expected);
        }
    }
}
296}