feedparser_rs/
lib.rs

1//! feedparser-rs-core: High-performance RSS/Atom/JSON Feed parser
2//!
3//! This crate provides a pure Rust implementation of feed parsing with
4//! compatibility for Python's feedparser library.
5//!
6//! # Examples
7//!
8//! ```
9//! use feedparser_rs::parse;
10//!
11//! let xml = r#"
12//!     <?xml version="1.0"?>
13//!     <rss version="2.0">
14//!         <channel>
15//!             <title>Example Feed</title>
16//!         </channel>
17//!     </rss>
18//! "#;
19//!
20//! // Parsing will be fully implemented in Phase 2
21//! let feed = parse(xml.as_bytes()).unwrap();
22//! assert!(feed.bozo == false);
23//! ```
24//!
25//! # Features
26//!
27//! - Parse RSS 0.9x, 1.0, 2.0
28//! - Parse Atom 0.3, 1.0
29//! - Parse JSON Feed 1.0, 1.1
30//! - Tolerant parsing with bozo flag
31//! - Multi-format date parsing
32//! - HTML sanitization
33//! - Encoding detection
34//!
35//! # Architecture
36//!
37//! The library provides core data structures like [`ParsedFeed`], [`Entry`], and [`FeedMeta`]
38//! for representing parsed feed data. The main entry point is the [`parse`] function which
39//! automatically detects feed format and returns parsed results.
40
41mod compat;
42mod error;
43#[cfg(feature = "http")]
44/// HTTP client module for fetching feeds from URLs
45pub mod http;
46mod limits;
47/// Namespace handlers for extended feed formats
48pub mod namespace;
49mod options;
50mod parser;
51
52/// Type definitions for feed data structures
53///
54/// This module contains all the data types used to represent parsed feeds,
55/// including the main `ParsedFeed` struct and related types.
56pub mod types;
57
58/// Utility functions for feed parsing
59///
60/// This module provides helper functions for date parsing, HTML sanitization,
61/// and encoding detection that are useful for feed processing.
62pub mod util;
63
64pub use error::{FeedError, Result};
65pub use limits::{LimitError, ParserLimits};
66pub use options::ParseOptions;
67pub use parser::{detect_format, parse, parse_with_limits};
68pub use types::{
69    Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
70    ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, ParsedFeed, Person,
71    PodcastFunding, PodcastMeta, PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct,
72    TextType, parse_duration, parse_explicit,
73};
74
75pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};
76
77#[cfg(feature = "http")]
78pub use http::{FeedHttpClient, FeedHttpResponse};
79
/// Parse feed from HTTP/HTTPS URL
///
/// Fetches the feed from the given URL and parses it. Supports conditional GET
/// using `ETag` and `Last-Modified` headers for bandwidth-efficient caching.
///
/// # Arguments
///
/// * `url` - HTTP or HTTPS URL to fetch
/// * `etag` - Optional `ETag` from previous fetch for conditional GET
/// * `modified` - Optional `Last-Modified` timestamp from previous fetch
/// * `user_agent` - Optional custom User-Agent header
///
/// # Returns
///
/// Returns a `ParsedFeed` with HTTP metadata fields populated:
/// - `status`: HTTP status code (200, 304, etc.)
/// - `href`: URL reported by the HTTP client for the response
/// - `etag`: `ETag` value to send with the next request
/// - `modified`: `Last-Modified` value to send with the next request
/// - `headers`: HTTP response headers as returned by the HTTP client
///
/// If the HTTP response reports an encoding, it replaces the encoding
/// determined by the parser.
///
/// On 304 Not Modified, nothing is parsed: the returned feed has
/// `status == Some(304)` and all feed content left at its default value.
/// `etag` and `modified` then carry the validators from the 304 response
/// when the server sent them, and otherwise the values passed in, so the
/// result can always be fed straight into the next conditional request.
///
/// # Errors
///
/// - Errors from creating the HTTP client or performing the request are
///   propagated unchanged.
/// - Returns `FeedError::Http` if the HTTP status is 400 or above.
/// - Errors from [`parse`] on the response body are propagated unchanged.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::parse_url;
///
/// // First fetch
/// let feed = parse_url("https://example.com/feed.xml", None, None, None).unwrap();
/// println!("Title: {:?}", feed.feed.title);
/// println!("ETag: {:?}", feed.etag);
///
/// // Subsequent fetch with caching
/// let feed2 = parse_url(
///     "https://example.com/feed.xml",
///     feed.etag.as_deref(),
///     feed.modified.as_deref(),
///     None
/// ).unwrap();
///
/// if feed2.status == Some(304) {
///     println!("Feed not modified, use cached version");
/// }
/// ```
#[cfg(feature = "http")]
pub fn parse_url(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
) -> Result<ParsedFeed> {
    fetch_and_parse(url, etag, modified, user_agent, None)
}

/// Parse feed from URL with custom parser limits
///
/// Like [`parse_url`] but the response body is parsed with
/// [`parse_with_limits`] using the supplied `limits`, for resource control.
/// Status handling (including 304 Not Modified) and the HTTP metadata set on
/// the result are identical to [`parse_url`].
///
/// # Errors
///
/// - Errors from creating the HTTP client or performing the request are
///   propagated unchanged.
/// - Returns `FeedError::Http` if the HTTP status is 400 or above.
/// - Errors from [`parse_with_limits`] on the response body are propagated
///   unchanged.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::{parse_url_with_limits, ParserLimits};
///
/// let limits = ParserLimits::strict();
/// let feed = parse_url_with_limits(
///     "https://example.com/feed.xml",
///     None,
///     None,
///     None,
///     limits
/// ).unwrap();
/// ```
#[cfg(feature = "http")]
pub fn parse_url_with_limits(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    limits: ParserLimits,
) -> Result<ParsedFeed> {
    fetch_and_parse(url, etag, modified, user_agent, Some(limits))
}

/// Shared implementation behind [`parse_url`] and [`parse_url_with_limits`].
///
/// Fetches `url` with the given conditional-GET validators, handles the
/// 304 and error statuses, parses the body and attaches the HTTP metadata.
/// `limits == None` parses with [`parse`]; `Some(limits)` parses with
/// [`parse_with_limits`].
#[cfg(feature = "http")]
fn fetch_and_parse(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    limits: Option<ParserLimits>,
) -> Result<ParsedFeed> {
    use http::FeedHttpClient;

    // Create HTTP client, overriding the User-Agent only when one was given.
    let mut client = FeedHttpClient::new()?;
    if let Some(agent) = user_agent {
        client = client.with_user_agent(agent.to_string());
    }

    // Fetch feed. The last `get` argument is left as `None`; see
    // `FeedHttpClient::get` for what it controls.
    let response = client.get(url, etag, modified, None)?;

    // 304 Not Modified: there is no body to parse. Prefer the validators the
    // server sent with the 304 (they may have been refreshed) and fall back to
    // the ones the caller supplied so the result is always reusable.
    if response.status == 304 {
        return Ok(ParsedFeed {
            status: Some(304),
            href: Some(response.url),
            etag: response.etag.or_else(|| etag.map(String::from)),
            modified: response
                .last_modified
                .or_else(|| modified.map(String::from)),
            headers: Some(response.headers),
            encoding: String::from("utf-8"),
            ..Default::default()
        });
    }

    // Client and server errors are reported instead of parsing an error page.
    if response.status >= 400 {
        return Err(FeedError::Http {
            message: format!("HTTP {} for URL: {}", response.status, response.url),
        });
    }

    // Parse feed from response body, honouring custom limits when provided.
    let mut feed = match limits {
        Some(limits) => parse_with_limits(&response.body, limits)?,
        None => parse(&response.body)?,
    };

    // Add HTTP metadata
    feed.status = Some(response.status);
    feed.href = Some(response.url);
    feed.etag = response.etag;
    feed.modified = response.last_modified;
    feed.headers = Some(response.headers);

    // An encoding reported by the HTTP response overrides the parser's value.
    if let Some(http_encoding) = response.encoding {
        feed.encoding = http_encoding;
    }

    Ok(feed)
}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal RSS 2.0 document is accepted by `parse` without an error.
    #[test]
    fn test_parse_basic() {
        let document = r#"
            <?xml version="1.0"?>
            <rss version="2.0">
                <channel>
                    <title>Test</title>
                </channel>
            </rss>
        "#;

        assert!(parse(document.as_bytes()).is_ok());
    }

    /// `ParsedFeed::new` starts as a non-bozo, UTF-8 feed of unknown version.
    #[test]
    fn test_parsed_feed_new() {
        let fresh = ParsedFeed::new();

        assert!(!fresh.bozo);
        assert_eq!(fresh.version, FeedVersion::Unknown);
        assert_eq!(fresh.encoding, "utf-8");
    }

    /// `FeedVersion` renders as its short lowercase identifier.
    #[test]
    fn test_feed_version_display() {
        let expectations = [
            (FeedVersion::Rss20, "rss20"),
            (FeedVersion::Atom10, "atom10"),
        ];

        for (version, rendered) in expectations {
            assert_eq!(version.to_string(), rendered);
        }
    }
}