feedparser_rs/
lib.rs

1//! feedparser-rs-core: High-performance RSS/Atom/JSON Feed parser
2//!
3//! This crate provides a pure Rust implementation of feed parsing with
4//! compatibility for Python's feedparser library.
5//!
6//! # Examples
7//!
8//! ```
9//! use feedparser_rs::parse;
10//!
11//! let xml = r#"
12//!     <?xml version="1.0"?>
13//!     <rss version="2.0">
14//!         <channel>
15//!             <title>Example Feed</title>
16//!         </channel>
17//!     </rss>
18//! "#;
19//!
20//! // Parsing will be fully implemented in Phase 2
21//! let feed = parse(xml.as_bytes()).unwrap();
22//! assert!(feed.bozo == false);
23//! ```
24//!
25//! # Features
26//!
27//! - Parse RSS 0.9x, 1.0, 2.0
28//! - Parse Atom 0.3, 1.0
29//! - Parse JSON Feed 1.0, 1.1
30//! - Tolerant parsing with bozo flag
31//! - Multi-format date parsing
32//! - HTML sanitization
33//! - Encoding detection
34//!
35//! # Architecture
36//!
37//! The library provides core data structures like [`ParsedFeed`], [`Entry`], and [`FeedMeta`]
38//! for representing parsed feed data. The main entry point is the [`parse`] function which
39//! automatically detects feed format and returns parsed results.
40
41mod compat;
42mod error;
43#[cfg(feature = "http")]
44/// HTTP client module for fetching feeds from URLs
45pub mod http;
46mod limits;
47/// Namespace handlers for extended feed formats
48pub mod namespace;
49mod options;
50mod parser;
51
52/// Type definitions for feed data structures
53///
54/// This module contains all the data types used to represent parsed feeds,
55/// including the main `ParsedFeed` struct and related types.
56pub mod types;
57
58/// Utility functions for feed parsing
59///
60/// This module provides helper functions for date parsing, HTML sanitization,
61/// and encoding detection that are useful for feed processing.
62pub mod util;
63
64pub use error::{FeedError, Result};
65pub use limits::{LimitError, ParserLimits};
66pub use options::ParseOptions;
67pub use parser::{detect_format, parse, parse_with_limits};
68pub use types::{
69    Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
70    ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, ParsedFeed, Person,
71    PodcastFunding, PodcastMeta, PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct,
72    TextType, parse_duration, parse_explicit,
73};
74
75#[cfg(feature = "http")]
76pub use http::{FeedHttpClient, FeedHttpResponse};
77
/// Parse feed from HTTP/HTTPS URL
///
/// Fetches the feed from the given URL and parses it. Supports conditional GET
/// using `ETag` and `Last-Modified` headers for bandwidth-efficient caching.
///
/// # Arguments
///
/// * `url` - HTTP or HTTPS URL to fetch
/// * `etag` - Optional `ETag` from previous fetch for conditional GET
/// * `modified` - Optional `Last-Modified` timestamp from previous fetch
/// * `user_agent` - Optional custom User-Agent header
///
/// # Returns
///
/// Returns a `ParsedFeed` with HTTP metadata fields populated:
/// - `status`: HTTP status code (200, 304, etc.)
/// - `href`: Final URL after redirects
/// - `etag`: `ETag` header value (for next request)
/// - `modified`: `Last-Modified` header value (for next request)
/// - `headers`: Full HTTP response headers
///
/// On 304 Not Modified, returns a feed with empty entries but status=304.
/// In that case `etag` and `modified` hold the validators sent with the 304
/// response when it carried any, and otherwise the values passed in by the
/// caller, so they can always be reused for the next conditional request.
///
/// # Errors
///
/// Returns `FeedError::Http` if:
/// - Network error occurs
/// - URL is invalid
/// - HTTP status is 400 or above (4xx or 5xx)
///
/// Errors produced while parsing the response body are propagated unchanged.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::parse_url;
///
/// // First fetch
/// let feed = parse_url("https://example.com/feed.xml", None, None, None).unwrap();
/// println!("Title: {:?}", feed.feed.title);
/// println!("ETag: {:?}", feed.etag);
///
/// // Subsequent fetch with caching
/// let feed2 = parse_url(
///     "https://example.com/feed.xml",
///     feed.etag.as_deref(),
///     feed.modified.as_deref(),
///     None
/// ).unwrap();
///
/// if feed2.status == Some(304) {
///     println!("Feed not modified, use cached version");
/// }
/// ```
#[cfg(feature = "http")]
pub fn parse_url(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
) -> Result<ParsedFeed> {
    use http::FeedHttpClient;

    // Create HTTP client, applying the caller's User-Agent when one is given
    let mut client = FeedHttpClient::new()?;
    if let Some(agent) = user_agent {
        client = client.with_user_agent(agent.to_string());
    }

    // Fetch feed (conditional GET when validators are supplied)
    let response = client.get(url, etag, modified, None)?;

    // Handle 304 Not Modified: there is no body to parse, so return an
    // otherwise-default feed that only carries the HTTP metadata.
    if response.status == 304 {
        return Ok(ParsedFeed {
            status: Some(304),
            href: Some(response.url),
            // A 304 may carry refreshed validators; prefer those and only fall
            // back to the ones the caller sent when the response has none.
            etag: response.etag.or_else(|| etag.map(String::from)),
            modified: response
                .last_modified
                .or_else(|| modified.map(String::from)),
            headers: Some(response.headers),
            encoding: String::from("utf-8"),
            ..Default::default()
        });
    }

    // Handle error status codes
    if response.status >= 400 {
        return Err(FeedError::Http {
            message: format!("HTTP {} for URL: {}", response.status, response.url),
        });
    }

    // Parse feed from response body
    let mut feed = parse(&response.body)?;

    // Add HTTP metadata
    feed.status = Some(response.status);
    feed.href = Some(response.url);
    feed.etag = response.etag;
    feed.modified = response.last_modified;
    feed.headers = Some(response.headers);

    // Override encoding if HTTP header specifies
    if let Some(http_encoding) = response.encoding {
        feed.encoding = http_encoding;
    }

    Ok(feed)
}
189
/// Parse feed from URL with custom parser limits
///
/// Like `parse_url` but allows specifying custom limits for resource control.
///
/// # Arguments
///
/// * `url` - HTTP or HTTPS URL to fetch
/// * `etag` - Optional `ETag` from previous fetch for conditional GET
/// * `modified` - Optional `Last-Modified` timestamp from previous fetch
/// * `user_agent` - Optional custom User-Agent header
/// * `limits` - Parser limits passed to `parse_with_limits` for the response body
///
/// # Returns
///
/// Returns a `ParsedFeed` with `status`, `href`, `etag`, `modified` and
/// `headers` filled in from the HTTP response.
///
/// On 304 Not Modified the body is not parsed: the returned feed is otherwise
/// default, has status=304, and its `etag` / `modified` hold the validators
/// sent with the 304 response when it carried any, falling back to the values
/// passed in by the caller.
///
/// # Errors
///
/// Returns `FeedError::Http` if the request fails or `FeedError::Parse` if parsing fails.
/// An HTTP status of 400 or above is also reported as `FeedError::Http`.
///
/// # Examples
///
/// ```no_run
/// use feedparser_rs::{parse_url_with_limits, ParserLimits};
///
/// let limits = ParserLimits::strict();
/// let feed = parse_url_with_limits(
///     "https://example.com/feed.xml",
///     None,
///     None,
///     None,
///     limits
/// ).unwrap();
/// ```
#[cfg(feature = "http")]
pub fn parse_url_with_limits(
    url: &str,
    etag: Option<&str>,
    modified: Option<&str>,
    user_agent: Option<&str>,
    limits: ParserLimits,
) -> Result<ParsedFeed> {
    use http::FeedHttpClient;

    // Create HTTP client, applying the caller's User-Agent when one is given
    let mut client = FeedHttpClient::new()?;
    if let Some(agent) = user_agent {
        client = client.with_user_agent(agent.to_string());
    }

    // Fetch feed (conditional GET when validators are supplied)
    let response = client.get(url, etag, modified, None)?;

    // Handle 304 Not Modified: nothing to parse, return HTTP metadata only
    if response.status == 304 {
        return Ok(ParsedFeed {
            status: Some(304),
            href: Some(response.url),
            // A 304 may carry refreshed validators; prefer those and only fall
            // back to the ones the caller sent when the response has none.
            etag: response.etag.or_else(|| etag.map(String::from)),
            modified: response
                .last_modified
                .or_else(|| modified.map(String::from)),
            headers: Some(response.headers),
            encoding: String::from("utf-8"),
            ..Default::default()
        });
    }

    // Handle error status codes
    if response.status >= 400 {
        return Err(FeedError::Http {
            message: format!("HTTP {} for URL: {}", response.status, response.url),
        });
    }

    // Parse feed from response body under the caller-supplied limits
    let mut feed = parse_with_limits(&response.body, limits)?;

    // Add HTTP metadata
    feed.status = Some(response.status);
    feed.href = Some(response.url);
    feed.etag = response.etag;
    feed.modified = response.last_modified;
    feed.headers = Some(response.headers);

    // Override encoding if HTTP header specifies
    if let Some(http_encoding) = response.encoding {
        feed.encoding = http_encoding;
    }

    Ok(feed)
}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal RSS 2.0 document is accepted by `parse`.
    #[test]
    fn test_parse_basic() {
        let rss_doc = r#"
            <?xml version="1.0"?>
            <rss version="2.0">
                <channel>
                    <title>Test</title>
                </channel>
            </rss>
        "#;

        let outcome = parse(rss_doc.as_bytes());
        assert!(outcome.is_ok());
    }

    /// `ParsedFeed::new` yields a UTF-8, non-bozo feed of unknown version.
    #[test]
    fn test_parsed_feed_new() {
        let fresh = ParsedFeed::new();

        assert_eq!(fresh.encoding, "utf-8");
        assert!(!fresh.bozo);
        assert_eq!(fresh.version, FeedVersion::Unknown);
    }

    /// `FeedVersion` renders as its short lowercase identifier.
    #[test]
    fn test_feed_version_display() {
        let expectations = [
            (FeedVersion::Rss20, "rss20"),
            (FeedVersion::Atom10, "atom10"),
        ];

        for (version, rendered) in expectations {
            assert_eq!(version.to_string(), rendered);
        }
    }
}