pub mod atom;
mod common;
mod detect;
pub mod json;
pub mod namespace_detection;
pub mod rss;
pub mod rss10;
use crate::{error::Result, types::ParsedFeed};
pub use common::skip_element;
pub use detect::detect_format;
/// Parses a feed from raw bytes using the default parser limits.
///
/// This is a convenience wrapper around [`parse_with_limits`] that passes
/// `crate::ParserLimits::default()` as the limits.
///
/// # Errors
///
/// Returns whatever error [`parse_with_limits`] returns.
pub fn parse(data: &[u8]) -> Result<ParsedFeed> {
    let default_limits = crate::ParserLimits::default();
    parse_with_limits(data, default_limits)
}
/// Parses a feed from raw bytes, forwarding `limits` to the format-specific parser.
///
/// Steps performed:
///
/// 1. The input is converted to a UTF-8 string with `detect_and_convert`. If that
///    fails, the bytes are decoded lossily and the encoding is reported as `"UTF-8"`.
/// 2. The feed format is detected from the converted bytes with `detect_format`.
/// 3. The matching parser (RSS, Atom, RSS 1.0 or JSON Feed) is invoked. For the
///    RSS 0.9x / 2.0 family the detected version overwrites the one set by the
///    RSS 2.0 parser.
/// 4. The lower-cased encoding label is stored in `encoding` on the returned feed.
///
/// An unrecognized format is not an error: an otherwise fresh `ParsedFeed` is
/// returned with `version` set to `FeedVersion::Unknown`, `bozo` set to `true`
/// and `bozo_exception` describing the problem.
///
/// # Errors
///
/// Propagates any error returned by the selected format-specific parser.
pub fn parse_with_limits(data: &[u8], limits: crate::ParserLimits) -> Result<ParsedFeed> {
    use crate::types::FeedVersion;
    use crate::util::encoding::detect_and_convert;

    // Fall back to a lossy decode when encoding detection/conversion fails.
    let (text, encoding_name) = match detect_and_convert(data) {
        Ok(converted) => converted,
        Err(_) => (String::from_utf8_lossy(data).into_owned(), "UTF-8"),
    };
    let bytes = text.as_bytes();
    let version = detect_format(bytes);

    let mut feed = match version {
        FeedVersion::Unknown => {
            // Unrecognized input yields a bozo feed instead of an error.
            let mut unrecognized = crate::types::ParsedFeed::new();
            unrecognized.version = FeedVersion::Unknown;
            unrecognized.bozo = true;
            unrecognized.bozo_exception = Some("Feed format not recognized".to_string());
            unrecognized
        }
        FeedVersion::Rss10 => rss10::parse_rss10_with_limits(bytes, limits)?,
        FeedVersion::Atom10 | FeedVersion::Atom03 => {
            atom::parse_atom10_with_limits(bytes, limits)?
        }
        FeedVersion::JsonFeed10 | FeedVersion::JsonFeed11 => {
            json::parse_json_feed_with_limits(bytes, limits)?
        }
        FeedVersion::Rss20
        | FeedVersion::Rss092
        | FeedVersion::Rss091Netscape
        | FeedVersion::Rss091Userland
        | FeedVersion::Rss090 => {
            // Every RSS 0.9x / 2.0 flavour goes through the RSS 2.0 parser; the
            // detected version is written back over the parser's own value.
            let mut rss_feed = rss::parse_rss20_with_limits(bytes, limits)?;
            rss_feed.version = version;
            rss_feed
        }
    };

    feed.encoding = encoding_name.to_lowercase();
    Ok(feed)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// RSS 0.91 document carrying the Netscape DOCTYPE.
    const RSS_091_NETSCAPE: &[u8] = br#"<?xml version="1.0"?>
<!DOCTYPE rss PUBLIC "-//Netscape Communications//DTD RSS 0.91//EN"
"http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel><title>T</title><link>http://example.com</link><description>D</description>
<language>en</language></channel></rss>"#;

    /// RSS 0.91 document without a DOCTYPE.
    const RSS_091_USERLAND: &[u8] = br#"<?xml version="1.0"?>
<rss version="0.91">
<channel><title>T</title><link>http://example.com</link><description>D</description>
<language>en</language></channel></rss>"#;

    /// Minimal RSS 0.92 document.
    const RSS_092: &[u8] = br#"<?xml version="1.0"?>
<rss version="0.92">
<channel><title>T</title><link>http://example.com</link><description>D</description>
</channel></rss>"#;

    /// Minimal RSS 2.0 document.
    const RSS_20: &[u8] = br#"<?xml version="1.0"?>
<rss version="2.0">
<channel><title>T</title><link>http://example.com</link><description>D</description>
</channel></rss>"#;

    /// Unrecognized input must come back as `Ok` with the bozo flag set.
    #[test]
    fn test_parse_returns_ok_bozo_for_garbage() {
        let parsed = parse(b"test").unwrap();
        assert!(parsed.bozo, "unrecognized input must set bozo");
        assert_eq!(parsed.version, crate::types::FeedVersion::Unknown);
        assert!(parsed.entries.is_empty());
    }

    /// RSS 0.91 with the Netscape DOCTYPE reports `rss091n`.
    #[test]
    fn test_rss091n_version_string() {
        let parsed = parse(RSS_091_NETSCAPE).unwrap();
        assert_eq!(parsed.version.as_str(), "rss091n");
    }

    /// RSS 0.91 without a DOCTYPE reports `rss091u`.
    #[test]
    fn test_rss091u_version_string() {
        let parsed = parse(RSS_091_USERLAND).unwrap();
        assert_eq!(parsed.version.as_str(), "rss091u");
    }

    /// RSS 0.92 reports `rss092`.
    #[test]
    fn test_rss092_version_string() {
        let parsed = parse(RSS_092).unwrap();
        assert_eq!(parsed.version.as_str(), "rss092");
    }

    /// RSS 2.0 still reports `rss20`.
    #[test]
    fn test_rss20_version_string_unchanged() {
        let parsed = parse(RSS_20).unwrap();
        assert_eq!(parsed.version.as_str(), "rss20");
    }
}