feedparser_rs/parser/
mod.rs1pub mod atom;
2mod common;
3mod detect;
4pub mod json;
5pub mod namespace_detection;
6pub mod rss;
7pub mod rss10;
8
9use crate::{error::Result, types::ParsedFeed};
10
11pub use common::skip_element;
12pub use detect::detect_format;
13
14pub fn parse(data: &[u8]) -> Result<ParsedFeed> {
43 parse_with_limits(data, crate::ParserLimits::default())
44}
45
46pub fn parse_with_limits(data: &[u8], limits: crate::ParserLimits) -> Result<ParsedFeed> {
67 use crate::types::FeedVersion;
68 use crate::util::encoding::detect_and_convert;
69
70 let (utf8_string, detected_encoding) = detect_and_convert(data)
73 .unwrap_or_else(|_| (String::from_utf8_lossy(data).into_owned(), "UTF-8"));
74
75 let utf8_bytes = utf8_string.as_bytes();
76 let encoding_label = detected_encoding.to_lowercase();
77
78 let version = detect_format(utf8_bytes);
80
81 let mut feed = match version {
83 FeedVersion::Rss20
85 | FeedVersion::Rss092
86 | FeedVersion::Rss091Netscape
87 | FeedVersion::Rss091Userland
88 | FeedVersion::Rss090 => {
89 let mut parsed = rss::parse_rss20_with_limits(utf8_bytes, limits)?;
90 parsed.version = version;
91 Ok(parsed)
92 }
93
94 FeedVersion::Atom10 | FeedVersion::Atom03 => {
96 atom::parse_atom10_with_limits(utf8_bytes, limits)
97 }
98
99 FeedVersion::Rss10 => rss10::parse_rss10_with_limits(utf8_bytes, limits),
101
102 FeedVersion::JsonFeed10 | FeedVersion::JsonFeed11 => {
104 json::parse_json_feed_with_limits(utf8_bytes, limits)
105 }
106
107 FeedVersion::Unknown => {
111 let mut feed = crate::types::ParsedFeed::new();
112 feed.version = FeedVersion::Unknown;
113 feed.bozo = true;
114 feed.bozo_exception = Some("Feed format not recognized".to_string());
115 Ok(feed)
116 }
117 }?;
118
119 feed.encoding = encoding_label;
120 Ok(feed)
121}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `xml` and asserts that the resulting feed's version string
    /// equals `expected`.
    #[track_caller]
    fn assert_version_str(xml: &[u8], expected: &str) {
        let feed = parse(xml).unwrap();
        assert_eq!(feed.version.as_str(), expected);
    }

    /// Unrecognized input yields `Ok` with the bozo flag set, an `Unknown`
    /// version, and no entries.
    #[test]
    fn test_parse_returns_ok_bozo_for_garbage() {
        let garbage: &[u8] = b"test";
        let feed = parse(garbage).unwrap();
        assert!(feed.bozo, "unrecognized input must set bozo");
        assert_eq!(feed.version, crate::types::FeedVersion::Unknown);
        assert!(feed.entries.is_empty());
    }

    /// RSS 0.91 with the Netscape DOCTYPE reports the `rss091n` version string.
    #[test]
    fn test_rss091n_version_string() {
        assert_version_str(
            br#"<?xml version="1.0"?>
<!DOCTYPE rss PUBLIC "-//Netscape Communications//DTD RSS 0.91//EN"
 "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel><title>T</title><link>http://example.com</link><description>D</description>
<language>en</language></channel></rss>"#,
            "rss091n",
        );
    }

    /// RSS 0.91 without a DOCTYPE reports the `rss091u` version string.
    #[test]
    fn test_rss091u_version_string() {
        assert_version_str(
            br#"<?xml version="1.0"?>
<rss version="0.91">
<channel><title>T</title><link>http://example.com</link><description>D</description>
<language>en</language></channel></rss>"#,
            "rss091u",
        );
    }

    /// RSS 0.92 reports the `rss092` version string.
    #[test]
    fn test_rss092_version_string() {
        assert_version_str(
            br#"<?xml version="1.0"?>
<rss version="0.92">
<channel><title>T</title><link>http://example.com</link><description>D</description>
</channel></rss>"#,
            "rss092",
        );
    }

    /// RSS 2.0 reports the `rss20` version string.
    #[test]
    fn test_rss20_version_string_unchanged() {
        assert_version_str(
            br#"<?xml version="1.0"?>
<rss version="2.0">
<channel><title>T</title><link>http://example.com</link><description>D</description>
</channel></rss>"#,
            "rss20",
        );
    }
}