feedparser_rs/parser/
detect.rs1use crate::types::FeedVersion;
4use quick_xml::{Reader, events::Event};
5
6const MAX_JSON_DETECTION_SIZE: usize = 1024 * 1024; #[must_use]
36pub fn detect_format(data: &[u8]) -> FeedVersion {
37 let first_non_whitespace = data.iter().find(|&&b| !b.is_ascii_whitespace()).copied();
39
40 if first_non_whitespace == Some(b'{') {
41 return detect_json_feed_version(data);
42 }
43
44 detect_xml_format(data)
46}
47
48fn detect_json_feed_version(data: &[u8]) -> FeedVersion {
52 if data.len() > MAX_JSON_DETECTION_SIZE {
54 let truncated = &data[..MAX_JSON_DETECTION_SIZE];
57 return detect_json_version_from_partial(truncated);
60 }
61
62 if let Ok(json) = serde_json::from_slice::<serde_json::Value>(data)
64 && let Some(version) = json.get("version").and_then(|v| v.as_str())
65 {
66 return match version {
67 "https://jsonfeed.org/version/1" => FeedVersion::JsonFeed10,
68 "https://jsonfeed.org/version/1.1" => FeedVersion::JsonFeed11,
69 _ => FeedVersion::Unknown,
70 };
71 }
72 FeedVersion::Unknown
73}
74
75fn detect_json_version_from_partial(data: &[u8]) -> FeedVersion {
77 let data_str = std::str::from_utf8(data).unwrap_or("");
80
81 if data_str.contains("https://jsonfeed.org/version/1.1") {
82 FeedVersion::JsonFeed11
83 } else if data_str.contains("https://jsonfeed.org/version/1") {
84 FeedVersion::JsonFeed10
85 } else {
86 FeedVersion::Unknown
87 }
88}
89
90fn detect_xml_format(data: &[u8]) -> FeedVersion {
92 let mut reader = Reader::from_reader(data);
93 reader.config_mut().trim_text(true);
94
95 let mut buf = Vec::new();
96
97 loop {
99 match reader.read_event_into(&mut buf) {
100 Ok(Event::Start(e) | Event::Empty(e)) => {
101 let name = e.local_name();
102
103 match name.as_ref() {
104 b"rss" => {
105 for attr in e.attributes().flatten() {
107 if attr.key.as_ref() == b"version" {
108 return match attr.value.as_ref() {
109 b"0.90" => FeedVersion::Rss090,
110 b"0.91" => FeedVersion::Rss091,
111 b"0.92" => FeedVersion::Rss092,
112 b"2.0" => FeedVersion::Rss20,
113 _ => FeedVersion::Unknown,
114 };
115 }
116 }
117 return FeedVersion::Rss20;
119 }
120 b"rdf:RDF" | b"RDF" => {
121 return FeedVersion::Rss10;
123 }
124 b"feed" => {
125 for attr in e.attributes().flatten() {
127 if attr.key.as_ref() == b"xmlns" {
128 let ns = attr.value.as_ref();
129 if ns == b"http://www.w3.org/2005/Atom" {
130 return FeedVersion::Atom10;
131 } else if ns == b"http://purl.org/atom/ns#" {
132 return FeedVersion::Atom03;
133 }
134 }
135 }
136 return FeedVersion::Atom10;
138 }
139 _ => {
140 return FeedVersion::Unknown;
142 }
143 }
144 }
145 Ok(Event::Eof) => break,
146 Err(_) => {
147 break;
149 }
150 _ => {}
151 }
152 buf.clear();
153 }
154
155 FeedVersion::Unknown
156}
157
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `detect_format` on `input` and checks the detected version.
    #[track_caller]
    fn expect_format(input: &[u8], expected: FeedVersion) {
        assert_eq!(detect_format(input), expected);
    }

    /// Runs `detect_json_version_from_partial` on `input` and checks the result.
    #[track_caller]
    fn expect_partial(input: &[u8], expected: FeedVersion) {
        assert_eq!(detect_json_version_from_partial(input), expected);
    }

    // --- RSS ---------------------------------------------------------------

    #[test]
    fn test_detect_rss20() {
        expect_format(
            br#"<?xml version="1.0"?><rss version="2.0"></rss>"#,
            FeedVersion::Rss20,
        );
    }

    #[test]
    fn test_detect_rss20_no_version() {
        expect_format(
            br#"<?xml version="1.0"?><rss></rss>"#,
            FeedVersion::Rss20,
        );
    }

    #[test]
    fn test_detect_rss091() {
        expect_format(br#"<rss version="0.91"></rss>"#, FeedVersion::Rss091);
    }

    #[test]
    fn test_detect_rss092() {
        expect_format(br#"<rss version="0.92"></rss>"#, FeedVersion::Rss092);
    }

    #[test]
    fn test_detect_rss090() {
        expect_format(br#"<rss version="0.90"></rss>"#, FeedVersion::Rss090);
    }

    #[test]
    fn test_detect_rss10_rdf() {
        expect_format(
            br#"<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>"#,
            FeedVersion::Rss10,
        );
    }

    #[test]
    fn test_detect_rss10_rdf_uppercase() {
        expect_format(
            br#"<RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></RDF>"#,
            FeedVersion::Rss10,
        );
    }

    // --- Atom --------------------------------------------------------------

    #[test]
    fn test_detect_atom10() {
        expect_format(
            br#"<feed xmlns="http://www.w3.org/2005/Atom"></feed>"#,
            FeedVersion::Atom10,
        );
    }

    #[test]
    fn test_detect_atom10_no_xmlns() {
        expect_format(br"<feed></feed>", FeedVersion::Atom10);
    }

    #[test]
    fn test_detect_atom03() {
        expect_format(
            br#"<feed xmlns="http://purl.org/atom/ns#"></feed>"#,
            FeedVersion::Atom03,
        );
    }

    // --- JSON Feed ---------------------------------------------------------

    #[test]
    fn test_detect_json_feed_10() {
        expect_format(
            br#"{"version": "https://jsonfeed.org/version/1", "title": "Test"}"#,
            FeedVersion::JsonFeed10,
        );
    }

    #[test]
    fn test_detect_json_feed_11() {
        expect_format(
            br#"{"version": "https://jsonfeed.org/version/1.1"}"#,
            FeedVersion::JsonFeed11,
        );
    }

    // --- Unrecognized or degenerate input ----------------------------------

    #[test]
    fn test_detect_unknown_xml() {
        expect_format(br"<unknown></unknown>", FeedVersion::Unknown);
    }

    #[test]
    fn test_detect_invalid_xml() {
        expect_format(b"not xml at all", FeedVersion::Unknown);
    }

    #[test]
    fn test_detect_whitespace_before_json() {
        expect_format(
            b" \n {\"version\": \"https://jsonfeed.org/version/1.1\"}",
            FeedVersion::JsonFeed11,
        );
    }

    #[test]
    fn test_detect_whitespace_before_xml() {
        expect_format(
            b" \n <?xml version=\"1.0\"?><rss version=\"2.0\"></rss>",
            FeedVersion::Rss20,
        );
    }

    #[test]
    fn test_detect_empty_data() {
        expect_format(b"", FeedVersion::Unknown);
    }

    // --- Substring-based detection -----------------------------------------

    #[test]
    fn test_detect_json_version_from_partial() {
        expect_partial(
            br#"{"version": "https://jsonfeed.org/version/1.1", "title": "Test"}"#,
            FeedVersion::JsonFeed11,
        );
        expect_partial(
            br#"{"version": "https://jsonfeed.org/version/1", "title": "Test"}"#,
            FeedVersion::JsonFeed10,
        );
        expect_partial(
            br#"{"title": "No version field"}"#,
            FeedVersion::Unknown,
        );
    }
}