1use std::error::Error;
2use std::fmt;
3use std::fmt::Debug;
4use std::hash::Hasher;
5use std::io::{BufRead, BufReader, Read};
6
7use chrono::{DateTime, Utc};
8use siphasher::sip128::{Hasher128, SipHasher};
9
10use crate::model;
11use crate::parser::util::{IdGenerator, TimestampParser};
12use crate::xml;
13use crate::xml::NS;
14
15mod atom;
16mod json;
17mod rss0;
18mod rss1;
19mod rss2;
20
21pub(crate) mod itunes;
22pub(crate) mod mediarss;
23pub(crate) mod util;
24
25pub type ParseFeedResult<T> = Result<T, ParseFeedError>;
26
27#[derive(Debug)]
29pub enum ParseFeedError {
30 ParseError(ParseErrorKind),
32 IoError(std::io::Error),
34 JsonSerde(serde_json::error::Error),
36 JsonUnsupportedVersion(String),
38 XmlReader(xml::XmlError),
40}
41
42impl From<serde_json::error::Error> for ParseFeedError {
43 fn from(err: serde_json::error::Error) -> Self {
44 ParseFeedError::JsonSerde(err)
45 }
46}
47
48impl From<std::io::Error> for ParseFeedError {
49 fn from(err: std::io::Error) -> Self {
50 ParseFeedError::IoError(err)
51 }
52}
53
54impl From<xml::XmlError> for ParseFeedError {
55 fn from(err: xml::XmlError) -> Self {
56 ParseFeedError::XmlReader(err)
57 }
58}
59
60impl fmt::Display for ParseFeedError {
61 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
62 match self {
63 ParseFeedError::ParseError(pe) => write!(f, "unable to parse feed: {}", pe),
64 ParseFeedError::IoError(ie) => write!(f, "unable to read feed: {}", ie),
65 ParseFeedError::JsonSerde(je) => write!(f, "unable to parse JSON: {}", je),
66 ParseFeedError::JsonUnsupportedVersion(version) => write!(f, "unsupported version: {}", version),
67 ParseFeedError::XmlReader(xe) => write!(f, "unable to parse XML: {}", xe),
68 }
69 }
70}
71
72impl Error for ParseFeedError {
73 fn source(&self) -> Option<&(dyn Error + 'static)> {
74 match self {
75 ParseFeedError::IoError(ie) => Some(ie),
76 ParseFeedError::JsonSerde(je) => Some(je),
77 ParseFeedError::XmlReader(xe) => Some(xe),
78 _ => None,
79 }
80 }
81}
82
/// The specific reason carried by `ParseFeedError::ParseError`.
#[derive(Debug)]
pub enum ParseErrorKind {
    /// No usable root element was found in the input.
    NoFeedRoot,
    /// A content type that is not supported; carries the offending MIME type.
    UnknownMimeType(String),
    /// A required content element was absent; carries the name of that element.
    MissingContent(&'static str),
}
93
94impl fmt::Display for ParseErrorKind {
95 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96 match self {
97 ParseErrorKind::NoFeedRoot => f.write_str("no root element"),
98 ParseErrorKind::UnknownMimeType(mime) => write!(f, "unsupported content type {}", mime),
99 ParseErrorKind::MissingContent(elem) => write!(f, "missing content element {}", elem),
100 }
101 }
102}
103
104pub struct Parser {
106 base_uri: Option<String>,
107 id_generator: Box<IdGenerator>,
108 sanitize_content: bool,
109 timestamp_parser: Box<TimestampParser>,
110}
111
112impl Parser {
113 pub fn parse<R: Read>(&self, source: R) -> ParseFeedResult<model::Feed> {
144 let mut input = BufReader::new(source);
146
147 input.fill_buf()?;
149 let first_char = input.buffer().iter().find(|b| **b == b'<' || **b == b'{').map(|b| *b as char);
150 let result = match first_char {
151 Some('<') => self.parse_xml(input),
152
153 Some('{') => self.parse_json(input),
154
155 _ => Err(ParseFeedError::ParseError(ParseErrorKind::NoFeedRoot)),
156 };
157
158 if let Ok(mut feed) = result {
160 assign_missing_ids(&self.id_generator, &mut feed, self.base_uri.as_deref());
161
162 Ok(feed)
163 } else {
164 result
165 }
166 }
167
168 fn parse_json<R: BufRead>(&self, source: R) -> ParseFeedResult<model::Feed> {
170 json::parse(self, source)
171 }
172
173 fn parse_timestamp(&self, text: &str) -> Option<DateTime<Utc>> {
175 (self.timestamp_parser)(text)
176 }
177
178 fn parse_xml<R: BufRead>(&self, source: R) -> ParseFeedResult<model::Feed> {
180 let element_source = xml::ElementSource::new(source, self.base_uri.as_deref())?;
182 if let Ok(Some(root)) = element_source.root() {
183 let version = root.attr_value("version");
185 match (root.name.as_str(), version.as_deref()) {
186 ("feed", _) => {
187 element_source.set_default_default_namespace(NS::Atom);
188 return atom::parse_feed(self, root);
189 }
190 ("entry", _) => {
191 element_source.set_default_default_namespace(NS::Atom);
192 return atom::parse_entry(self, root);
193 }
194 ("rss", Some("2.0")) => {
195 element_source.set_default_default_namespace(NS::RSS);
196 return rss2::parse(self, root);
197 }
198 ("rss", Some("0.91")) | ("rss", Some("0.92")) => {
199 element_source.set_default_default_namespace(NS::RSS);
200 return rss0::parse(self, root);
201 }
202 ("RDF", _) => {
203 element_source.set_default_default_namespace(NS::RSS);
204 return rss1::parse(self, root);
205 }
206 _ => {}
207 };
208 }
209
210 Err(ParseFeedError::ParseError(ParseErrorKind::NoFeedRoot))
212 }
213}
214
215pub fn parse<R: Read>(source: R) -> ParseFeedResult<model::Feed> {
219 Builder::new().build().parse(source)
220}
221
222pub struct Builder {
224 base_uri: Option<String>,
225 id_generator: Box<IdGenerator>,
226 sanitize_content: bool,
227 timestamp_parser: Box<TimestampParser>,
228}
229
230impl Builder {
231 pub fn new() -> Builder {
233 Builder::default()
234 }
235
236 pub fn base_uri<S: AsRef<str>>(mut self, uri: Option<S>) -> Self {
238 self.base_uri = uri.map(|s| s.as_ref().to_string());
239 self
240 }
241
242 pub fn build(self) -> Parser {
244 Parser {
245 base_uri: self.base_uri,
246 id_generator: self.id_generator,
247 sanitize_content: self.sanitize_content,
248 timestamp_parser: self.timestamp_parser,
249 }
250 }
251
252 pub fn id_generator<F>(mut self, generator: F) -> Self
254 where
255 F: Fn(&[model::Link], &Option<model::Text>, Option<&str>) -> String + 'static,
256 {
257 self.id_generator = Box::new(generator);
258 self
259 }
260
261 pub fn id_generator_v0_2(self) -> Self {
263 self.id_generator(|links, title, _uri| {
264 if let Some(link) = links.iter().find(|l| l.rel.is_none()) {
266 let mut link = model::Link::new(link.href.clone(), None);
268 if link.href.ends_with('/') {
269 link.href.pop();
270 }
271
272 generate_id_from_link_and_title(&link, title)
273 } else {
274 util::uuid_gen()
275 }
276 })
277 }
278
279 pub fn sanitize_content(mut self, flag: bool) -> Self {
282 self.sanitize_content = flag;
283 self
284 }
285
286 pub fn timestamp_parser<F>(mut self, ts_parser: F) -> Self
288 where
289 F: Fn(&str) -> Option<DateTime<Utc>> + 'static,
290 {
291 self.timestamp_parser = Box::new(ts_parser);
292 self
293 }
294}
295
296impl Default for Builder {
298 fn default() -> Self {
299 Builder {
300 base_uri: None,
301 id_generator: Box::new(generate_id),
302 sanitize_content: true,
303 timestamp_parser: Box::new(util::parse_timestamp_lenient),
304 }
305 }
306}
307
308fn assign_missing_ids(id_generator: &IdGenerator, feed: &mut model::Feed, uri: Option<&str>) {
310 if feed.id.is_empty() {
311 feed.id = id_generator(&feed.links, &feed.title, uri);
312 }
313
314 for entry in feed.entries.iter_mut() {
315 if entry.id.is_empty() {
316 entry.id = id_generator(&entry.links, &entry.title, uri);
317 }
318 }
319}
320
/// First of the two fixed SipHash keys used when hashing IDs; being constant,
/// the same input always produces the same ID.
const LINK_HASH_KEY1: u64 = 0x5d78_4074_2887_2d60;
/// Second fixed SipHash key, paired with `LINK_HASH_KEY1`.
const LINK_HASH_KEY2: u64 = 0x90ee_ca4c_90a5_e228;
323
324pub fn generate_id(links: &[model::Link], title: &Option<model::Text>, uri: Option<&str>) -> String {
329 if let Some(link) = links.first() {
330 generate_id_from_link_and_title(link, title)
331 } else if let (Some(uri), Some(title)) = (uri, title) {
332 generate_id_from_uri_and_title(uri, title)
333 } else {
334 util::uuid_gen()
336 }
337}
338
339pub fn generate_id_from_link_and_title(link: &model::Link, title: &Option<model::Text>) -> String {
341 let mut hasher = SipHasher::new_with_keys(LINK_HASH_KEY1, LINK_HASH_KEY2);
342 hasher.write(link.href.as_bytes());
343 if let Some(title) = title {
344 hasher.write(title.content.as_bytes());
345 }
346 let hash = hasher.finish128();
347 format!("{:x}{:x}", hash.h1, hash.h2)
348}
349
350pub fn generate_id_from_uri_and_title(uri: &str, title: &model::Text) -> String {
352 let mut hasher = SipHasher::new_with_keys(LINK_HASH_KEY1, LINK_HASH_KEY2);
353 hasher.write(uri.as_bytes());
354 hasher.write(title.content.as_bytes());
355 let hash = hasher.finish128();
356 format!("{:x}{:x}", hash.h1, hash.h2)
357}
358
359#[cfg(test)]
360mod tests;