use std::error::Error;
use std::fmt;
use std::fmt::Debug;
use std::hash::Hasher;
use std::io::{BufRead, BufReader, Read};
use chrono::{DateTime, Utc};
use siphasher::sip128::{Hasher128, SipHasher};
use crate::model;
use crate::parser::util::{IdGenerator, TimestampParser};
use crate::xml;
use crate::xml::NS;
mod atom;
mod json;
mod rss0;
mod rss1;
mod rss2;
pub(crate) mod itunes;
pub(crate) mod mediarss;
pub(crate) mod util;
/// Result type returned by the feed parsing functions in this module.
///
/// The error side is always a [`ParseFeedError`].
pub type ParseFeedResult<T> = Result<T, ParseFeedError>;
/// Errors that can be reported while parsing a feed.
#[derive(Debug)]
pub enum ParseFeedError {
/// The content could not be interpreted as a feed; the wrapped kind carries the detail.
ParseError(ParseErrorKind),
/// An I/O error was raised while reading the source.
IoError(std::io::Error),
/// The JSON (de)serializer reported an error.
JsonSerde(serde_json::error::Error),
/// The JSON content declared a version that is not supported; the string holds that version.
JsonUnsupportedVersion(String),
/// The underlying XML reader reported an error.
XmlReader(xml::XmlError),
}
/// Lets `?` lift JSON (de)serialization errors into [`ParseFeedError::JsonSerde`].
impl From<serde_json::error::Error> for ParseFeedError {
    fn from(err: serde_json::error::Error) -> Self {
        Self::JsonSerde(err)
    }
}
impl From<std::io::Error> for ParseFeedError {
fn from(err: std::io::Error) -> Self {
ParseFeedError::IoError(err)
}
}
/// Lets `?` lift XML reader errors into [`ParseFeedError::XmlReader`].
impl From<xml::XmlError> for ParseFeedError {
    fn from(err: xml::XmlError) -> Self {
        Self::XmlReader(err)
    }
}
impl fmt::Display for ParseFeedError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ParseFeedError::ParseError(pe) => write!(f, "unable to parse feed: {}", pe),
ParseFeedError::IoError(ie) => write!(f, "unable to read feed: {}", ie),
ParseFeedError::JsonSerde(je) => write!(f, "unable to parse JSON: {}", je),
ParseFeedError::JsonUnsupportedVersion(version) => write!(f, "unsupported version: {}", version),
ParseFeedError::XmlReader(xe) => write!(f, "unable to parse XML: {}", xe),
}
}
}
impl Error for ParseFeedError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
ParseFeedError::IoError(ie) => Some(ie),
ParseFeedError::JsonSerde(je) => Some(je),
ParseFeedError::XmlReader(xe) => Some(xe),
_ => None,
}
}
}
/// The specific reason a feed could not be parsed (carried by `ParseFeedError::ParseError`).
#[derive(Debug)]
pub enum ParseErrorKind {
    /// No root element was found.
    NoFeedRoot,
    /// The content type (held in the string) is not supported.
    UnknownMimeType(String),
    /// A content element (named by the string) is missing.
    MissingContent(&'static str),
}

/// Human-readable description of each kind, including the offending value where there is one.
impl fmt::Display for ParseErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NoFeedRoot => write!(f, "no root element"),
            Self::UnknownMimeType(content_type) => write!(f, "unsupported content type {}", content_type),
            Self::MissingContent(element) => write!(f, "missing content element {}", element),
        }
    }
}
/// A configured feed parser; instances are produced by `Builder::build`.
pub struct Parser {
// Optional base URI: handed to the XML element source and to the ID generator.
base_uri: Option<String>,
// Invoked after parsing to fill in IDs for a feed or entries whose `id` is empty.
id_generator: Box<IdGenerator>,
// Invoked by `parse_timestamp` to convert timestamp text into a UTC date/time.
timestamp_parser: Box<TimestampParser>,
}
impl Parser {
pub fn parse<R: Read>(&self, source: R) -> ParseFeedResult<model::Feed> {
let mut input = BufReader::new(source);
input.fill_buf()?;
let first_char = input.buffer().iter().find(|b| **b == b'<' || **b == b'{').map(|b| *b as char);
let result = match first_char {
Some('<') => self.parse_xml(input),
Some('{') => self.parse_json(input),
_ => Err(ParseFeedError::ParseError(ParseErrorKind::NoFeedRoot)),
};
if let Ok(mut feed) = result {
assign_missing_ids(&self.id_generator, &mut feed, self.base_uri.as_deref());
Ok(feed)
} else {
result
}
}
fn parse_json<R: BufRead>(&self, source: R) -> ParseFeedResult<model::Feed> {
json::parse(self, source)
}
fn parse_timestamp(&self, text: &str) -> Option<DateTime<Utc>> {
(self.timestamp_parser)(text)
}
fn parse_xml<R: BufRead>(&self, source: R) -> ParseFeedResult<model::Feed> {
let element_source = xml::ElementSource::new(source, self.base_uri.as_deref())?;
if let Ok(Some(root)) = element_source.root() {
let version = root.attr_value("version");
match (root.name.as_str(), version.as_deref()) {
("feed", _) => {
element_source.set_default_default_namespace(NS::Atom);
return atom::parse_feed(self, root);
}
("entry", _) => {
element_source.set_default_default_namespace(NS::Atom);
return atom::parse_entry(self, root);
}
("rss", Some("2.0")) => {
element_source.set_default_default_namespace(NS::RSS);
return rss2::parse(self, root);
}
("rss", Some("0.91")) | ("rss", Some("0.92")) => {
element_source.set_default_default_namespace(NS::RSS);
return rss0::parse(self, root);
}
("RDF", _) => {
element_source.set_default_default_namespace(NS::RSS);
return rss1::parse(self, root);
}
_ => {}
};
}
Err(ParseFeedError::ParseError(ParseErrorKind::NoFeedRoot))
}
}
/// Parses a feed from `source` using a parser built with the default [`Builder`] settings.
///
/// # Errors
///
/// Returns whatever error `Parser::parse` reports for the given source.
pub fn parse<R: Read>(source: R) -> ParseFeedResult<model::Feed> {
    let parser = Builder::new().build();
    parser.parse(source)
}
/// Builder used to configure and create a `Parser`.
///
/// Each field is moved unchanged into the `Parser` by `build`.
pub struct Builder {
// Optional base URI for the parser; `None` unless set through `base_uri`.
base_uri: Option<String>,
// Function used to create IDs for a feed or entries that have an empty `id`.
id_generator: Box<IdGenerator>,
// Function used to convert timestamp text into a UTC date/time.
timestamp_parser: Box<TimestampParser>,
}
impl Builder {
pub fn new() -> Builder {
Builder::default()
}
pub fn base_uri<S: AsRef<str>>(mut self, uri: Option<S>) -> Self {
self.base_uri = uri.map(|s| s.as_ref().to_string());
self
}
pub fn build(self) -> Parser {
Parser {
base_uri: self.base_uri,
id_generator: self.id_generator,
timestamp_parser: self.timestamp_parser,
}
}
pub fn id_generator<F>(mut self, generator: F) -> Self
where
F: Fn(&[model::Link], &Option<model::Text>, Option<&str>) -> String + 'static,
{
self.id_generator = Box::new(generator);
self
}
pub fn id_generator_v0_2(self) -> Self {
self.id_generator(|links, title, _uri| {
if let Some(link) = links.iter().find(|l| l.rel.is_none()) {
let mut link = model::Link::new(link.href.clone(), None);
if link.href.ends_with('/') {
link.href.pop();
}
generate_id_from_link_and_title(&link, title)
} else {
util::uuid_gen()
}
})
}
pub fn timestamp_parser<F>(mut self, ts_parser: F) -> Self
where
F: Fn(&str) -> Option<DateTime<Utc>> + 'static,
{
self.timestamp_parser = Box::new(ts_parser);
self
}
}
/// Default configuration: no base URI, `generate_id` as the ID generator and
/// `util::parse_timestamp_lenient` as the timestamp parser.
impl Default for Builder {
    fn default() -> Self {
        Self {
            base_uri: None,
            id_generator: Box::new(generate_id),
            timestamp_parser: Box::new(util::parse_timestamp_lenient),
        }
    }
}
/// Fills in IDs that are still empty after parsing.
///
/// The feed itself is handled first, then each entry in order. Every empty `id` is replaced
/// with the value produced by `id_generator` from that item's links and title plus `uri`.
/// Non-empty IDs are left as they are.
fn assign_missing_ids(id_generator: &IdGenerator, feed: &mut model::Feed, uri: Option<&str>) {
    if feed.id.is_empty() {
        feed.id = id_generator(&feed.links, &feed.title, uri);
    }

    feed.entries
        .iter_mut()
        .filter(|entry| entry.id.is_empty())
        .for_each(|entry| entry.id = id_generator(&entry.links, &entry.title, uri));
}
// Fixed SipHash keys used by the ID generation functions below. Because the keys are
// constants, the same input always hashes to the same ID.
const LINK_HASH_KEY1: u64 = 0x5d78_4074_2887_2d60;
const LINK_HASH_KEY2: u64 = 0x90ee_ca4c_90a5_e228;
/// Default ID generator.
///
/// In order of preference:
/// 1. if there is at least one link, hash the first link together with the title;
/// 2. otherwise, if both a URI and a title are available, hash those;
/// 3. otherwise, use the value returned by `util::uuid_gen()`.
pub fn generate_id(links: &[model::Link], title: &Option<model::Text>, uri: Option<&str>) -> String {
    match (links.first(), uri, title) {
        (Some(link), _, _) => generate_id_from_link_and_title(link, title),
        (None, Some(uri), Some(title)) => generate_id_from_uri_and_title(uri, title),
        _ => util::uuid_gen(),
    }
}
/// Derives an ID by hashing the link's `href` followed by the title content (when present).
///
/// The 128-bit SipHash result is rendered as the lowercase hex of its two 64-bit halves,
/// concatenated. The halves are not zero-padded, so the length of the ID can vary.
pub fn generate_id_from_link_and_title(link: &model::Link, title: &Option<model::Text>) -> String {
    let mut hasher = SipHasher::new_with_keys(LINK_HASH_KEY1, LINK_HASH_KEY2);
    hasher.write(link.href.as_bytes());
    if let Some(text) = title.as_ref() {
        hasher.write(text.content.as_bytes());
    }

    let digest = hasher.finish128();
    format!("{:x}{:x}", digest.h1, digest.h2)
}
/// Derives an ID by hashing `uri` followed by the title content.
///
/// The 128-bit SipHash result is rendered as the lowercase hex of its two 64-bit halves,
/// concatenated. The halves are not zero-padded, so the length of the ID can vary.
pub fn generate_id_from_uri_and_title(uri: &str, title: &model::Text) -> String {
    let mut hasher = SipHasher::new_with_keys(LINK_HASH_KEY1, LINK_HASH_KEY2);
    // Feed the parts in a fixed order: URI first, then title.
    for part in &[uri.as_bytes(), title.content.as_bytes()] {
        hasher.write(part);
    }

    let digest = hasher.finish128();
    format!("{:x}{:x}", digest.h1, digest.h2)
}
#[cfg(test)]
mod tests;