use std::io::BufRead;
use mediatype::{names, MediaTypeBuf};
use crate::model::{Category, Content, Entry, Feed, FeedType, Generator, Image, Link, MediaObject, Person, Text};
use crate::parser::mediarss::handle_media_element;
use crate::parser::util;
use crate::parser::util::if_some_then;
use crate::parser::{mediarss, Parser};
use crate::parser::{ParseErrorKind, ParseFeedError, ParseFeedResult};
use crate::xml::{Element, NS};
#[cfg(test)]
mod tests;
/// Builds a [`Feed`] of type [`FeedType::Atom`] from the children of `root`.
///
/// The feed language is taken from `root` via `util::handle_language_attr`. Every child in the
/// Atom namespace with a recognised tag is mapped onto the matching feed field; anything else is
/// ignored. When `parser.sanitize_content` is set, the description, rights and title texts are
/// sanitized before the feed is returned.
///
/// # Errors
///
/// Propagates errors from iterating the children and from the text, person and entry handlers.
pub(crate) fn parse_feed<R: BufRead>(parser: &Parser, root: Element<R>) -> ParseFeedResult<Feed> {
    let mut feed = Feed::new(FeedType::Atom);
    feed.language = util::handle_language_attr(&root);

    for node in root.children() {
        let node = node?;
        match node.ns_and_tag() {
            // Text constructs
            (NS::Atom, "title") => feed.title = handle_text(node)?,
            (NS::Atom, "subtitle") => feed.description = handle_text(node)?,
            (NS::Atom, "rights") => feed.rights = handle_text(node)?,

            // Simple scalar values
            (NS::Atom, "id") => if_some_then(node.child_as_text(), |value| feed.id = value),
            (NS::Atom, "updated") => if_some_then(node.child_as_text(), |raw| feed.updated = parser.parse_timestamp(&raw)),
            (NS::Atom, "generator") => feed.generator = handle_generator(node),
            (NS::Atom, "icon") => feed.icon = handle_image(node),
            (NS::Atom, "logo") => feed.logo = handle_image(node),

            // Repeatable elements
            (NS::Atom, "author") => if_some_then(handle_person(node)?, |author| feed.authors.push(author)),
            (NS::Atom, "contributor") => if_some_then(handle_person(node)?, |contributor| feed.contributors.push(contributor)),
            (NS::Atom, "link") => if_some_then(handle_link(node), |found| feed.links.push(found)),
            (NS::Atom, "category") => if_some_then(handle_category(node), |found| feed.categories.push(found)),
            (NS::Atom, "entry") => if_some_then(handle_entry(parser, node)?, |item| feed.entries.push(item)),

            // Anything else is not part of the model
            _ => {}
        }
    }

    if !parser.sanitize_content {
        return Ok(feed);
    }

    if let Some(text) = &mut feed.description {
        text.sanitize();
    }
    if let Some(text) = &mut feed.rights {
        text.sanitize();
    }
    if let Some(text) = &mut feed.title {
        text.sanitize();
    }

    Ok(feed)
}
/// Parses a document whose root element is a single entry.
///
/// The entry produced by `handle_entry` (if any) is wrapped in an otherwise empty [`Feed`] of
/// type [`FeedType::Atom`].
///
/// # Errors
///
/// Propagates any error returned by `handle_entry`.
pub(crate) fn parse_entry<R: BufRead>(parser: &Parser, root: Element<R>) -> ParseFeedResult<Feed> {
    let mut wrapper = Feed::new(FeedType::Atom);
    let parsed = handle_entry(parser, root)?;
    if_some_then(parsed, |item| wrapper.entries.push(item));
    Ok(wrapper)
}
/// Converts a category element into a [`Category`].
///
/// Returns `None` when the element has no `term` attribute. The optional `scheme` and `label`
/// attributes are copied onto the category when present; other attributes are ignored.
fn handle_category<R: BufRead>(element: Element<R>) -> Option<Category> {
    // Without a term there is nothing to build a category from.
    let term = element.attr_value("term")?;

    let mut result = Category::new(&term);
    for attribute in element.attributes {
        match attribute.name.as_str() {
            "label" => result.label = Some(attribute.value.clone()),
            "scheme" => result.scheme = Some(attribute.value.clone()),
            _ => {}
        }
    }

    Some(result)
}
fn handle_content<R: BufRead>(element: Element<R>) -> ParseFeedResult<Option<Content>> {
let content_type = element.attr_value("type");
if let Some(src) = element.attr_value("src") {
let mime = match &content_type {
Some(ct) => ct
.parse::<MediaTypeBuf>()
.map_err(|_| ParseFeedError::ParseError(ParseErrorKind::UnknownMimeType(ct.into())))?,
None => {
MediaTypeBuf::new(names::TEXT, names::HTML)
}
};
if element.child_as_text().is_some() {
return Err(ParseFeedError::ParseError(ParseErrorKind::MissingContent("non-empty atom:content with src")));
}
let content = Content {
body: None,
content_type: mime,
src: Some(Link {
href: src,
rel: None,
media_type: content_type,
href_lang: None,
title: None,
length: None,
}),
..Default::default()
};
return Ok(Some(content));
}
match content_type.as_deref() {
Some("text") | Some("html") | Some("xhtml") | Some("text/html") | None => {
handle_text(element)?
.map(|text| {
let mut content = Content::default();
content.body = Some(text.content);
content.content_type = text.content_type;
Some(content)
})
.ok_or(ParseFeedError::ParseError(ParseErrorKind::MissingContent("content.text")))
}
Some(ct) if ct.ends_with(" +xml") || ct.ends_with("/xml") => {
handle_text(element)?
.map(|body| {
let mut content = Content::default();
content.body = Some(body.content);
content.content_type = MediaTypeBuf::new(names::TEXT, names::XML);
Some(content)
})
.ok_or(ParseFeedError::ParseError(ParseErrorKind::MissingContent("content.xml")))
}
Some(ct) => {
if let Ok(mime) = ct.parse::<MediaTypeBuf>() {
element
.child_as_text()
.map(|body| {
let content = Content {
body: Some(body),
content_type: mime,
..Default::default()
};
Some(content)
})
.ok_or(ParseFeedError::ParseError(ParseErrorKind::MissingContent("content.inline")))
} else {
Err(ParseFeedError::ParseError(ParseErrorKind::UnknownMimeType(ct.into())))
}
}
}
}
/// Converts an entry element into an [`Entry`].
///
/// Recognised Atom children populate the entry fields. The entry's `base` and `language` are
/// taken from the content element. MediaRSS `group` elements become media objects of their own,
/// while all other MediaRSS elements accumulate into one shared media object that is attached
/// only if it ends up with content or thumbnails. When `parser.sanitize_content` is set, the
/// content, rights, summary and title are sanitized.
///
/// Always yields `Some(entry)` on success.
///
/// # Errors
///
/// Propagates errors from iterating the children and from the nested element handlers.
fn handle_entry<R: BufRead>(parser: &Parser, element: Element<R>) -> ParseFeedResult<Option<Entry>> {
    let mut entry = Entry::default();
    // Collects MediaRSS elements that appear directly under the entry (outside a group).
    let mut loose_media = MediaObject::default();

    for node in element.children() {
        let node = node?;
        match node.ns_and_tag() {
            // Text constructs
            (NS::Atom, "title") => entry.title = handle_text(node)?,
            (NS::Atom, "summary") => entry.summary = handle_text(node)?,
            (NS::Atom, "rights") => entry.rights = handle_text(node)?,

            // Content, which also supplies the entry's base and language
            (NS::Atom, "content") => {
                entry.base = util::handle_base_attr(&node);
                entry.language = util::handle_language_attr(&node);
                entry.content = handle_content(node)?;
            }

            // Identifier and timestamps
            (NS::Atom, "id") => if_some_then(node.child_as_text(), |value| entry.id = value),
            (NS::Atom, "updated") => if_some_then(node.child_as_text(), |raw| entry.updated = parser.parse_timestamp(&raw)),
            (NS::Atom, "published") | (NS::Atom, "pubDate") => if_some_then(node.child_as_text(), |raw| entry.published = parser.parse_timestamp(&raw)),

            // Repeatable elements
            (NS::Atom, "author") => if_some_then(handle_person(node)?, |author| entry.authors.push(author)),
            (NS::Atom, "contributor") => if_some_then(handle_person(node)?, |contributor| entry.contributors.push(contributor)),
            (NS::Atom, "link") => if_some_then(handle_link(node), |found| entry.links.push(found)),
            (NS::Atom, "category") => if_some_then(handle_category(node), |found| entry.categories.push(found)),

            // MediaRSS: the group arm must stay ahead of the wildcard arm
            (NS::MediaRSS, "group") => if_some_then(mediarss::handle_media_group(node)?, |group| entry.media.push(group)),
            (NS::MediaRSS, _) => handle_media_element(node, &mut loose_media)?,

            _ => {}
        }
    }

    if parser.sanitize_content {
        if let Some(content) = &mut entry.content {
            content.sanitize();
        }
        if let Some(text) = &mut entry.rights {
            text.sanitize();
        }
        if let Some(text) = &mut entry.summary {
            text.sanitize();
        }
        if let Some(text) = &mut entry.title {
            text.sanitize();
        }
    }

    // Skip the shared media object when nothing was collected into it.
    let nothing_collected = loose_media.content.is_empty() && loose_media.thumbnails.is_empty();
    if !nothing_collected {
        entry.media.push(loose_media);
    }

    Ok(Some(entry))
}
/// Converts a generator element into a [`Generator`].
///
/// Returns `None` when the element has no text. The `uri` and `version` attributes are copied
/// onto the generator when present; other attributes are ignored.
fn handle_generator<R: BufRead>(element: Element<R>) -> Option<Generator> {
    let label = element.child_as_text()?;

    let mut result = Generator::new(&label);
    for attribute in element.attributes {
        match attribute.name.as_str() {
            "version" => result.version = Some(attribute.value.clone()),
            "uri" => result.uri = Some(attribute.value.clone()),
            _ => {}
        }
    }

    Some(result)
}
/// Converts an element whose text is a URI into an [`Image`].
///
/// Returns `None` when the element has no text. The text is passed to `util::parse_uri` together
/// with the element's `xml_base`; if that yields a value its string form is used, otherwise the
/// raw text is kept unchanged.
fn handle_image<R: BufRead>(element: Element<R>) -> Option<Image> {
    let raw_uri = element.child_as_text()?;

    let location = util::parse_uri(&raw_uri, element.xml_base.as_ref())
        .map(|parsed| parsed.to_string())
        .unwrap_or(raw_uri);

    Some(Image::new(location))
}
/// Converts a link element into a [`Link`].
///
/// Returns `None` when the element has no `href` attribute. The link is created from `href` and
/// the element's `xml_base`, then `rel`, `type`, `hreflang`, `title` and `length` are copied from
/// the attributes. A `length` that does not parse as `u64` results in no length. When no `rel`
/// attribute is given, `rel` is set to `"alternate"`.
pub(crate) fn handle_link<R: BufRead>(element: Element<R>) -> Option<Link> {
    let href = element.attr_value("href")?;

    let mut result = Link::new(href, element.xml_base.as_ref());
    for attribute in element.attributes {
        match attribute.name.as_str() {
            "title" => result.title = Some(attribute.value.clone()),
            "type" => result.media_type = Some(attribute.value.clone()),
            "rel" => result.rel = Some(attribute.value.clone()),
            "hreflang" => result.href_lang = Some(attribute.value.clone()),
            "length" => result.length = attribute.value.parse::<u64>().ok(),
            _ => {}
        }
    }

    // Fall back to the default relation when none was declared.
    if result.rel.is_none() {
        result.rel = Some("alternate".to_owned());
    }

    Some(result)
}
/// Converts a person element (author or contributor) into a [`Person`].
///
/// The person starts with the name `"unknown"`, which is replaced only if a `name` child with
/// text is found. The `uri` and `email` children overwrite the corresponding fields with their
/// text, including with `None` when they are empty. Children are matched on tag name only.
///
/// Always yields `Some(person)` on success.
///
/// # Errors
///
/// Propagates any error raised while iterating the children.
fn handle_person<R: BufRead>(element: Element<R>) -> ParseFeedResult<Option<Person>> {
    let mut person = Person::new("unknown");

    for field in element.children() {
        let field = field?;
        // The text is read for every child, whether or not its tag is recognised.
        let value = field.child_as_text();
        match field.name.as_str() {
            "name" => {
                if let Some(name) = value {
                    person.name = name;
                }
            }
            "uri" => person.uri = value,
            "email" => person.email = value,
            _ => {}
        }
    }

    Ok(Some(person))
}
/// Converts a text construct element into a [`Text`].
///
/// The `type` attribute (defaulting to `"text"` when absent) selects the content type:
/// `text` maps to text/plain, and `html`, `xhtml` or `text/html` map to text/html. The body is
/// the element's children rendered as a string.
///
/// # Errors
///
/// * `UnknownMimeType` for any other `type` value.
/// * `MissingContent("text")` when the element has no content.
/// * Any error propagated from reading the children.
pub(crate) fn handle_text<R: BufRead>(element: Element<R>) -> ParseFeedResult<Option<Text>> {
    let declared = element
        .attributes
        .iter()
        .find(|attribute| &attribute.name == "type")
        .map(|attribute| attribute.value.as_str())
        .unwrap_or("text");

    let mime = match declared {
        "text" => MediaTypeBuf::new(names::TEXT, names::PLAIN),
        "html" | "xhtml" | "text/html" => MediaTypeBuf::new(names::TEXT, names::HTML),
        other => return Err(ParseFeedError::ParseError(ParseErrorKind::UnknownMimeType(other.into()))),
    };

    match element.children_as_string()? {
        Some(raw) => {
            let mut text = Text::new(raw);
            text.content_type = mime;
            Ok(Some(text))
        }
        None => Err(ParseFeedError::ParseError(ParseErrorKind::MissingContent("text"))),
    }
}