use crate::error::{Result, RssError, ValidationError};
use crate::generator::sanitize_content;
use quick_xml::events::{
BytesDecl, BytesEnd, BytesStart, BytesText, Event,
};
use quick_xml::Writer;
use serde::{Deserialize, Serialize};
use std::io::Cursor;
/// Atom namespace URI.
///
/// Written as the default `xmlns` of every feed produced by
/// [`generate_atom`] and matched by [`detect_feed_format`] when deciding
/// whether a `feed` root element is Atom.
pub const ATOM_NAMESPACE: &str = "http://www.w3.org/2005/Atom";
/// Version string placed in the XML declaration of generated feeds.
const XML_VERSION: &str = "1.0";
/// Encoding label placed in the XML declaration of generated feeds.
const XML_ENCODING: &str = "utf-8";
/// Feed dialect reported by [`detect_feed_format`].
///
/// The classification is based solely on the document's root element.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum FeedFormat {
/// The root element is `rss`.
Rss,
/// The root element is `rdf:RDF` (or an unprefixed `RDF`).
RssRdf,
/// The root element is `feed` and is bound to [`ATOM_NAMESPACE`].
Atom,
/// Any other root element, an empty document, or a parse error.
Unknown,
}
/// Classifies an XML document by looking at its root element.
///
/// Only the first start (or self-closing) tag is inspected; everything
/// before it (XML declaration, comments, whitespace, ...) is skipped and
/// nothing after it is read.
///
/// * `rss` -> [`FeedFormat::Rss`]
/// * `rdf:RDF` or `RDF` -> [`FeedFormat::RssRdf`]
/// * `feed` whose own namespace declaration on the root element equals
///   [`ATOM_NAMESPACE`] -> [`FeedFormat::Atom`]. Both the default-namespace
///   form (`<feed xmlns="...">`) and the prefixed form
///   (`<atom:feed xmlns:atom="...">`) are recognised.
/// * anything else, end of input, or a parse error -> [`FeedFormat::Unknown`]
#[must_use]
pub fn detect_feed_format(xml: &str) -> FeedFormat {
    use quick_xml::Reader;

    let mut reader = Reader::from_str(xml);
    reader.config_mut().trim_text(true);
    let mut buf = Vec::new();
    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(start) | Event::Empty(start)) => {
                let qname = start.name();
                let qualified = qname.as_ref();
                if qualified == b"rss" {
                    return FeedFormat::Rss;
                }
                if qualified == b"rdf:RDF" || qualified == b"RDF" {
                    return FeedFormat::RssRdf;
                }
                // Compare the *local* name so that a prefixed Atom root
                // such as `atom:feed` is not rejected outright.
                if qname.local_name().into_inner() != b"feed" {
                    return FeedFormat::Unknown;
                }
                // The declaration that binds this element's namespace is
                // `xmlns` for an unprefixed root and `xmlns:<prefix>` for a
                // prefixed one.
                let mut ns_attr = b"xmlns".to_vec();
                if let Some(prefix) = qname.prefix() {
                    ns_attr.push(b':');
                    ns_attr.extend_from_slice(prefix.into_inner());
                }
                // Malformed attributes are ignored rather than treated as
                // a hard failure.
                let has_atom_ns = start.attributes().flatten().any(|attr| {
                    attr.key.as_ref() == ns_attr.as_slice()
                        && attr.value.as_ref() == ATOM_NAMESPACE.as_bytes()
                });
                return if has_atom_ns {
                    FeedFormat::Atom
                } else {
                    FeedFormat::Unknown
                };
            }
            Ok(Event::Eof) | Err(_) => return FeedFormat::Unknown,
            // Declaration, comment, text, ...: drop it and keep scanning
            // for the root element.
            Ok(_) => buf.clear(),
        }
    }
}
/// A person, written as an Atom `author` or `contributor` element.
///
/// `email` and `uri` are optional: an empty string means "not set" and the
/// corresponding child element is omitted when the person is written.
#[derive(
Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize,
)]
#[non_exhaustive]
pub struct AtomPerson {
/// Content of the `name` child element (always written).
pub name: String,
/// Content of the `email` child element; omitted when empty.
pub email: String,
/// Content of the `uri` child element; omitted when empty.
pub uri: String,
}
impl AtomPerson {
    /// Creates a person with the given name and no email or URI.
    ///
    /// The name is passed through `sanitize_input` before it is stored.
    #[must_use]
    pub fn new<S: Into<String>>(name: S) -> Self {
        let raw_name: String = name.into();
        Self {
            name: sanitize_input(&raw_name),
            email: String::new(),
            uri: String::new(),
        }
    }

    /// Returns the person with `email` replaced by the sanitised `value`.
    #[must_use]
    pub fn email<S: Into<String>>(self, value: S) -> Self {
        let cleaned = sanitize_input(&value.into());
        Self {
            email: cleaned,
            ..self
        }
    }

    /// Returns the person with `uri` replaced by the sanitised `value`.
    #[must_use]
    pub fn uri<S: Into<String>>(self, value: S) -> Self {
        let cleaned = sanitize_input(&value.into());
        Self {
            uri: cleaned,
            ..self
        }
    }
}
/// An Atom `link` element.
///
/// Every field maps to one attribute. Apart from `href`, which is always
/// written, an empty string means the attribute is omitted.
#[derive(
Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize,
)]
#[non_exhaustive]
pub struct AtomLink {
/// Value of the `href` attribute (always written).
pub href: String,
/// Value of the `rel` attribute, e.g. `alternate`, `self`, `enclosure`.
pub rel: String,
/// Value of the `type` attribute.
pub mime_type: String,
/// Value of the `length` attribute, kept as its decimal text form.
pub length: String,
/// Value of the `title` attribute.
pub title: String,
}
impl AtomLink {
    /// Shared constructor: sanitised `href`, the given `rel`, and every
    /// other attribute left empty.
    fn with_rel(href: &str, rel: &str) -> Self {
        let mut link = Self::default();
        link.href = sanitize_input(href);
        link.rel = rel.to_string();
        link
    }

    /// Creates a `rel="alternate"` link pointing at `href`.
    #[must_use]
    pub fn alternate<S: Into<String>>(href: S) -> Self {
        Self::with_rel(&href.into(), "alternate")
    }

    /// Creates a `rel="self"` link pointing at `href`.
    #[must_use]
    pub fn self_ref<S: Into<String>>(href: S) -> Self {
        Self::with_rel(&href.into(), "self")
    }

    /// Creates a `rel="enclosure"` link.
    ///
    /// `mime_type` is sanitised and stored for the `type` attribute;
    /// `length` is stored as its decimal string.
    #[must_use]
    pub fn enclosure<S, T>(href: S, mime_type: T, length: u64) -> Self
    where
        S: Into<String>,
        T: Into<String>,
    {
        let mut link = Self::with_rel(&href.into(), "enclosure");
        link.mime_type = sanitize_input(&mime_type.into());
        link.length = length.to_string();
        link
    }

    /// Returns the link with `title` replaced by the sanitised `value`.
    #[must_use]
    pub fn title<S: Into<String>>(self, value: S) -> Self {
        let cleaned = sanitize_input(&value.into());
        Self {
            title: cleaned,
            ..self
        }
    }
}
/// How the text of an entry's `summary` or `content` is labelled in its
/// `type` attribute.
#[derive(
Debug,
Clone,
Copy,
Default,
PartialEq,
Eq,
Hash,
Serialize,
Deserialize,
)]
#[non_exhaustive]
pub enum AtomTextType {
/// Written as `type="text"`; this is the default.
#[default]
Text,
/// Written as `type="html"`.
Html,
}
impl AtomTextType {
    /// Returns the literal used for the `type` attribute of a typed text
    /// element.
    fn as_attr(self) -> &'static str {
        const TEXT: &str = "text";
        const HTML: &str = "html";
        match self {
            Self::Html => HTML,
            Self::Text => TEXT,
        }
    }
}
/// An Atom feed document, built with the chained methods on this type and
/// serialised by [`generate_atom`].
///
/// String fields use the empty string for "not set". `id`, `title` and
/// `updated` are required by [`AtomFeed::validate`]; the remaining string
/// fields are only written when non-empty.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct AtomFeed {
/// Content of the feed's `id` element (required).
pub id: String,
/// Content of the feed's `title` element (required).
pub title: String,
/// Content of the `subtitle` element.
pub subtitle: String,
/// Content of the `updated` element (required, must parse as RFC 3339).
pub updated: String,
/// Content of the `rights` element.
pub rights: String,
/// Content of the `generator` element.
pub generator: String,
/// Content of the `icon` element.
pub icon: String,
/// Content of the `logo` element.
pub logo: String,
/// Value of the `xml:lang` attribute on the `feed` element.
pub language: String,
/// Feed-level `author` elements.
pub authors: Vec<AtomPerson>,
/// Feed-level `contributor` elements.
pub contributors: Vec<AtomPerson>,
/// Feed-level `link` elements.
pub links: Vec<AtomLink>,
/// Feed-level categories; each string becomes a `category` `term`.
pub categories: Vec<String>,
/// The feed's entries, written in this order.
pub entries: Vec<AtomEntry>,
}
/// A single Atom `entry`.
///
/// String fields use the empty string for "not set". `id`, `title` and
/// `updated` are required by [`AtomEntry::validate`]; the remaining string
/// fields are only written when non-empty.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct AtomEntry {
/// Content of the entry's `id` element (required).
pub id: String,
/// Content of the entry's `title` element (required).
pub title: String,
/// Content of the `updated` element (required, must parse as RFC 3339).
pub updated: String,
/// Content of the `published` element; must parse as RFC 3339 when set.
pub published: String,
/// Content of the `summary` element.
pub summary: String,
/// `type` attribute written on the `summary` element.
pub summary_type: AtomTextType,
/// Content of the `content` element.
pub content: String,
/// `type` attribute written on the `content` element.
pub content_type: AtomTextType,
/// Content of the `rights` element.
pub rights: String,
/// Entry-level `author` elements.
pub authors: Vec<AtomPerson>,
/// Entry-level `contributor` elements.
pub contributors: Vec<AtomPerson>,
/// Entry-level `link` elements (including enclosures).
pub links: Vec<AtomLink>,
/// Entry-level categories; each string becomes a `category` `term`.
pub categories: Vec<String>,
}
impl AtomFeed {
    /// Creates an empty feed; equivalent to `AtomFeed::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the feed `id` (sanitised).
    #[must_use]
    pub fn id<S: Into<String>>(self, value: S) -> Self {
        Self {
            id: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the feed `title` (sanitised).
    #[must_use]
    pub fn title<S: Into<String>>(self, value: S) -> Self {
        Self {
            title: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the feed `subtitle` (sanitised).
    #[must_use]
    pub fn subtitle<S: Into<String>>(self, value: S) -> Self {
        Self {
            subtitle: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the feed `updated` timestamp (sanitised).
    ///
    /// The value is stored as given; [`AtomFeed::validate`] is what checks
    /// that it is an RFC 3339 timestamp.
    #[must_use]
    pub fn updated<S: Into<String>>(self, value: S) -> Self {
        Self {
            updated: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the feed `rights` text (sanitised).
    #[must_use]
    pub fn rights<S: Into<String>>(self, value: S) -> Self {
        Self {
            rights: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the feed `generator` text (sanitised).
    #[must_use]
    pub fn generator<S: Into<String>>(self, value: S) -> Self {
        Self {
            generator: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the feed `icon` (sanitised).
    #[must_use]
    pub fn icon<S: Into<String>>(self, value: S) -> Self {
        Self {
            icon: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the feed `logo` (sanitised).
    #[must_use]
    pub fn logo<S: Into<String>>(self, value: S) -> Self {
        Self {
            logo: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Sets the language written as `xml:lang` on the `feed` element
    /// (sanitised).
    #[must_use]
    pub fn language<S: Into<String>>(self, value: S) -> Self {
        Self {
            language: sanitize_input(&value.into()),
            ..self
        }
    }

    /// Appends a feed-level author that has only a name.
    #[must_use]
    pub fn author_name<S: Into<String>>(self, name: S) -> Self {
        let person = AtomPerson::new(name);
        self.add_author(person)
    }

    /// Appends a feed-level author.
    #[must_use]
    pub fn add_author(mut self, author: AtomPerson) -> Self {
        self.authors.push(author);
        self
    }

    /// Appends a feed-level contributor.
    #[must_use]
    pub fn add_contributor(mut self, contributor: AtomPerson) -> Self {
        self.contributors.push(contributor);
        self
    }

    /// Appends a feed-level link.
    #[must_use]
    pub fn add_link(mut self, link: AtomLink) -> Self {
        self.links.push(link);
        self
    }

    /// Appends a `rel="self"` link pointing at `href`.
    #[must_use]
    pub fn self_link<S: Into<String>>(self, href: S) -> Self {
        let link = AtomLink::self_ref(href);
        self.add_link(link)
    }

    /// Appends a `rel="alternate"` link pointing at `href`.
    #[must_use]
    pub fn alternate_link<S: Into<String>>(self, href: S) -> Self {
        let link = AtomLink::alternate(href);
        self.add_link(link)
    }

    /// Appends a feed-level category term (sanitised).
    #[must_use]
    pub fn add_category<S: Into<String>>(mut self, term: S) -> Self {
        let cleaned = sanitize_input(&term.into());
        self.categories.push(cleaned);
        self
    }

    /// Appends an entry to the feed.
    #[must_use]
    pub fn add_entry(mut self, entry: AtomEntry) -> Self {
        self.entries.push(entry);
        self
    }

    /// Number of entries currently in the feed.
    #[must_use]
    pub fn entry_count(&self) -> usize {
        self.entries.len()
    }

    /// Checks the feed and all of its entries, collecting every problem
    /// instead of stopping at the first one.
    ///
    /// Checks performed, in reporting order:
    /// * `id`, `title` and `updated` must be non-empty;
    /// * a non-empty `updated` must be an RFC 3339 timestamp;
    /// * each entry is validated with its index in the field path
    ///   (`entry.<idx>.…`);
    /// * when the feed has no author, each entry must have its own.
    ///
    /// # Errors
    ///
    /// Returns `RssError::ValidationErrors` holding all collected problems
    /// when at least one check fails.
    pub fn validate(&self) -> Result<()> {
        let mut problems: Vec<ValidationError> = Vec::new();

        for (field, value) in
            [("feed.id", &self.id), ("feed.title", &self.title)]
        {
            if value.is_empty() {
                problems.push(ValidationError::new(
                    field,
                    format!("{field} is missing"),
                ));
            }
        }

        match self.updated.as_str() {
            "" => problems.push(ValidationError::new(
                "feed.updated",
                "feed.updated is missing",
            )),
            stamp if !is_rfc3339(stamp) => {
                problems.push(ValidationError::new(
                    "feed.updated",
                    format!(
                        "feed.updated is not a valid RFC 3339 timestamp: {stamp}"
                    ),
                ));
            }
            _ => {}
        }

        // An entry may omit its author only when the feed supplies one.
        let feed_supplies_author = !self.authors.is_empty();
        for (idx, entry) in self.entries.iter().enumerate() {
            if let Err(RssError::ValidationErrors(entry_problems)) =
                entry.validate_with_index(idx)
            {
                problems.extend(entry_problems);
            }
            if feed_supplies_author || !entry.authors.is_empty() {
                continue;
            }
            problems.push(ValidationError::new(
                format!("entry.{idx}.author"),
                format!(
                    "entry.{idx}.author is missing (and feed has no feed-level author)"
                ),
            ));
        }

        if problems.is_empty() {
            Ok(())
        } else {
            Err(RssError::ValidationErrors(problems))
        }
    }
}
impl AtomEntry {
    /// Creates an empty entry; equivalent to `AtomEntry::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the entry `id` (sanitised).
    #[must_use]
    pub fn id<S: Into<String>>(mut self, value: S) -> Self {
        self.id = sanitize_input(&value.into());
        self
    }

    /// Sets the entry `title` (sanitised).
    #[must_use]
    pub fn title<S: Into<String>>(mut self, value: S) -> Self {
        self.title = sanitize_input(&value.into());
        self
    }

    /// Sets the entry `updated` timestamp (sanitised).
    ///
    /// The value is stored as given; validation is what checks that it is
    /// an RFC 3339 timestamp.
    #[must_use]
    pub fn updated<S: Into<String>>(mut self, value: S) -> Self {
        self.updated = sanitize_input(&value.into());
        self
    }

    /// Sets the entry `published` timestamp (sanitised).
    #[must_use]
    pub fn published<S: Into<String>>(mut self, value: S) -> Self {
        self.published = sanitize_input(&value.into());
        self
    }

    /// Sets a plain-text summary (`type="text"`).
    #[must_use]
    pub fn summary<S: Into<String>>(mut self, value: S) -> Self {
        self.summary = sanitize_input(&value.into());
        self.summary_type = AtomTextType::Text;
        self
    }

    /// Sets an HTML summary (`type="html"`).
    #[must_use]
    pub fn summary_html<S: Into<String>>(mut self, value: S) -> Self {
        self.summary = sanitize_input(&value.into());
        self.summary_type = AtomTextType::Html;
        self
    }

    /// Sets plain-text content (`type="text"`).
    #[must_use]
    pub fn content<S: Into<String>>(mut self, value: S) -> Self {
        self.content = sanitize_input(&value.into());
        self.content_type = AtomTextType::Text;
        self
    }

    /// Sets HTML content (`type="html"`).
    #[must_use]
    pub fn content_html<S: Into<String>>(mut self, value: S) -> Self {
        self.content = sanitize_input(&value.into());
        self.content_type = AtomTextType::Html;
        self
    }

    /// Sets the entry `rights` text (sanitised).
    #[must_use]
    pub fn rights<S: Into<String>>(mut self, value: S) -> Self {
        self.rights = sanitize_input(&value.into());
        self
    }

    /// Appends an entry-level author that has only a name.
    #[must_use]
    pub fn author_name<S: Into<String>>(mut self, name: S) -> Self {
        self.authors.push(AtomPerson::new(name));
        self
    }

    /// Appends an entry-level author.
    #[must_use]
    pub fn add_author(mut self, author: AtomPerson) -> Self {
        self.authors.push(author);
        self
    }

    /// Appends an entry-level contributor.
    ///
    /// Mirrors `AtomFeed::add_contributor`, so the `contributors` field can
    /// be filled through the builder chain like every other field.
    #[must_use]
    pub fn add_contributor(mut self, contributor: AtomPerson) -> Self {
        self.contributors.push(contributor);
        self
    }

    /// Appends an entry-level link.
    #[must_use]
    pub fn add_link(mut self, link: AtomLink) -> Self {
        self.links.push(link);
        self
    }

    /// Appends a `rel="alternate"` link pointing at `href`.
    #[must_use]
    pub fn alternate_link<S: Into<String>>(self, href: S) -> Self {
        self.add_link(AtomLink::alternate(href))
    }

    /// Appends a `rel="enclosure"` link with the given MIME type and
    /// length.
    #[must_use]
    pub fn add_enclosure<S, T>(
        self,
        href: S,
        mime_type: T,
        length: u64,
    ) -> Self
    where
        S: Into<String>,
        T: Into<String>,
    {
        self.add_link(AtomLink::enclosure(href, mime_type, length))
    }

    /// Appends an entry-level category term (sanitised).
    #[must_use]
    pub fn add_category<S: Into<String>>(mut self, term: S) -> Self {
        self.categories.push(sanitize_input(&term.into()));
        self
    }

    /// Validates the entry on its own, using the field-path prefix
    /// `entry.` (no index).
    ///
    /// # Errors
    ///
    /// Returns `RssError::ValidationErrors` with every problem found.
    pub fn validate(&self) -> Result<()> {
        let mut errors: Vec<ValidationError> = Vec::new();
        push_entry_errors(self, "entry.", &mut errors);
        if errors.is_empty() {
            Ok(())
        } else {
            Err(RssError::ValidationErrors(errors))
        }
    }

    /// Validates the entry as the `idx`-th entry of a feed, using the
    /// field-path prefix `entry.<idx>.`.
    fn validate_with_index(&self, idx: usize) -> Result<()> {
        let prefix = format!("entry.{idx}.");
        let mut errors: Vec<ValidationError> = Vec::new();
        push_entry_errors(self, &prefix, &mut errors);
        if errors.is_empty() {
            Ok(())
        } else {
            Err(RssError::ValidationErrors(errors))
        }
    }
}
/// Appends one `ValidationError` to `errors` for every problem in `entry`.
///
/// `prefix` (for example `entry.` or `entry.3.`) is prepended to both the
/// field path and the message. Checks, in order: `id`, `title` and
/// `updated` must be non-empty; a non-empty `updated` or `published` must
/// be an RFC 3339 timestamp.
fn push_entry_errors(
    entry: &AtomEntry,
    prefix: &str,
    errors: &mut Vec<ValidationError>,
) {
    let mut report = |suffix: &str, message: String| {
        errors.push(ValidationError::new(
            format!("{prefix}{suffix}"),
            message,
        ));
    };

    if entry.id.is_empty() {
        report("id", format!("{prefix}id is missing"));
    }
    if entry.title.is_empty() {
        report("title", format!("{prefix}title is missing"));
    }

    if entry.updated.is_empty() {
        report("updated", format!("{prefix}updated is missing"));
    } else if !is_rfc3339(&entry.updated) {
        report(
            "updated",
            format!(
                "{prefix}updated is not a valid RFC 3339 timestamp: {}",
                entry.updated
            ),
        );
    }

    // `published` is optional, so only a present value is checked.
    let published_is_bad =
        !entry.published.is_empty() && !is_rfc3339(&entry.published);
    if published_is_bad {
        report(
            "published",
            format!(
                "{prefix}published is not a valid RFC 3339 timestamp: {}",
                entry.published
            ),
        );
    }
}
fn is_rfc3339(value: &str) -> bool {
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
OffsetDateTime::parse(value, &Rfc3339).is_ok()
}
/// Removes characters that must not appear in the generated XML.
///
/// Dropped:
/// * every control character except line feed, carriage return and tab;
/// * the noncharacters U+FFFE and U+FFFF. `char::is_control` does not
///   flag them, but they fall outside the XML 1.0 `Char` production and
///   would make the document ill-formed.
///
/// All other characters are kept unchanged and in order.
fn sanitize_input(value: &str) -> String {
    value
        .chars()
        .filter(|&c| match c {
            '\n' | '\r' | '\t' => true,
            '\u{FFFE}' | '\u{FFFF}' => false,
            _ => !c.is_control(),
        })
        .collect()
}
/// Serialises `feed` as an Atom XML document.
///
/// The feed is validated first. The output starts with an XML declaration,
/// followed by a `feed` element in the Atom namespace (with `xml:lang` when
/// a language is set). Its children are written in this order: `id`,
/// `title`, `updated`, the non-empty optional text elements, authors,
/// contributors, links, categories and finally the entries.
///
/// # Errors
///
/// Returns the error from [`AtomFeed::validate`] when the feed is invalid.
/// Failures while writing XML or converting the buffer to UTF-8 are
/// converted into `RssError`.
pub fn generate_atom(feed: &AtomFeed) -> Result<String> {
    feed.validate()?;

    let mut writer = Writer::new(Cursor::new(Vec::new()));
    let declaration =
        BytesDecl::new(XML_VERSION, Some(XML_ENCODING), None);
    writer.write_event(Event::Decl(declaration))?;

    let mut root = BytesStart::new("feed");
    root.push_attribute(("xmlns", ATOM_NAMESPACE));
    if !feed.language.is_empty() {
        root.push_attribute(("xml:lang", feed.language.as_str()));
    }
    writer.write_event(Event::Start(root))?;

    // Required elements are written unconditionally.
    let required = [
        ("id", &feed.id),
        ("title", &feed.title),
        ("updated", &feed.updated),
    ];
    for (tag, value) in required {
        write_text_element(&mut writer, tag, value)?;
    }

    // Optional elements are skipped when empty.
    let optional = [
        ("subtitle", &feed.subtitle),
        ("rights", &feed.rights),
        ("icon", &feed.icon),
        ("logo", &feed.logo),
        ("generator", &feed.generator),
    ];
    for (tag, value) in optional {
        if !value.is_empty() {
            write_text_element(&mut writer, tag, value)?;
        }
    }

    feed.authors
        .iter()
        .try_for_each(|p| write_person(&mut writer, "author", p))?;
    feed.contributors
        .iter()
        .try_for_each(|p| write_person(&mut writer, "contributor", p))?;
    feed.links
        .iter()
        .try_for_each(|link| write_link(&mut writer, link))?;
    feed.categories
        .iter()
        .try_for_each(|term| write_category(&mut writer, term))?;
    feed.entries
        .iter()
        .try_for_each(|entry| write_entry(&mut writer, entry))?;

    writer.write_event(Event::End(BytesEnd::new("feed")))?;

    let bytes = writer.into_inner().into_inner();
    String::from_utf8(bytes).map_err(RssError::from)
}
/// Writes `<name>content</name>`.
///
/// `content` is passed through `sanitize_content` and the result is handed
/// to the writer as already-escaped text.
fn write_text_element<W: std::io::Write>(
    writer: &mut Writer<W>,
    name: &str,
    content: &str,
) -> Result<()> {
    let open = Event::Start(BytesStart::new(name));
    let body =
        Event::Text(BytesText::from_escaped(sanitize_content(content)));
    let close = Event::End(BytesEnd::new(name));
    for event in [open, body, close] {
        writer.write_event(event)?;
    }
    Ok(())
}
/// Writes `<name type="…">content</name>`, where the attribute value comes
/// from `text_type`.
///
/// `content` is passed through `sanitize_content` and the result is handed
/// to the writer as already-escaped text.
fn write_typed_text<W: std::io::Write>(
    writer: &mut Writer<W>,
    name: &str,
    content: &str,
    text_type: AtomTextType,
) -> Result<()> {
    let body =
        Event::Text(BytesText::from_escaped(sanitize_content(content)));

    let mut open_tag = BytesStart::new(name);
    open_tag.push_attribute(("type", text_type.as_attr()));

    for event in
        [Event::Start(open_tag), body, Event::End(BytesEnd::new(name))]
    {
        writer.write_event(event)?;
    }
    Ok(())
}
/// Writes a person as `<element>` containing `name` and, when non-empty,
/// `email` and `uri` child elements (in that order).
fn write_person<W: std::io::Write>(
    writer: &mut Writer<W>,
    element: &str,
    person: &AtomPerson,
) -> Result<()> {
    writer.write_event(Event::Start(BytesStart::new(element)))?;

    write_text_element(writer, "name", &person.name)?;
    let optional = [("email", &person.email), ("uri", &person.uri)];
    for (tag, value) in optional {
        if value.is_empty() {
            continue;
        }
        write_text_element(writer, tag, value)?;
    }

    writer.write_event(Event::End(BytesEnd::new(element)))?;
    Ok(())
}
/// Writes `link` as a self-closing `<link …/>` element.
///
/// `href` is always written; `rel`, `type`, `length` and `title` follow in
/// that order, each only when its value is non-empty.
fn write_link<W: std::io::Write>(
    writer: &mut Writer<W>,
    link: &AtomLink,
) -> Result<()> {
    let mut tag = BytesStart::new("link");
    tag.push_attribute(("href", link.href.as_str()));

    let optional = [
        ("rel", &link.rel),
        ("type", &link.mime_type),
        ("length", &link.length),
        ("title", &link.title),
    ];
    for (key, value) in optional {
        if !value.is_empty() {
            tag.push_attribute((key, value.as_str()));
        }
    }

    writer.write_event(Event::Empty(tag))?;
    Ok(())
}
/// Writes a self-closing `<category term="…"/>` element for `term`.
fn write_category<W: std::io::Write>(
    writer: &mut Writer<W>,
    term: &str,
) -> Result<()> {
    let mut tag = BytesStart::new("category");
    tag.push_attribute(("term", term));
    let event = Event::Empty(tag);
    writer.write_event(event)?;
    Ok(())
}
/// Writes one `<entry>` element.
///
/// Children are written in this order: `id`, `title`, `updated`, then
/// (each only when non-empty) `published`, `summary`, `content`, `rights`,
/// followed by authors, contributors, links and categories.
fn write_entry<W: std::io::Write>(
    writer: &mut Writer<W>,
    entry: &AtomEntry,
) -> Result<()> {
    writer.write_event(Event::Start(BytesStart::new("entry")))?;

    let required = [
        ("id", &entry.id),
        ("title", &entry.title),
        ("updated", &entry.updated),
    ];
    for (tag, value) in required {
        write_text_element(writer, tag, value)?;
    }

    if !entry.published.is_empty() {
        write_text_element(writer, "published", &entry.published)?;
    }

    // `summary` and `content` carry their own `type` attribute.
    let typed = [
        ("summary", &entry.summary, entry.summary_type),
        ("content", &entry.content, entry.content_type),
    ];
    for (tag, text, text_type) in typed {
        if !text.is_empty() {
            write_typed_text(writer, tag, text, text_type)?;
        }
    }

    if !entry.rights.is_empty() {
        write_text_element(writer, "rights", &entry.rights)?;
    }

    entry
        .authors
        .iter()
        .try_for_each(|p| write_person(writer, "author", p))?;
    entry
        .contributors
        .iter()
        .try_for_each(|p| write_person(writer, "contributor", p))?;
    entry
        .links
        .iter()
        .try_for_each(|link| write_link(writer, link))?;
    entry
        .categories
        .iter()
        .try_for_each(|term| write_category(writer, term))?;

    writer.write_event(Event::End(BytesEnd::new("entry")))?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest feed that passes validation: id, title, updated, one author.
    fn minimal_feed() -> AtomFeed {
        AtomFeed::new()
            .id("urn:example:feed")
            .title("Example")
            .updated("2026-06-27T00:00:00Z")
            .author_name("Tester")
    }

    #[test]
    fn validate_rejects_missing_required_fields() {
        let feed = AtomFeed::new();
        let err = feed.validate().unwrap_err();
        let RssError::ValidationErrors(errs) = err else {
            panic!("expected ValidationErrors");
        };
        assert!(errs.iter().any(|e| e.field == "feed.id"
            && e.message == "feed.id is missing"));
        assert!(errs.iter().any(|e| e.field == "feed.title"
            && e.message == "feed.title is missing"));
        assert!(errs.iter().any(|e| e.field == "feed.updated"
            && e.message == "feed.updated is missing"));
    }

    #[test]
    fn validate_rejects_non_rfc3339_updated() {
        let feed = AtomFeed::new()
            .id("urn:example:feed")
            .title("Example")
            .updated("yesterday afternoon")
            .author_name("Tester");
        let err = feed.validate().unwrap_err();
        let RssError::ValidationErrors(errs) = err else {
            panic!("expected ValidationErrors");
        };
        assert!(errs.iter().any(|e| e.field == "feed.updated"
            && e.message.starts_with(
                "feed.updated is not a valid RFC 3339 timestamp"
            )));
    }

    #[test]
    fn entry_inherits_feed_author_requirement() {
        // No feed-level author and no entry author: the entry is flagged.
        let feed = AtomFeed::new()
            .id("urn:example:feed")
            .title("Example")
            .updated("2026-06-27T00:00:00Z")
            .add_entry(
                AtomEntry::new()
                    .id("urn:example:entry-1")
                    .title("Entry 1")
                    .updated("2026-06-27T00:00:00Z"),
            );
        let err = feed.validate().unwrap_err();
        let RssError::ValidationErrors(errs) = err else {
            panic!("expected ValidationErrors");
        };
        assert!(errs.iter().any(|e| e.field == "entry.0.author"));
    }

    #[test]
    fn entry_validate_uses_unindexed_prefix() {
        let entry = AtomEntry::new();
        let err = entry.validate().unwrap_err();
        let RssError::ValidationErrors(errs) = err else {
            panic!("expected ValidationErrors");
        };
        assert!(errs.iter().any(|e| e.field == "entry.id"
            && e.message == "entry.id is missing"));
        assert!(errs.iter().any(|e| e.field == "entry.title"
            && e.message == "entry.title is missing"));
        assert!(errs.iter().any(|e| e.field == "entry.updated"
            && e.message == "entry.updated is missing"));
    }

    #[test]
    fn generate_minimal_feed_emits_required_elements() {
        let xml = generate_atom(&minimal_feed()).unwrap();
        assert!(xml
            .contains(r#"<feed xmlns="http://www.w3.org/2005/Atom">"#));
        assert!(xml.contains("<id>urn:example:feed</id>"));
        assert!(xml.contains("<title>Example</title>"));
        assert!(xml.contains("<updated>2026-06-27T00:00:00Z</updated>"));
        assert!(xml.contains("<author>"));
        assert!(xml.contains("<name>Tester</name>"));
    }

    #[test]
    fn generate_feed_with_language_sets_xml_lang() {
        let feed = minimal_feed().language("en-US");
        let xml = generate_atom(&feed).unwrap();
        assert!(xml.contains(r#"xml:lang="en-US""#));
    }

    #[test]
    fn generate_feed_with_self_link_emits_rel_self() {
        let feed =
            minimal_feed().self_link("https://example.com/atom.xml");
        let xml = generate_atom(&feed).unwrap();
        assert!(xml.contains(
            r#"<link href="https://example.com/atom.xml" rel="self"/>"#
        ));
    }

    #[test]
    fn generate_entry_with_enclosure_emits_rel_enclosure() {
        let feed = minimal_feed().add_entry(
            AtomEntry::new()
                .id("urn:example:ep-1")
                .title("Episode 1")
                .updated("2026-06-27T00:00:00Z")
                .summary("Pilot episode")
                .add_enclosure(
                    "https://example.com/ep-1.mp3",
                    "audio/mpeg",
                    12_345_678,
                ),
        );
        let xml = generate_atom(&feed).unwrap();
        assert!(xml.contains(r#"rel="enclosure""#));
        assert!(xml.contains(r#"type="audio/mpeg""#));
        assert!(xml.contains(r#"length="12345678""#));
    }

    #[test]
    fn generate_entry_with_html_content_sets_type_html() {
        let feed = minimal_feed().add_entry(
            AtomEntry::new()
                .id("urn:example:post-1")
                .title("Post 1")
                .updated("2026-06-27T00:00:00Z")
                .content_html("<p>Hello</p>"),
        );
        let xml = generate_atom(&feed).unwrap();
        assert!(xml.contains(r#"<content type="html">"#));
        // `type="html"` content carries *escaped* markup: the payload must
        // not show up as a real `<p>` child element.
        assert!(xml.contains("&lt;p"));
        assert!(xml.contains("Hello"));
        assert!(!xml.contains("<p>Hello</p>"));
    }

    #[test]
    fn detect_feed_format_classifies_correctly() {
        let rss = r#"<?xml version="1.0"?><rss version="2.0"><channel/></rss>"#;
        let atom = r#"<?xml version="1.0"?><feed xmlns="http://www.w3.org/2005/Atom"><id/></feed>"#;
        let rdf = r#"<?xml version="1.0"?><rdf:RDF xmlns:rdf="..."><channel/></rdf:RDF>"#;
        let other = r#"<?xml version="1.0"?><html><body/></html>"#;
        let unparseable = "not xml at all";
        assert_eq!(detect_feed_format(rss), FeedFormat::Rss);
        assert_eq!(detect_feed_format(atom), FeedFormat::Atom);
        assert_eq!(detect_feed_format(rdf), FeedFormat::RssRdf);
        assert_eq!(detect_feed_format(other), FeedFormat::Unknown);
        assert_eq!(
            detect_feed_format(unparseable),
            FeedFormat::Unknown
        );
    }

    #[test]
    fn detect_treats_feed_without_atom_namespace_as_unknown() {
        let no_ns = r#"<?xml version="1.0"?><feed><id/></feed>"#;
        assert_eq!(detect_feed_format(no_ns), FeedFormat::Unknown);
    }

    #[test]
    fn round_trip_detect_after_generate() {
        let xml = generate_atom(&minimal_feed()).unwrap();
        assert_eq!(detect_feed_format(&xml), FeedFormat::Atom);
    }

    #[test]
    fn special_characters_are_escaped_in_text_payloads() {
        let feed = AtomFeed::new()
            .id("urn:example:feed")
            .title("A & B < C > D")
            .updated("2026-06-27T00:00:00Z")
            .author_name("Tester");
        let xml = generate_atom(&feed).unwrap();
        // `&` and `<` must be written as entities; the raw title must not
        // appear in the output.
        assert!(xml.contains("<title>A &amp; B &lt; C "));
        assert!(!xml.contains("A & B < C"));
    }
}