use chrono::{DateTime, FixedOffset};
use html_escape::decode_html_entities;
use url::Url;
use crate::error::FeedError;
/// A podcast channel together with the episodes extracted from its feed.
///
/// Values of this type are produced by [`parse_feed`].
#[derive(Debug, Clone)]
pub struct Podcast {
/// Channel title with HTML entities decoded.
pub title: String,
/// Channel description with HTML entities decoded; `None` when it is empty.
pub description: Option<String>,
/// Channel link, or `None` when the feed's value does not parse as a URL.
pub link: Option<Url>,
/// iTunes author if present, otherwise the channel's managing editor; HTML entities decoded.
pub author: Option<String>,
/// Channel image URL, falling back to the iTunes image when the former is missing or unparseable.
pub image_url: Option<Url>,
/// The feed URL handed to [`parse_feed`] by the caller; stored unchanged.
pub feed_url: Url,
/// Feed items that converted successfully; items that failed conversion are left out.
pub episodes: Vec<Episode>,
}
/// A single feed item that carries a media enclosure.
#[derive(Debug, Clone)]
pub struct Episode {
/// Item title with HTML entities decoded; `"Untitled Episode"` when the item has no title.
pub title: String,
/// Item description with HTML entities decoded, when the item has one.
pub description: Option<String>,
/// Publication timestamp; `None` when absent or when no supported date format matches.
pub pub_date: Option<DateTime<FixedOffset>>,
/// The item's GUID value; the parser in this file substitutes the enclosure URL string when
/// the item declares no GUID.
pub guid: Option<String>,
/// The media attachment; items without one are rejected by the parser in this file.
pub enclosure: Enclosure,
/// iTunes duration exactly as written in the feed (stored as text, not parsed).
pub duration: Option<String>,
/// iTunes episode number; `None` when absent or not parseable as `u32`.
pub episode_number: Option<u32>,
/// iTunes season number; `None` when absent or not parseable as `u32`.
pub season_number: Option<u32>,
}
/// The media file attached to an [`Episode`].
#[derive(Debug, Clone)]
pub struct Enclosure {
/// Location of the media file, parsed from the enclosure's URL attribute.
pub url: Url,
/// Declared length of the media file; `None` when the attribute is missing or not a valid `u64`.
/// NOTE(review): presumably a byte count as in the RSS enclosure convention — confirm.
pub length: Option<u64>,
/// Declared MIME type; `None` when the feed gives an empty value.
pub mime_type: Option<String>,
}
/// Parses an RSS document into a [`Podcast`].
///
/// `xml_bytes` is the raw feed body; `feed_url` is stored unchanged on the returned value.
///
/// Items that cannot be converted into an [`Episode`] (for example because they have no
/// enclosure) are skipped instead of failing the whole feed.
///
/// # Errors
///
/// Returns a [`FeedError`] when `rss::Channel::read_from` rejects the document.
pub fn parse_feed(xml_bytes: &[u8], feed_url: Url) -> Result<Podcast, FeedError> {
    let channel = rss::Channel::read_from(xml_bytes)?;
    let itunes = channel.itunes_ext();

    // Best effort: keep every item that converts, drop the rest.
    let mut episodes = Vec::new();
    for item in channel.items() {
        if let Ok(episode) = parse_episode(item) {
            episodes.push(episode);
        }
    }

    // The plain RSS <image> wins; the iTunes artwork is only consulted when that one is
    // missing or is not a valid URL.
    let image_url = match channel.image().and_then(|img| Url::parse(img.url()).ok()) {
        Some(found) => Some(found),
        None => itunes
            .and_then(|ext| ext.image())
            .and_then(|href| Url::parse(href).ok()),
    };

    // iTunes author first, managing editor as the fallback.
    let author = itunes
        .and_then(|ext| ext.author())
        .or_else(|| channel.managing_editor())
        .map(|name| decode_html_entities(name).into_owned());

    // An empty description is reported as absent.
    let description = decode_html_entities(channel.description()).into_owned();
    let description = if description.is_empty() {
        None
    } else {
        Some(description)
    };

    let title = decode_html_entities(channel.title()).into_owned();
    let link = Url::parse(channel.link()).ok();

    Ok(Podcast {
        title,
        description,
        link,
        author,
        image_url,
        feed_url,
        episodes,
    })
}
/// Converts one RSS item into an [`Episode`].
///
/// A missing title becomes `"Untitled Episode"`, a missing GUID is replaced by the enclosure
/// URL string, and every optional piece of metadata that is absent or unparseable is `None`.
///
/// # Errors
///
/// * [`FeedError::MissingEnclosure`] when the item has no enclosure.
/// * The [`FeedError`] converted from the URL parse failure when the enclosure URL is invalid.
fn parse_episode(item: &rss::Item) -> Result<Episode, FeedError> {
    let title = match item.title() {
        Some(raw) => decode_html_entities(raw).into_owned(),
        None => "Untitled Episode".to_string(),
    };

    // The enclosure is the only mandatory part of an episode.
    let source = match item.enclosure() {
        Some(found) => found,
        None => return Err(FeedError::MissingEnclosure { title }),
    };
    let url = Url::parse(source.url())?;

    // Strict RFC 2822 first, then the relaxed formats; a date nothing understands is dropped.
    let pub_date = match item.pub_date() {
        Some(raw) => match DateTime::parse_from_rfc2822(raw) {
            Ok(parsed) => Some(parsed),
            Err(_) => parse_relaxed_date(raw).ok(),
        },
        None => None,
    };

    // Fall back to the raw enclosure URL so every episode carries an identifier.
    let guid = match item.guid() {
        Some(declared) => declared.value().to_string(),
        None => source.url().to_string(),
    };

    let description = match item.description() {
        Some(raw) => Some(decode_html_entities(raw).into_owned()),
        None => None,
    };

    let declared_type = source.mime_type();
    let mime_type = if declared_type.is_empty() {
        None
    } else {
        Some(declared_type.to_string())
    };

    let (duration, episode_number, season_number) = match item.itunes_ext() {
        Some(ext) => (
            ext.duration().map(String::from),
            ext.episode().and_then(|raw| raw.parse::<u32>().ok()),
            ext.season().and_then(|raw| raw.parse::<u32>().ok()),
        ),
        None => (None, None, None),
    };

    Ok(Episode {
        title,
        description,
        pub_date,
        guid: Some(guid),
        enclosure: Enclosure {
            url,
            length: source.length().parse::<u64>().ok(),
            mime_type,
        },
        duration,
        episode_number,
        season_number,
    })
}
/// Parses `date_str` using fallback layouts for dates that are not strict RFC 2822.
///
/// The layouts are tried in order and the first one that matches wins:
///
/// 1. `%a, %d %b %Y %H:%M:%S %z` — an RFC 2822-like layout
/// 2. `%Y-%m-%dT%H:%M:%S%:z` — `T`-separated date and time with a colon in the offset
/// 3. `%Y-%m-%d %H:%M:%S %z` — space-separated date, time and offset
///
/// # Errors
///
/// When no layout matches, returns the `chrono::ParseError` reported for the last layout
/// attempted, so the error describes the real input rather than a fabricated placeholder.
fn parse_relaxed_date(date_str: &str) -> Result<DateTime<FixedOffset>, chrono::ParseError> {
    let [first, fallbacks @ ..] = [
        "%a, %d %b %Y %H:%M:%S %z",
        "%Y-%m-%dT%H:%M:%S%:z",
        "%Y-%m-%d %H:%M:%S %z",
    ];

    // `or_else` only runs the next attempt while the running outcome is still an error, so a
    // success short-circuits the remaining layouts and a total failure keeps the last error.
    fallbacks.iter().fold(
        DateTime::parse_from_str(date_str, first),
        |outcome, &format| outcome.or_else(|_| DateTime::parse_from_str(date_str, format)),
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    // A two-item feed: the first item carries every optional field, the second only a title
    // and an enclosure (no length, GUID, date or iTunes metadata).
    const SAMPLE_FEED: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  <channel>
    <title>Test Podcast</title>
    <description>A test podcast for unit testing</description>
    <link>https://example.com</link>
    <itunes:author>Test Author</itunes:author>
    <itunes:image href="https://example.com/image.jpg"/>
    <item>
      <title>Episode 1</title>
      <description>First episode</description>
      <pubDate>Mon, 01 Jan 2024 12:00:00 +0000</pubDate>
      <guid>ep1-guid</guid>
      <enclosure url="https://example.com/ep1.mp3" length="1234567" type="audio/mpeg"/>
      <itunes:duration>30:00</itunes:duration>
      <itunes:episode>1</itunes:episode>
      <itunes:season>1</itunes:season>
    </item>
    <item>
      <title>Episode 2</title>
      <enclosure url="https://example.com/ep2.mp3" type="audio/mpeg"/>
    </item>
  </channel>
</rss>"#;

    /// Shared feed URL used by every test.
    fn sample_feed_url() -> Url {
        Url::parse("https://example.com/feed.xml").unwrap()
    }

    #[test]
    fn parse_feed_extracts_podcast_metadata() {
        let feed_url = sample_feed_url();
        let podcast = parse_feed(SAMPLE_FEED.as_bytes(), feed_url.clone()).unwrap();

        assert_eq!(podcast.title, "Test Podcast");
        assert_eq!(
            podcast.description,
            Some("A test podcast for unit testing".to_string())
        );
        assert_eq!(podcast.author, Some("Test Author".to_string()));
        assert_eq!(podcast.feed_url, feed_url);
        assert_eq!(
            podcast.link,
            Some(Url::parse("https://example.com").unwrap())
        );
        // No plain RSS <image> is present, so the iTunes artwork is used.
        assert_eq!(
            podcast.image_url,
            Some(Url::parse("https://example.com/image.jpg").unwrap())
        );
    }

    #[test]
    fn parse_feed_extracts_episodes() {
        let podcast = parse_feed(SAMPLE_FEED.as_bytes(), sample_feed_url()).unwrap();
        assert_eq!(podcast.episodes.len(), 2);

        let ep1 = &podcast.episodes[0];
        assert_eq!(ep1.title, "Episode 1");
        assert_eq!(ep1.guid, Some("ep1-guid".to_string()));
        assert!(ep1.pub_date.is_some());
        assert_eq!(ep1.duration, Some("30:00".to_string()));
        assert_eq!(ep1.episode_number, Some(1));
        assert_eq!(ep1.season_number, Some(1));
        assert_eq!(ep1.enclosure.length, Some(1234567));
    }

    #[test]
    fn parse_feed_handles_missing_optional_fields() {
        let podcast = parse_feed(SAMPLE_FEED.as_bytes(), sample_feed_url()).unwrap();

        let ep2 = &podcast.episodes[1];
        assert_eq!(ep2.title, "Episode 2");
        assert!(ep2.description.is_none());
        assert!(ep2.pub_date.is_none());
        assert!(ep2.duration.is_none());
        assert!(ep2.episode_number.is_none());
        assert!(ep2.enclosure.length.is_none());
        // Without a <guid>, the enclosure URL string stands in as the identifier.
        assert_eq!(ep2.guid, Some("https://example.com/ep2.mp3".to_string()));
    }

    #[test]
    fn parse_feed_skips_items_without_enclosure() {
        let feed_no_enclosure = r#"<?xml version="1.0"?>
<rss version="2.0">
  <channel>
    <title>Test</title>
    <description>Test</description>
    <item>
      <title>No Audio</title>
    </item>
  </channel>
</rss>"#;

        let podcast = parse_feed(feed_no_enclosure.as_bytes(), sample_feed_url()).unwrap();
        assert!(podcast.episodes.is_empty());
    }

    #[test]
    fn parse_feed_decodes_html_entities() {
        // The text is escaped twice on purpose. A raw `&` or `<cats>` would make the document
        // malformed XML, and a single level of escaping would be undone by the XML reader
        // alone. With two levels the XML reader yields `&amp;`, `&lt;`, ... and the HTML
        // entity decoding in `parse_feed` is what produces the final characters.
        let feed_with_entities = r#"<?xml version="1.0"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  <channel>
    <title>Tom &amp;amp; Jerry&amp;#39;s &amp;quot;Show&amp;quot;</title>
    <description>A show about &amp;lt;cats&amp;gt; &amp;amp; dogs</description>
    <itunes:author>Tom &amp;amp; Jerry</itunes:author>
    <item>
      <title>Episode: &amp;quot;The Chase&amp;quot; &amp;#8212; Part 1</title>
      <description>Jerry escapes &amp;amp; Tom chases&amp;#8230;</description>
      <enclosure url="https://example.com/ep.mp3" type="audio/mpeg"/>
    </item>
  </channel>
</rss>"#;

        let podcast = parse_feed(feed_with_entities.as_bytes(), sample_feed_url()).unwrap();

        assert_eq!(podcast.title, "Tom & Jerry's \"Show\"");
        assert_eq!(
            podcast.description,
            Some("A show about <cats> & dogs".to_string())
        );
        assert_eq!(podcast.author, Some("Tom & Jerry".to_string()));

        let ep = &podcast.episodes[0];
        assert_eq!(ep.title, "Episode: \"The Chase\" — Part 1");
        assert_eq!(
            ep.description,
            Some("Jerry escapes & Tom chases…".to_string())
        );
    }
}