use std::io::{BufReader, BufWriter, Write};
use std::path::PathBuf;
use std::{borrow::Cow, fs::File};
use std::{fs, mem};
use atom_syndication::{self as atom, Entry, Generator};
use chrono::{DateTime, TimeDelta, Utc};
use log::{info, trace};
use uriparse::URI;
use crate::webpage::WebPage;
use crate::{base62, Error};
/// Number of entries the feed always keeps: trimming does nothing while the feed holds
/// this many entries or fewer, and never removes enough entries to drop below it.
pub const MIN_ENTRIES: usize = 50;
/// Age beyond which an entry becomes eligible for removal when the feed is trimmed.
pub const TRIM_AGE: TimeDelta = TimeDelta::days(30);
/// An Atom feed paired with the location of the file it is persisted to.
pub struct Feed {
/// File the feed is read from and saved to.
path: PathBuf,
/// The in-memory Atom document.
feed: atom_syndication::Feed,
}
/// Outcome of attempting to add a URL to a feed.
///
/// The standard traits are derived so callers can log, compare and freely copy the result.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AddResult {
    /// The URL was not present and a new entry was appended.
    Added,
    /// An existing entry already links to the URL; the feed was left unchanged.
    Duplicate,
}
impl Feed {
    /// Load an existing feed from the Atom document stored at `path`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or its contents cannot be read as an Atom feed.
    pub fn read<P: Into<PathBuf>>(path: P) -> Result<Feed, Error> {
        let path: PathBuf = path.into();
        let reader = BufReader::new(File::open(&path)?);
        let feed = atom::Feed::read_from(reader)?;
        Ok(Feed { path, feed })
    }

    /// Create a fresh, empty feed that will be saved to `path`.
    ///
    /// The feed is titled after the package and stamped with the current time, then given a
    /// newly generated id, the package as author, and the generator metadata.
    pub fn generate_new<P: Into<PathBuf>>(path: P) -> Self {
        let mut new_feed = Feed {
            feed: atom::Feed {
                title: env!("CARGO_PKG_NAME").into(),
                updated: Utc::now().into(),
                ..Default::default()
            },
            path: path.into(),
        };
        new_feed.set_feed_id();
        new_feed.set_feed_author();
        new_feed.set_generator();
        new_feed
    }

    /// Add an entry for `url` unless some existing entry already links to it.
    ///
    /// Returns [`AddResult::Duplicate`] (leaving the feed untouched) when any link of any
    /// entry has an `href` equal to the string form of `url`, otherwise appends a new entry
    /// and returns [`AddResult::Added`].
    pub fn add_url_if_new(&mut self, url: &URI, page: WebPage) -> AddResult {
        let wanted = url.to_string();
        let already_present = self
            .feed
            .entries()
            .iter()
            .flat_map(|entry| entry.links())
            .any(|link| link.href() == wanted.as_str());

        if already_present {
            return AddResult::Duplicate;
        }

        self.add_url(url, page);
        AddResult::Added
    }

    /// Append a new entry describing `page`, linking to `url`.
    ///
    /// Also refreshes the generator metadata and sets the feed's `updated` timestamp to the
    /// same instant used for the entry.
    fn add_url(&mut self, url: &URI, page: WebPage) {
        info!("Add {}", url);
        let timestamp: DateTime<Utc> = Utc::now();

        let alternate = atom::Link {
            href: url.to_string(),
            rel: String::from("alternate"),
            ..Default::default()
        };

        // Zero or one author, depending on whether the page declared one.
        let authors: Vec<atom::Person> = page
            .author
            .into_iter()
            .map(|name| atom::Person {
                name,
                ..Default::default()
            })
            .collect();

        let title = page.title.unwrap_or_else(|| String::from("Untitled"));

        let entry = atom::Entry {
            title: title.into(),
            id: unique_tag_id(),
            updated: timestamp.into(),
            summary: Some(summary_for_url(url, page.description)),
            links: vec![alternate],
            authors,
            ..Default::default()
        };

        self.feed.entries.push(entry);
        self.set_generator();
        self.feed.set_updated(timestamp);
    }

    /// Drop old entries according to [`MIN_ENTRIES`] and [`TRIM_AGE`].
    pub fn trim_entries(&mut self) {
        trim_entries(&mut self.feed.entries, MIN_ENTRIES, TRIM_AGE);
    }

    /// Write the feed to disk.
    ///
    /// The document is first written to a sibling file with a `tmp` extension and then
    /// renamed over the real path, so the real file is only ever replaced by a fully
    /// written document.
    ///
    /// # Errors
    ///
    /// Fails if the temporary file cannot be created, written or flushed, or if the rename
    /// fails.
    pub fn save(&self) -> Result<(), Error> {
        let tmp_path = self.path.with_extension("tmp");
        {
            // Scoped so the temporary file is closed before it is renamed.
            let buffered = BufWriter::new(File::create(&tmp_path)?);
            let mut buffered = self.feed.write_to(buffered)?;
            buffered.flush()?;
            trace!("Wrote {}", tmp_path.display());
        }
        trace!("Move {} -> {}", tmp_path.display(), self.path.display());
        fs::rename(&tmp_path, &self.path)?;
        Ok(())
    }

    /// Assign a freshly generated tag URI as the feed id.
    fn set_feed_id(&mut self) {
        let id = unique_tag_id();
        self.feed.set_id(id);
    }

    /// Set the feed's sole author to this package (name and homepage).
    fn set_feed_author(&mut self) {
        self.feed.set_authors(vec![atom::Person {
            name: String::from(env!("CARGO_PKG_NAME")),
            uri: Some(String::from(env!("CARGO_PKG_HOMEPAGE"))),
            ..Default::default()
        }]);
    }

    /// Record this package's name and version as the feed generator.
    fn set_generator(&mut self) {
        self.feed.set_generator(Generator {
            value: String::from(env!("CARGO_PKG_NAME")),
            version: Some(String::from(env!("CARGO_PKG_VERSION"))),
            ..Default::default()
        });
    }
}
fn trim_entries(entries: &mut Vec<Entry>, min_entries: usize, trim_age: TimeDelta) {
if entries.len() <= min_entries {
return;
}
entries.sort_by(|a, b| a.updated().cmp(b.updated()));
let now: DateTime<Utc> = Utc::now();
let mut num_trim = entries.len() - min_entries;
let new_entries = mem::take(entries);
*entries = new_entries
.into_iter()
.filter(|entry| {
if num_trim == 0 {
return true;
}
let age = now - <DateTime<Utc>>::from(*entry.updated());
if age > trim_age {
info!("Trim entry {}: {}", entry.id(), entry.title().as_str());
num_trim -= 1;
false
} else {
true
}
})
.collect();
}
fn summary_for_url(url: &URI, description: Option<String>) -> atom::Text {
let video_id = is_youtube(url).then(|| youtube_video_id(url)).flatten();
if let Some(video_id) = video_id {
let mut summary = format!(
r#"<iframe width="560" height="315" src="https://www.youtube.com/embed/{video_id}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>"#,
);
if let Some(desc) = description.as_deref() {
summary.push_str("<div>");
summary.push_str(desc); summary.push_str("</div>");
}
atom::Text::html(summary)
} else {
match description {
Some(desc) => atom::Text::plain(desc),
None => atom::Text::html(format!(r#"<a href="{url}">{url}</a>"#)),
}
}
}
/// Report whether `url` points at one of the known YouTube hosts.
///
/// Only registered (DNS) names can match; IP-address hosts and URLs without a host never
/// do. Host names are case-insensitive, so the comparison ignores ASCII case rather than
/// relying on the URL having been normalised beforehand. The bare `youtube.com` domain and
/// the `www.` form of the no-cookie domain are accepted alongside the other hosts.
fn is_youtube(url: &URI) -> bool {
    const YOUTUBE_HOSTS: [&str; 6] = [
        "www.youtube.com",
        "youtube.com",
        "m.youtube.com",
        "youtu.be",
        "youtube-nocookie.com",
        "www.youtube-nocookie.com",
    ];

    match url.host() {
        Some(uriparse::Host::RegisteredName(name)) => {
            let host = name.as_str();
            YOUTUBE_HOSTS
                .iter()
                .any(|known| host.eq_ignore_ascii_case(known))
        }
        Some(uriparse::Host::IPv4Address(_) | uriparse::Host::IPv6Address(_)) | None => false,
    }
}
/// Report whether `url` uses the `youtu.be` short-link host.
///
/// IP-address hosts and URLs without a host are never short links. The host is compared
/// using the registered name's own equality against `"youtu.be"`.
fn is_short(url: &URI) -> bool {
    match url.host() {
        Some(uriparse::Host::RegisteredName(name)) => name == "youtu.be",
        Some(uriparse::Host::IPv4Address(_) | uriparse::Host::IPv6Address(_)) | None => false,
    }
}
/// Extract the video id from a YouTube URL, if it has one.
///
/// Recognised shapes, tried in this order:
///
/// 1. a `v` query parameter (`/watch?v=ID`),
/// 2. a two-segment path whose first segment is `v` (`/v/ID`),
/// 3. a single-segment path on the short-link host (`youtu.be/ID`).
///
/// Empty ids are rejected: an empty `v=` parameter falls through to the path-based shapes,
/// and an empty path segment (for example `https://youtu.be/`) yields `None`, so callers
/// never build an embed for a missing id.
fn youtube_video_id<'a>(url: &'a URI) -> Option<Cow<'a, str>> {
    url.query()
        .and_then(|q| {
            form_urlencoded::parse(q.as_bytes())
                .find_map(|(key, value)| (key == "v" && !value.is_empty()).then_some(value))
        })
        .or_else(|| match url.path().segments() {
            [first, id] if first == "v" && !id.as_str().is_empty() => {
                Some(Cow::Borrowed(id.as_str()))
            }
            [id] if is_short(url) && !id.as_str().is_empty() => Some(Cow::Borrowed(id.as_str())),
            _ => None,
        })
}
/// Produce a new tag URI for use as a feed or entry id.
///
/// The id is the fixed prefix `tag:feedlynx.7bit.org,2024:` followed by the value returned
/// by `base62::base62::<16>()`.
fn unique_tag_id() -> String {
    format!("tag:feedlynx.7bit.org,2024:{}", base62::base62::<16>())
}
#[cfg(test)]
mod tests {
    use chrono::FixedOffset;

    use super::*;

    /// Build a minimal entry with the given title and `updated` timestamp.
    fn test_entry(title: atom::Text, updated: DateTime<FixedOffset>) -> Entry {
        atom::Entry {
            title,
            id: unique_tag_id(),
            updated,
            summary: Some("Summary".into()),
            authors: vec![atom::Person {
                name: "Author".to_string(),
                ..Default::default()
            }],
            ..Default::default()
        }
    }

    /// The id is taken from the `v` query parameter of a watch URL.
    #[test]
    fn test_video_id_direct() {
        let url = URI::try_from("https://www.youtube.com/watch?v=u1wfCnRINkE").unwrap();
        assert!(is_youtube(&url));
        assert_eq!(youtube_video_id(&url).unwrap(), "u1wfCnRINkE");
    }

    /// The id is the single path segment of a `youtu.be` short link.
    #[test]
    fn test_video_id_short() {
        let url = URI::try_from("https://youtu.be/u1wfCnRINkE").unwrap();
        assert!(is_youtube(&url));
        assert_eq!(youtube_video_id(&url).unwrap(), "u1wfCnRINkE");
    }

    /// The id is the second path segment of a `/v/ID` URL.
    #[test]
    fn test_video_id_fullscreen() {
        let url = URI::try_from("https://www.youtube.com/v/u1wfCnRINkE").unwrap();
        assert!(is_youtube(&url));
        assert_eq!(youtube_video_id(&url).unwrap(), "u1wfCnRINkE");
    }

    /// Unrelated query parameters do not prevent the `/v/ID` path from being used.
    #[test]
    fn test_video_id_fullscreen_param() {
        let url = URI::try_from("https://www.youtube.com/v/u1wfCnRINkE?version=3").unwrap();
        assert!(is_youtube(&url));
        assert_eq!(youtube_video_id(&url).unwrap(), "u1wfCnRINkE");
    }

    /// A YouTube URL that is not a video (here a channel) has no video id.
    #[test]
    fn test_video_id_channel_url() {
        let url =
            URI::try_from("https://www.youtube.com/channel/UCLi0H57HGGpAdCkVOb_ykVg").unwrap();
        assert!(is_youtube(&url));
        assert_eq!(youtube_video_id(&url), None);
    }

    /// Old entries are kept while the list is at or below the minimum size.
    #[test]
    fn test_trim_less_than_min() {
        let now = Utc::now();
        let updated = (now - TimeDelta::seconds(5)).into();
        let entry = test_entry("Test".into(), updated);
        let mut entries = vec![entry];
        trim_entries(&mut entries, 3, TimeDelta::seconds(1));
        assert_eq!(entries.len(), 1);
    }

    /// Entries younger than the trim age are kept even when above the minimum size.
    #[test]
    fn test_trim_young() {
        let now = Utc::now();
        let updated = (now - TimeDelta::seconds(5)).into();
        let entry = test_entry("Test".into(), updated);
        let mut entries = vec![entry; 3];
        trim_entries(&mut entries, 2, TimeDelta::seconds(10));
        assert_eq!(entries.len(), 3);
    }

    /// Only the single entry that exceeds the trim age is removed.
    #[test]
    fn test_trim_one_old() {
        let now = Utc::now();
        let entry = test_entry("Test".into(), (now - TimeDelta::seconds(5)).into());
        let mut entries = vec![entry; 3];
        let entry = test_entry("Old".into(), (now - TimeDelta::seconds(15)).into());
        entries.push(entry);
        trim_entries(&mut entries, 2, TimeDelta::seconds(10));
        assert_eq!(entries.len(), 3);
    }

    /// When every entry is old, the oldest are removed until the minimum size remains,
    /// and the survivors are left ordered oldest first.
    #[test]
    fn test_trim_all_old() {
        let now = Utc::now();
        let mut entries = (0..4)
            .map(|i| {
                test_entry(
                    format!("Test {}", i + 1).into(),
                    (now - TimeDelta::seconds(11 + i)).into(),
                )
            })
            .collect::<Vec<_>>();
        trim_entries(&mut entries, 2, TimeDelta::seconds(10));
        let titles = entries
            .iter()
            .map(|entry| entry.title().as_str())
            .collect::<Vec<_>>();
        assert_eq!(titles, ["Test 2", "Test 1"]);
    }
}