use super::helpers::{
extract_xml_value, parse_rfc2822_lenient, read_meta_sidecars, xml_escape,
};
use crate::plugin::{Plugin, PluginContext};
use anyhow::{Context, Result};
use std::fs;
/// Plugin that rebuilds the site's `rss.xml` from page metadata sidecars.
///
/// The rebuild happens in `after_compile` and only when the existing feed
/// contains at most one `<item>` element.
#[derive(Debug, Clone, Copy)]
pub struct RssAggregatePlugin;
/// Builds one RSS `<item>` fragment per page metadata entry.
///
/// Entries with an empty relative path, or with a missing or empty `title`,
/// are skipped. Every metadata value placed into the fragment (title, link,
/// description, publication date, author, enclosure URL, categories) is
/// passed through `xml_escape`, so characters such as `&` or `<` in the
/// metadata cannot produce malformed XML.
///
/// * `meta_entries` - `(relative_path, metadata)` pairs.
/// * `base_url` - site base URL without a trailing slash; when empty, links
///   and relative image paths are emitted as-is.
///
/// Returns `(sort_key, item_xml)` pairs in input order. The sort key is the
/// RFC 3339 rendering of `item_pub_date` when it parses, otherwise the raw
/// `item_pub_date` string.
fn collect_articles(
    meta_entries: &[(String, std::collections::HashMap<String, String>)],
    base_url: &str,
) -> Vec<(String, String)> {
    let mut articles: Vec<(String, String)> = Vec::new();
    for (rel_path, meta) in meta_entries {
        if rel_path.is_empty() {
            continue;
        }
        // A feed item without a title is not useful; skip the entry.
        let title = match meta.get("title") {
            Some(raw) if !raw.is_empty() => xml_escape(raw),
            _ => continue,
        };
        let description =
            xml_escape(meta.get("description").map_or("", String::as_str));
        let author = xml_escape(meta.get("author").map_or("", String::as_str));
        let pub_date = meta.get("item_pub_date").cloned().unwrap_or_default();
        // `banner` takes precedence over `image` when both are present.
        let banner = meta.get("banner").or_else(|| meta.get("image"));

        let raw_link = if base_url.is_empty() {
            format!("{rel_path}/")
        } else {
            format!("{base_url}/{rel_path}/")
        };
        let link = xml_escape(&raw_link);

        // The key is computed from the unescaped date so ordering does not
        // depend on escaping.
        let sort_key = parse_rfc2822_lenient(&pub_date)
            .map_or_else(|| pub_date.clone(), |dt| dt.to_rfc3339());
        let pub_date_xml = xml_escape(&pub_date);

        let mut extras = String::new();
        if let Some(img) = banner {
            // Absolute URLs are used verbatim; relative paths are joined to
            // the base URL when one is configured.
            let img_url = if img.starts_with("http") || base_url.is_empty() {
                img.clone()
            } else {
                format!("{base_url}/{}", img.trim_start_matches('/'))
            };
            // The MIME type is guessed from the unescaped URL suffix.
            let mime = if img_url.ends_with(".webp") {
                "image/webp"
            } else if img_url.ends_with(".png") {
                "image/png"
            } else {
                "image/jpeg"
            };
            extras.push_str(&format!(
                "\n <enclosure url=\"{}\" type=\"{mime}\" length=\"0\"/>",
                xml_escape(&img_url)
            ));
        }
        if let Some(cat) = meta.get("category") {
            extras.push_str(&format!(
                "\n <category>{}</category>",
                xml_escape(cat)
            ));
        }
        if let Some(tags) = meta.get("tags") {
            // Tags are a comma-separated list; blank segments are ignored.
            for tag in tags.split(',').map(str::trim).filter(|t| !t.is_empty()) {
                extras.push_str(&format!(
                    "\n <category>{}</category>",
                    xml_escape(tag)
                ));
            }
        }

        let item = format!(
            r#" <item>
<title>{title}</title>
<link>{link}</link>
<description>{description}</description>
<guid isPermaLink="true">{link}</guid>
<pubDate>{pub_date_xml}</pubDate>
<author>{author}</author>{extras}
</item>"#
        );
        articles.push((sort_key, item));
    }
    articles
}
/// Renders a complete RSS 2.0 document around pre-rendered item XML.
///
/// `channel_title`, `channel_link`, `channel_desc`, `language`,
/// `last_build_date` and `items_xml` are inserted verbatim; only `copyright`
/// is passed through `xml_escape`. The `<language>`, `<lastBuildDate>` and
/// `<copyright>` elements are emitted only when their value is non-empty.
/// `base_url` is used to form the `atom:link` self reference
/// (`{base_url}/rss.xml`).
fn build_rss_channel(
    channel_title: &str,
    channel_link: &str,
    channel_desc: &str,
    base_url: &str,
    language: &str,
    last_build_date: &str,
    copyright: &str,
    items_xml: &str,
) -> String {
    // Each optional element is rendered only when its value is present; the
    // array order is the order in which they appear in the channel.
    let optional_elements = [
        (!language.is_empty())
            .then(|| format!("\n <language>{language}</language>")),
        (!last_build_date.is_empty()).then(|| {
            format!("\n <lastBuildDate>{last_build_date}</lastBuildDate>")
        }),
        (!copyright.is_empty()).then(|| {
            format!("\n <copyright>{}</copyright>", xml_escape(copyright))
        }),
    ];
    let optional_xml: String = optional_elements
        .iter()
        .flatten()
        .map(String::as_str)
        .collect();

    format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>{channel_title}</title>
<link>{channel_link}</link>
<description>{channel_desc}</description>
<atom:link href="{base_url}/rss.xml" rel="self" type="application/rss+xml"/>{optional_xml}
{items_xml}
</channel>
</rss>
"#
    )
}
impl Plugin for RssAggregatePlugin {
    /// Returns the plugin name, `rss-aggregate`.
    fn name(&self) -> &'static str {
        "rss-aggregate"
    }

    /// Rebuilds `rss.xml` in the site directory from page metadata sidecars.
    ///
    /// The call is a no-op when `rss.xml` does not exist, when it already
    /// contains more than one `<item>`, or when no article items can be built
    /// from the metadata. Otherwise the channel title, link and description
    /// are taken from the existing feed and the file is overwritten with up
    /// to 50 items, newest sort key first.
    ///
    /// # Errors
    ///
    /// Returns an error when the existing feed cannot be read or the rebuilt
    /// feed cannot be written.
    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
        let rss_path = ctx.site_dir.join("rss.xml");
        if !rss_path.exists() {
            return Ok(());
        }
        let content = fs::read_to_string(&rss_path)
            .with_context(|| format!("cannot read {}", rss_path.display()))?;
        // A feed with several items is treated as already populated.
        if content.matches("<item>").count() > 1 {
            return Ok(());
        }

        // Best effort: a failure to read sidecars is treated as "no entries".
        let meta_entries =
            read_meta_sidecars(&ctx.site_dir).unwrap_or_default();
        let base_url = ctx
            .config
            .as_ref()
            .map(|c| c.base_url.trim_end_matches('/').to_string())
            .unwrap_or_default();
        let language = extract_language(ctx);
        let copyright = extract_copyright(&meta_entries);

        let mut articles = collect_articles(&meta_entries, &base_url);
        // Descending by sort key, so the newest article comes first.
        articles.sort_by(|a, b| b.0.cmp(&a.0));
        articles.truncate(50);
        if articles.is_empty() {
            return Ok(());
        }

        let last_build_date = extract_last_build_date(&articles);
        let items_xml: String = articles
            .iter()
            .map(|(_, xml)| xml.as_str())
            .collect::<Vec<_>>()
            .join("\n");

        // Channel-level values are carried over from the existing feed.
        let channel_title = extract_xml_value(&content, "title")
            .unwrap_or_else(|| "Untitled".to_string());
        let channel_link = extract_xml_value(&content, "link")
            .unwrap_or_else(|| base_url.clone());
        let channel_desc =
            extract_xml_value(&content, "description").unwrap_or_default();

        let rebuilt = build_rss_channel(
            &channel_title,
            &channel_link,
            &channel_desc,
            &base_url,
            &language,
            &last_build_date,
            &copyright,
            &items_xml,
        );
        fs::write(&rss_path, rebuilt)
            .with_context(|| format!("cannot write {}", rss_path.display()))?;
        log::info!(
            "[rss-aggregate] Rebuilt rss.xml with {} article items",
            articles.len()
        );
        Ok(())
    }
}
/// Returns the language code to publish in the feed's `<language>` element.
///
/// Uses the configured `language` when a config is present and the value is
/// not blank; otherwise falls back to `"en"`.
fn extract_language(ctx: &PluginContext) -> String {
    ctx.config
        .as_ref()
        .map(|c| c.language.trim())
        .filter(|lang| !lang.is_empty())
        .unwrap_or("en")
        .to_string()
}
/// Returns the `copyright` value of the first metadata entry that has one,
/// or an empty string when no entry carries that key.
fn extract_copyright(
    meta_entries: &[(String, std::collections::HashMap<String, String>)],
) -> String {
    for (_, page_meta) in meta_entries {
        if let Some(notice) = page_meta.get("copyright") {
            return notice.clone();
        }
    }
    String::new()
}
/// Returns the text between the first `<pubDate>` and the following
/// `</pubDate>` in the first article's XML.
///
/// Yields an empty string when there are no articles or when the first
/// article has no complete `<pubDate>` element.
fn extract_last_build_date(articles: &[(String, String)]) -> String {
    const OPEN_TAG: &str = "<pubDate>";
    const CLOSE_TAG: &str = "</pubDate>";

    let first_item_xml = match articles.first() {
        Some((_, xml)) => xml.as_str(),
        None => return String::new(),
    };
    let tail = match first_item_xml.find(OPEN_TAG) {
        Some(start) => &first_item_xml[start + OPEN_TAG.len()..],
        None => return String::new(),
    };
    match tail.find(CLOSE_TAG) {
        Some(end) => tail[..end].to_string(),
        None => String::new(),
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::plugin::PluginContext;
use std::collections::HashMap;
use std::path::Path;
use tempfile::tempdir;
/// Writes `meta` as JSON to `<dir>/<slug>/page.meta.json`, creating the page
/// directory first. Panics on any failure (test helper).
fn write_meta_sidecar(
    dir: &Path,
    slug: &str,
    meta: &HashMap<String, String>,
) {
    let sidecar_dir = dir.join(slug);
    fs::create_dir_all(&sidecar_dir).expect("create page dir");
    let serialized = serde_json::to_string(meta).expect("serialize meta");
    fs::write(sidecar_dir.join("page.meta.json"), serialized)
        .expect("write meta");
}
/// Builds a `PluginContext` for `site_dir` that carries a populated test
/// configuration (base URL `https://example.com`, language `en`).
fn make_atom_ctx(site_dir: &Path) -> PluginContext {
    crate::test_support::init_logger();

    let content = Path::new("content");
    let build = Path::new("build");
    let templates = Path::new("templates");
    let site_label = String::from("Test Site");

    let config = crate::cmd::SsgConfig {
        base_url: String::from("https://example.com"),
        site_name: site_label.clone(),
        site_title: site_label,
        site_description: String::from("A test site"),
        language: String::from("en"),
        content_dir: content.to_path_buf(),
        output_dir: build.to_path_buf(),
        template_dir: templates.to_path_buf(),
        serve_dir: None,
        i18n: None,
    };
    PluginContext::with_config(content, build, site_dir, templates, config)
}
/// Builds a `PluginContext` for `site_dir` via `PluginContext::new`, i.e.
/// without passing a configuration.
fn test_ctx(site_dir: &Path) -> PluginContext {
    crate::test_support::init_logger();
    let content_dir = Path::new("content");
    let build_dir = Path::new("build");
    let template_dir = Path::new("templates");
    PluginContext::new(content_dir, build_dir, site_dir, template_dir)
}
#[test]
fn test_rss_aggregate_single_item_trigger() -> Result<()> {
    // Smoke test: a feed with a single item passes the item-count guard and
    // the plugin must complete without error.
    let single_item_feed = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>My Site</title>
<link>https://example.com</link>
<description>A test site</description>
<item>
<title>Feed itself</title>
<link>https://example.com/rss.xml</link>
</item>
</channel>
</rss>"#;
    let workspace = tempdir()?;
    fs::write(workspace.path().join("rss.xml"), single_item_feed)?;
    let ctx = test_ctx(workspace.path());
    RssAggregatePlugin.after_compile(&ctx)
}
#[test]
fn test_rss_aggregate_with_full_metadata() -> Result<()> {
    // A page carrying every supported metadata key must surface each of them
    // in the rebuilt feed.
    let tmp = tempdir()?;
    let rss_path = tmp.path().join("rss.xml");
    let seed_feed = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Blog</title>
<link>https://example.com</link>
<description>A test blog</description>
<item>
<title>Placeholder</title>
</item>
</channel>
</rss>"#;
    fs::write(&rss_path, seed_feed)?;

    let meta: HashMap<String, String> = [
        ("title", "Article One"),
        ("description", "First article desc"),
        ("item_pub_date", "Thu, 11 Apr 2026 06:06:06 +0000"),
        ("author", "Alice"),
        ("banner", "/images/banner.webp"),
        ("category", "Technology"),
        ("tags", "rust, web"),
        ("copyright", "Copyright 2026 Alice"),
    ]
    .iter()
    .map(|&(key, value)| (key.to_owned(), value.to_owned()))
    .collect();
    write_meta_sidecar(tmp.path(), "article-one", &meta);

    let ctx = make_atom_ctx(tmp.path());
    RssAggregatePlugin.after_compile(&ctx)?;
    let result = fs::read_to_string(&rss_path)?;

    // (expected fragment, failure explanation)
    let expectations = [
        (
            "<enclosure url=\"https://example.com/images/banner.webp\"",
            "Should have enclosure with base_url prefix",
        ),
        ("type=\"image/webp\"", "Should detect webp MIME type"),
        (
            "<category>Technology</category>",
            "Should have category element",
        ),
        (
            "<category>rust</category>",
            "Should have tag category 'rust'",
        ),
        ("<category>web</category>", "Should have tag category 'web'"),
        ("<language>en</language>", "Should have language element"),
        ("<lastBuildDate>", "Should have lastBuildDate"),
        (
            "<copyright>Copyright 2026 Alice</copyright>",
            "Should have copyright",
        ),
    ];
    for &(fragment, reason) in &expectations {
        assert!(result.contains(fragment), "{reason}: {result}");
    }
    Ok(())
}
#[test]
fn test_rss_aggregate_banner_with_image_field() -> Result<()> {
    // With no `banner` key, the `image` key supplies the enclosure; an
    // absolute URL must not be prefixed with the base URL.
    let tmp = tempdir()?;
    let rss_path = tmp.path().join("rss.xml");
    let seed_feed = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>T</title><link>https://example.com</link><description>D</description><item><title>X</title></item></channel></rss>"#;
    fs::write(&rss_path, seed_feed)?;

    let meta: HashMap<String, String> = [
        ("title", "Image Test"),
        ("description", "Testing image"),
        ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
        ("author", "Bob"),
        ("image", "https://cdn.example.com/photo.png"),
    ]
    .iter()
    .map(|&(key, value)| (key.to_owned(), value.to_owned()))
    .collect();
    write_meta_sidecar(tmp.path(), "img-test", &meta);

    let ctx = make_atom_ctx(tmp.path());
    RssAggregatePlugin.after_compile(&ctx)?;
    let result = fs::read_to_string(&rss_path)?;

    let expectations = [
        (
            "url=\"https://cdn.example.com/photo.png\"",
            "Should use absolute image URL as-is",
        ),
        ("type=\"image/png\"", "Should detect png MIME type"),
    ];
    for &(fragment, reason) in &expectations {
        assert!(result.contains(fragment), "{reason}: {result}");
    }
    Ok(())
}
#[test]
fn test_rss_aggregate_jpeg_mime() -> Result<()> {
    // Any banner that is neither `.webp` nor `.png` is reported as JPEG.
    let tmp = tempdir()?;
    let rss_path = tmp.path().join("rss.xml");
    let seed_feed = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>T</title><link>https://example.com</link><description>D</description><item><title>X</title></item></channel></rss>"#;
    fs::write(&rss_path, seed_feed)?;

    let meta: HashMap<String, String> = [
        ("title", "JPEG Test"),
        ("description", "desc"),
        ("item_pub_date", "Mon, 01 Sep 2025 12:00:00 +0000"),
        ("author", "Carol"),
        ("banner", "/img/photo.jpg"),
    ]
    .iter()
    .map(|&(key, value)| (key.to_owned(), value.to_owned()))
    .collect();
    write_meta_sidecar(tmp.path(), "jpeg-test", &meta);

    let ctx = make_atom_ctx(tmp.path());
    RssAggregatePlugin.after_compile(&ctx)?;

    let result = fs::read_to_string(&rss_path)?;
    let has_jpeg_type = result.contains("type=\"image/jpeg\"");
    assert!(
        has_jpeg_type,
        "Should default to image/jpeg for .jpg: {result}"
    );
    Ok(())
}
#[test]
fn test_rss_aggregate_skips_multi_item() -> Result<()> {
    // A feed that already lists two items must be left untouched.
    let original = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>T</title><link>x</link><description>D</description>
<item><title>A</title></item>
<item><title>B</title></item>
</channel></rss>"#;
    let workspace = tempdir()?;
    let feed_file = workspace.path().join("rss.xml");
    fs::write(&feed_file, original)?;

    let ctx = test_ctx(workspace.path());
    RssAggregatePlugin.after_compile(&ctx)?;

    let after_run = fs::read_to_string(&feed_file)?;
    assert_eq!(after_run, original, "Should not modify feed with >1 items");
    Ok(())
}
#[test]
fn test_collect_articles_empty_entries() {
    // No metadata in, no items out.
    let no_entries: &[(String, HashMap<String, String>)] = &[];
    let produced = collect_articles(no_entries, "https://example.com");
    let nothing_produced = produced.is_empty();
    assert!(
        nothing_produced,
        "no meta entries should produce no articles"
    );
}
#[test]
fn test_collect_articles_skips_empty_title() {
    // An entry that only has a description lacks the required title.
    let untitled: HashMap<String, String> =
        std::iter::once(("description".to_string(), "no title here".to_string()))
            .collect();
    let entries = [("page".to_string(), untitled)];
    let produced = collect_articles(&entries, "https://example.com");
    let nothing_produced = produced.is_empty();
    assert!(nothing_produced, "entries without title should be skipped");
}
#[test]
fn test_collect_articles_skips_empty_path() {
    // A titled entry is still dropped when its relative path is empty.
    let titled: HashMap<String, String> =
        std::iter::once(("title".to_string(), "Has Title".to_string()))
            .collect();
    let entries = [(String::new(), titled)];
    let produced = collect_articles(&entries, "https://example.com");
    let nothing_produced = produced.is_empty();
    assert!(
        nothing_produced,
        "entries with empty path should be skipped"
    );
}
#[test]
fn test_collect_articles_multiple_entries_sorted() {
    // Builds one `(slug, metadata)` entry with the four common keys.
    let entry = |slug: &str, title: &str, desc: &str, date: &str, author: &str| {
        let meta: HashMap<String, String> = [
            ("title", title),
            ("description", desc),
            ("item_pub_date", date),
            ("author", author),
        ]
        .iter()
        .map(|&(key, value)| (key.to_owned(), value.to_owned()))
        .collect();
        (slug.to_owned(), meta)
    };
    let entries = vec![
        entry(
            "old-post",
            "Older",
            "old",
            "Mon, 01 Jan 2024 00:00:00 +0000",
            "A",
        ),
        entry(
            "new-post",
            "Newer",
            "new",
            "Wed, 01 Jan 2025 00:00:00 +0000",
            "B",
        ),
    ];

    let mut articles = collect_articles(&entries, "https://example.com");
    assert_eq!(articles.len(), 2);

    // Same descending order the plugin applies before truncating.
    articles.sort_by(|left, right| right.0.cmp(&left.0));
    let newest_first = articles[0].1.contains("<title>Newer</title>");
    assert!(newest_first, "newest article should sort first");
}
#[test]
fn test_collect_articles_xml_escapes_description() {
    // Markup and ampersands in the description must reach the feed in their
    // escaped form, never as raw tags.
    let mut meta = HashMap::new();
    let _ = meta.insert("title".to_string(), "Escape Test".to_string());
    let _ = meta.insert(
        "description".to_string(),
        "Use <b>bold</b> & \"quotes\"".to_string(),
    );
    let _ = meta.insert("author".to_string(), "X".to_string());
    let entries = vec![("esc".to_string(), meta)];
    let articles = collect_articles(&entries, "");
    assert_eq!(articles.len(), 1);
    let xml = &articles[0].1;
    assert!(
        xml.contains("&lt;b&gt;bold&lt;/b&gt;"),
        "angle brackets should be escaped: {xml}"
    );
    assert!(
        !xml.contains("<b>"),
        "raw markup must not leak into the item: {xml}"
    );
    assert!(xml.contains("&amp;"), "ampersands should be escaped: {xml}");
}
#[test]
fn test_build_rss_channel_minimal() {
    // Only the mandatory channel values are supplied; every optional value
    // and the item list are empty.
    let site = "https://x.example";
    let (language, last_build_date, copyright, items) = ("", "", "", "");
    let result = build_rss_channel(
        "Title",
        site,
        "Desc",
        site,
        language,
        last_build_date,
        copyright,
        items,
    );

    assert!(result.contains("<title>Title</title>"));
    assert!(result.contains("<link>https://x.example</link>"));
    assert!(result.contains("<description>Desc</description>"));

    let has_language = result.contains("<language>");
    assert!(!has_language, "no language when empty string supplied");
    let has_build_date = result.contains("<lastBuildDate>");
    assert!(
        !has_build_date,
        "no lastBuildDate when empty string supplied"
    );
}
#[test]
fn test_build_rss_channel_with_all_extras() {
    // Every optional channel value is supplied and must be rendered, along
    // with the item XML passed in.
    let build_date = "Mon, 01 Jan 2024 00:00:00 +0000";
    let rights = "Copyright 2024 X";
    let single_item = "<item><title>A</title></item>";
    let result = build_rss_channel(
        "T",
        "L",
        "D",
        "https://x.example",
        "en",
        build_date,
        rights,
        single_item,
    );
    assert!(result.contains("<language>en</language>"));
    assert!(result.contains(
        "<lastBuildDate>Mon, 01 Jan 2024 00:00:00 +0000</lastBuildDate>"
    ));
    assert!(result.contains("<copyright>Copyright 2024 X</copyright>"));
    assert!(result.contains("<item><title>A</title></item>"));
}
#[test]
fn test_extract_last_build_date_from_articles() {
    // The date comes from the first article in the slice; later ones are
    // ignored.
    let newest = (
        String::from("2025"),
        String::from(
            "<item><pubDate>Mon, 01 Sep 2025 12:00:00 +0000</pubDate></item>",
        ),
    );
    let older = (
        String::from("2024"),
        String::from(
            "<item><pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate></item>",
        ),
    );
    let articles = [newest, older];
    assert_eq!(
        extract_last_build_date(&articles),
        "Mon, 01 Sep 2025 12:00:00 +0000"
    );
}
#[test]
fn test_extract_last_build_date_empty() {
    // With no articles there is no date to report.
    let no_articles: &[(String, String)] = &[];
    let date = extract_last_build_date(no_articles);
    assert!(date.is_empty());
}
#[test]
fn test_rss_no_file_is_noop() -> Result<()> {
    // Without an existing feed the plugin must succeed and must not create
    // one.
    let tmp = tempdir()?;
    RssAggregatePlugin.after_compile(&test_ctx(tmp.path()))?;
    assert!(!tmp.path().join("rss.xml").exists());
    Ok(())
}
}