// aphid 0.2.2
//
// A static site generator for blogs and wikis, with wiki-links across both.
// Documentation
use std::io::Cursor;

use quick_xml::Writer;
use quick_xml::events::{BytesDecl, BytesText};

use super::RootArtifact;
use crate::content::Site;
use crate::content::page::PageKind;
use crate::content::slug::Slug;
use crate::markdown::RenderedSite;

/// XML namespace URI set as the `xmlns` attribute of the `<urlset>` root element.
const SITEMAP_NS: &str = "http://www.sitemaps.org/schemas/sitemap/0.9";

/// Generator for a `sitemap.xml` built from the loaded site content.
///
/// This is a stateless unit type; its behavior comes from its
/// [`RootArtifact`] implementation.
///
/// Blog posts always carry `<lastmod>` (from `updated`, falling back to
/// `created`). Wiki pages include it only when a date is present in their
/// frontmatter. Standalone pages, index pages, and tag pages omit `<lastmod>`.
pub(super) struct Sitemap;

impl RootArtifact for Sitemap {
    /// Name of the file this artifact produces: `sitemap.xml`.
    fn filename(&self) -> &'static str {
        "sitemap.xml"
    }

    /// Produces the sitemap bytes for the site behind `rendered`.
    ///
    /// Emits a debug-level trace event, gathers one entry per URL from the
    /// site, and serializes those entries as XML.
    fn render(&self, rendered: &RenderedSite<'_>) -> Vec<u8> {
        tracing::debug!("generating sitemap.xml");
        write_xml(&collect_entries(rendered.site()))
    }
}

/// One `<url>` element of the sitemap.
struct Entry {
    /// Text of the `<loc>` child element (the page URL).
    loc: String,
    /// Text of the `<lastmod>` child element; `None` omits the element.
    lastmod: Option<String>,
}

impl Entry {
    /// Builds an entry for `url_path`.
    ///
    /// The `loc` value is whatever the site config's `absolute_url` returns
    /// for `url_path`, converted into a `String`; `lastmod` is stored as given.
    fn new(site: &Site, url_path: &str, lastmod: Option<String>) -> Self {
        let loc = site.config.absolute_url(url_path).into();
        Self { loc, lastmod }
    }
}

/// Gathers every sitemap entry for `site`, in output order:
///
/// 1. `/` and `/blog/`, then each blog post (dated by `updated`, else `created`);
/// 2. `/wiki/`, then each wiki page (dated only if `updated` or `created` is set);
/// 3. each standalone page (undated);
/// 4. when the tag index is non-empty, `/tags/` followed by one undated
///    entry per tag, ordered by sorted tag name.
///
/// NOTE(review): nothing here looks at a `draft` flag — presumably drafts are
/// excluded before the `Site` is built; confirm against the loader.
fn collect_entries(site: &Site) -> Vec<Entry> {
    // Shorthand for the entries that never carry `<lastmod>`.
    let undated = |path: &str| Entry::new(site, path, None);

    let mut out = vec![undated("/"), undated("/blog/")];

    out.extend(site.blog.iter().map(|post| {
        let meta = &post.frontmatter;
        let stamp = meta.updated.unwrap_or(meta.created);
        Entry::new(
            site,
            &PageKind::Blog.url_path(&post.slug),
            Some(stamp.to_string()),
        )
    }));

    out.push(undated("/wiki/"));

    out.extend(site.wiki.iter().map(|page| {
        let meta = &page.frontmatter;
        let stamp = meta.updated.or(meta.created).map(|d| d.to_string());
        Entry::new(site, &PageKind::Wiki.url_path(&page.slug), stamp)
    }));

    out.extend(
        site.pages
            .iter()
            .map(|page| undated(&PageKind::Page.url_path(&page.slug))),
    );

    // Without tags there is neither a tag index page nor any tag page.
    if site.tag_index.is_empty() {
        return out;
    }

    out.push(undated("/tags/"));

    // Sort the tag names so the output order does not depend on map iteration order.
    let mut names: Vec<&String> = site.tag_index.keys().collect();
    names.sort();
    out.extend(names.into_iter().map(|name| {
        let slug: Slug = name.as_str().into();
        undated(&format!("/tags/{slug}/"))
    }));

    out
}

fn write_xml(entries: &[Entry]) -> Vec<u8> {
    let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 2);

    writer
        .write_event(quick_xml::events::Event::Decl(BytesDecl::new(
            "1.0",
            Some("UTF-8"),
            None,
        )))
        .expect("write XML decl");

    writer
        .create_element("urlset")
        .with_attribute(("xmlns", SITEMAP_NS))
        .write_inner_content(|w| {
            for entry in entries {
                w.create_element("url").write_inner_content(|w| {
                    w.create_element("loc")
                        .write_text_content(BytesText::new(&entry.loc))?;
                    if let Some(ref date) = entry.lastmod {
                        w.create_element("lastmod")
                            .write_text_content(BytesText::new(date))?;
                    }
                    Ok(())
                })?;
            }
            Ok(())
        })
        .expect("write sitemap XML");

    let mut bytes = writer.into_inner().into_inner();
    bytes.push(b'\n');
    bytes
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::markdown::MarkdownRenderer;

    /// Minimal config shared by the site-level tests.
    fn example_config() -> crate::config::Config {
        "title = \"T\"\nbase_url = \"https://example.com\""
            .parse()
            .unwrap()
    }

    /// Renders the sitemap for `site` and decodes it as UTF-8.
    fn sitemap_for(site: &Site) -> String {
        let rendered = MarkdownRenderer::new(site).render_site();
        String::from_utf8(Sitemap.render(&rendered)).unwrap()
    }

    /// An empty site still yields a complete document with the fixed index URLs.
    #[test]
    fn well_formed() {
        let site = Site::from_parts(example_config(), vec![], vec![], vec![]).unwrap();
        let xml = sitemap_for(&site);

        assert!(xml.starts_with("<?xml"));
        for needle in [
            "<urlset",
            "</urlset>",
            "<loc>https://example.com/</loc>",
            "<loc>https://example.com/blog/</loc>",
            "<loc>https://example.com/wiki/</loc>",
        ] {
            assert!(xml.contains(needle));
        }
    }

    /// A blog post without `updated` reports its `created` date as `<lastmod>`.
    #[test]
    fn includes_blog_dates() {
        use crate::content::frontmatter::BlogFrontmatter;
        use crate::content::page::Page;
        use chrono::NaiveDate;
        use std::path::PathBuf;

        let frontmatter = BlogFrontmatter {
            title: "Hello".into(),
            slug: "hello".into(),
            author: "A".into(),
            created: NaiveDate::from_ymd_opt(2026, 1, 15).unwrap(),
            updated: None,
            image: None,
            description: None,
            tags: vec![],
            draft: false,
        };
        let post = Page {
            slug: "hello".into(),
            body: String::new(),
            path: PathBuf::from("content/blog/hello.md"),
            frontmatter,
        };
        let site = Site::from_parts(example_config(), vec![post], vec![], vec![]).unwrap();
        let xml = sitemap_for(&site);

        assert!(xml.contains("<loc>https://example.com/blog/hello/</loc>"));
        assert!(xml.contains("<lastmod>2026-01-15</lastmod>"));
    }

    /// `&` inside a URL is written as `&amp;`.
    #[test]
    fn escapes_ampersands() {
        let entry = Entry {
            loc: "https://example.com/a&b/".into(),
            lastmod: None,
        };
        let xml = String::from_utf8(write_xml(&[entry])).unwrap();
        assert!(xml.contains("https://example.com/a&amp;b/"));
    }
}