use std::fmt::Write as _;
use std::future::Future;
use std::path::Path;
use std::pin::Pin;
use std::sync::Arc;
use axum::Router;
use axum::body::Body;
use axum::http::Response;
use axum::routing::get;
#[cfg(feature = "maud")]
use maud::{Markup, html};
/// One `<url>` record of an XML sitemap.
///
/// Only [`loc`](Self::loc) is required; the remaining fields are optional and
/// are set through the chained builder methods below.
#[derive(Debug, Clone)]
pub struct SitemapEntry {
    /// URL of the page, emitted inside `<loc>`.
    pub loc: String,
    /// Optional last-modification value, emitted inside `<lastmod>`.
    pub lastmod: Option<String>,
    /// Optional change-frequency hint, emitted inside `<changefreq>`.
    pub changefreq: Option<SitemapChangefreq>,
    /// Optional priority, emitted inside `<priority>`.
    pub priority: Option<f32>,
}

impl SitemapEntry {
    /// Creates an entry for `loc` with every optional field unset.
    pub fn new(loc: impl Into<String>) -> Self {
        let loc = loc.into();
        Self {
            loc,
            lastmod: None,
            changefreq: None,
            priority: None,
        }
    }

    /// Sets the last-modification value, replacing any previous one.
    #[must_use]
    pub fn lastmod(self, lastmod: impl Into<String>) -> Self {
        Self {
            lastmod: Some(lastmod.into()),
            ..self
        }
    }

    /// Sets the change-frequency hint.
    #[must_use]
    pub const fn changefreq(mut self, changefreq: SitemapChangefreq) -> Self {
        // Field assignment (rather than struct-update syntax) keeps this
        // usable in const contexts: `self` is returned whole, never dropped.
        self.changefreq = Some(changefreq);
        self
    }

    /// Sets the priority, clamped into the inclusive range `0.0..=1.0`.
    #[must_use]
    pub const fn priority(mut self, priority: f32) -> Self {
        let bounded = priority.clamp(0.0, 1.0);
        self.priority = Some(bounded);
        self
    }
}
/// Change-frequency hint attached to a sitemap entry.
///
/// Each variant maps to the lowercase keyword returned by `as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SitemapChangefreq {
    /// Rendered as `always`.
    Always,
    /// Rendered as `hourly`.
    Hourly,
    /// Rendered as `daily`.
    Daily,
    /// Rendered as `weekly`.
    Weekly,
    /// Rendered as `monthly`.
    Monthly,
    /// Rendered as `yearly`.
    Yearly,
    /// Rendered as `never`.
    Never,
}

impl SitemapChangefreq {
    /// Returns the lowercase keyword written into `<changefreq>`.
    #[must_use]
    pub(crate) const fn as_str(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must fail to compile here.
        let keyword = match self {
            Self::Never => "never",
            Self::Yearly => "yearly",
            Self::Monthly => "monthly",
            Self::Weekly => "weekly",
            Self::Daily => "daily",
            Self::Hourly => "hourly",
            Self::Always => "always",
        };
        keyword
    }
}
/// A provider of sitemap entries.
///
/// Implementors must be `Send + Sync`. The trait is object-safe and is used
/// in this file as `Arc<dyn SitemapSource>`.
pub trait SitemapSource: Send + Sync {
/// Returns a boxed, pinned, `Send` future that resolves to this source's
/// entries. The future may borrow from `self` (lifetime `'_`).
fn entries(&self) -> Pin<Box<dyn Future<Output = Vec<SitemapEntry>> + Send + '_>>;
}
// Newtype around a list of shared `SitemapSource` trait objects.
// NOTE(review): presumably how registered sources are handed to framework
// internals; the consumer is not visible in this file, so confirm against callers.
#[doc(hidden)]
pub struct RegisteredSitemapSources(pub Vec<Arc<dyn SitemapSource>>);
// Newtype around the crate's `SeoConfig`.
// NOTE(review): presumably how the SEO configuration is handed to framework
// internals; the consumer is not visible in this file, so confirm against callers.
#[doc(hidden)]
pub struct RegisteredSeoConfig(pub crate::config::SeoConfig);
/// Builds the body of a `robots.txt` file.
///
/// * `profile`: `"prod"` or `"production"` (exact, case-sensitive match)
///   produces an allow-all rule; any other value produces a disallow-all rule.
/// * `sitemap_url`: when present, a `Sitemap:` line is appended after a blank
///   separator line.
/// * `additional_rules`: each rule is written verbatim on its own line,
///   directly after the default `User-agent: *` group.
#[must_use]
pub fn robots_txt(profile: &str, sitemap_url: Option<&str>, additional_rules: &[String]) -> String {
    let default_group = match profile {
        "prod" | "production" => "User-agent: *\nAllow: /\n",
        _ => "User-agent: *\nDisallow: /\n",
    };

    let mut out = String::from(default_group);

    additional_rules.iter().for_each(|rule| {
        out.push_str(rule);
        out.push('\n');
    });

    if let Some(url) = sitemap_url {
        // A blank line separates the rule group from the sitemap reference.
        out.push('\n');
        out.push_str("Sitemap: ");
        out.push_str(url);
        out.push('\n');
    }

    out
}
/// Renders `entries` as a single sitemap `<urlset>` document.
///
/// At most `CHUNK_SIZE` (50,000) entries are rendered. When more are
/// supplied, a warning is logged and only the first `CHUNK_SIZE` entries are
/// included in the output.
///
/// `_base_url` is accepted but currently unused.
#[must_use]
pub fn sitemap_xml(entries: &[SitemapEntry], _base_url: Option<&str>) -> String {
    const CHUNK_SIZE: usize = 50_000;

    let served = if entries.len() <= CHUNK_SIZE {
        entries
    } else {
        tracing::warn!(
            count = entries.len(),
            limit = CHUNK_SIZE,
            "sitemap: entry count exceeds the {CHUNK_SIZE}-URL per-file limit; \
             only the first {CHUNK_SIZE} entries will be served. \
             Register a custom /sitemap.xml handler to serve a sitemap index for larger sites.",
        );
        // In bounds: this branch is only reached when len > CHUNK_SIZE.
        &entries[..CHUNK_SIZE]
    };

    sitemap_urlset_xml(served)
}
/// Serializes `entries` into a sitemap `<urlset>` XML document.
///
/// Every entry becomes one `<url>` element containing `<loc>` and, when set,
/// `<lastmod>`, `<changefreq>` and `<priority>` (formatted with one decimal
/// place). Free-form text values (`loc` and `lastmod`) are XML-escaped so
/// that caller-supplied strings cannot produce malformed markup.
///
/// No limit on the number of entries is applied here.
#[must_use]
pub(crate) fn sitemap_urlset_xml(entries: &[SitemapEntry]) -> String {
    let mut xml = String::from(
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
         <urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">",
    );
    for entry in entries {
        xml.push_str("\n <url>");
        xml.push_str("\n <loc>");
        xml.push_str(&xml_escape(&entry.loc));
        xml.push_str("</loc>");
        if let Some(lastmod) = &entry.lastmod {
            xml.push_str("\n <lastmod>");
            // `lastmod` is an arbitrary caller-provided string, so it needs the
            // same escaping as `loc`.
            xml.push_str(&xml_escape(lastmod));
            xml.push_str("</lastmod>");
        }
        if let Some(freq) = entry.changefreq {
            xml.push_str("\n <changefreq>");
            xml.push_str(freq.as_str());
            xml.push_str("</changefreq>");
        }
        if let Some(prio) = entry.priority {
            xml.push_str("\n <priority>");
            // Writing into a `String` cannot fail; the result is discarded.
            write!(xml, "{prio:.1}").ok();
            xml.push_str("</priority>");
        }
        xml.push_str("\n </url>");
    }
    xml.push_str("\n</urlset>");
    xml
}
/// Escapes the five characters that have predefined entities in XML.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&apos;` respectively; every other character is copied
/// through unchanged. An empty input yields an empty string.
fn xml_escape(s: &str) -> String {
    // At least `s.len()` bytes are needed; escaped characters grow the buffer.
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
/// Builder for the SEO-related tags of a page's `<head>`.
///
/// Every field is optional and starts unset. Each setter consumes the builder
/// and returns it with one field replaced, so calls can be chained.
#[derive(Debug, Default, Clone)]
pub struct SeoMeta {
    // Generic page metadata.
    title: Option<String>,
    description: Option<String>,
    canonical: Option<String>,
    // Open Graph overrides.
    og_title: Option<String>,
    og_description: Option<String>,
    og_image: Option<String>,
    og_type: Option<String>,
    og_url: Option<String>,
    // Twitter card values.
    twitter_card: Option<String>,
    twitter_title: Option<String>,
    twitter_description: Option<String>,
    twitter_image: Option<String>,
    // Content of the `robots` meta tag.
    robots_directive: Option<String>,
}

impl SeoMeta {
    /// Creates a builder with every field unset.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the page title.
    #[must_use]
    pub fn title(self, title: impl Into<String>) -> Self {
        Self {
            title: Some(title.into()),
            ..self
        }
    }

    /// Sets the page description.
    #[must_use]
    pub fn description(self, description: impl Into<String>) -> Self {
        Self {
            description: Some(description.into()),
            ..self
        }
    }

    /// Sets the canonical URL.
    #[must_use]
    pub fn canonical(self, url: impl Into<String>) -> Self {
        Self {
            canonical: Some(url.into()),
            ..self
        }
    }

    /// Sets the `og:image` URL.
    #[must_use]
    pub fn og_image(self, url: impl Into<String>) -> Self {
        Self {
            og_image: Some(url.into()),
            ..self
        }
    }

    /// Sets the `og:type` value.
    #[must_use]
    pub fn og_type(self, og_type: impl Into<String>) -> Self {
        Self {
            og_type: Some(og_type.into()),
            ..self
        }
    }

    /// Sets an explicit `og:title`.
    #[must_use]
    pub fn og_title(self, title: impl Into<String>) -> Self {
        Self {
            og_title: Some(title.into()),
            ..self
        }
    }

    /// Sets an explicit `og:description`.
    #[must_use]
    pub fn og_description(self, desc: impl Into<String>) -> Self {
        Self {
            og_description: Some(desc.into()),
            ..self
        }
    }

    /// Sets an explicit `og:url`.
    #[must_use]
    pub fn og_url(self, url: impl Into<String>) -> Self {
        Self {
            og_url: Some(url.into()),
            ..self
        }
    }

    /// Sets the `twitter:card` type.
    #[must_use]
    pub fn twitter_card(self, card_type: impl Into<String>) -> Self {
        Self {
            twitter_card: Some(card_type.into()),
            ..self
        }
    }

    /// Sets an explicit `twitter:title`.
    #[must_use]
    pub fn twitter_title(self, title: impl Into<String>) -> Self {
        Self {
            twitter_title: Some(title.into()),
            ..self
        }
    }

    /// Sets an explicit `twitter:description`.
    #[must_use]
    pub fn twitter_description(self, desc: impl Into<String>) -> Self {
        Self {
            twitter_description: Some(desc.into()),
            ..self
        }
    }

    /// Sets the `twitter:image` URL.
    #[must_use]
    pub fn twitter_image(self, url: impl Into<String>) -> Self {
        Self {
            twitter_image: Some(url.into()),
            ..self
        }
    }

    /// Sets the content of the `robots` meta tag.
    #[must_use]
    pub fn robots(self, directive: impl Into<String>) -> Self {
        Self {
            robots_directive: Some(directive.into()),
            ..self
        }
    }

    /// Renders the configured values as head markup.
    ///
    /// Fallbacks: `og:title` / `twitter:title` use the page title when no
    /// explicit value is set, `og:description` / `twitter:description` use the
    /// page description, and `og:url` uses the canonical URL.
    ///
    /// `twitter:title` and `twitter:description` are only emitted when a
    /// twitter card type is set; `twitter:image` is emitted whenever it is set.
    #[cfg(feature = "maud")]
    #[must_use]
    pub fn render(&self) -> Markup {
        let page_title = self.title.as_deref();
        let page_desc = self.description.as_deref();

        let og_title = self.og_title.as_deref().or(page_title);
        let og_desc = self.og_description.as_deref().or(page_desc);
        let og_url = self.og_url.as_deref().or(self.canonical.as_deref());
        let twitter_title = self.twitter_title.as_deref().or(page_title);
        let twitter_desc = self.twitter_description.as_deref().or(page_desc);

        html! {
            @if let Some(title) = &self.title {
                title { (title) }
            }
            @if let Some(desc) = &self.description {
                meta name="description" content=(desc);
            }
            @if let Some(dir) = &self.robots_directive {
                meta name="robots" content=(dir);
            }
            @if let Some(url) = &self.canonical {
                link rel="canonical" href=(url);
            }
            @if let Some(t) = og_title {
                meta property="og:title" content=(t);
            }
            @if let Some(d) = og_desc {
                meta property="og:description" content=(d);
            }
            @if let Some(img) = &self.og_image {
                meta property="og:image" content=(img);
            }
            @if let Some(ot) = &self.og_type {
                meta property="og:type" content=(ot);
            }
            @if let Some(url) = og_url {
                meta property="og:url" content=(url);
            }
            @if let Some(card) = &self.twitter_card {
                meta name="twitter:card" content=(card);
                // Title and description are tied to the presence of a card type.
                @if let Some(t) = twitter_title {
                    meta name="twitter:title" content=(t);
                }
                @if let Some(d) = twitter_desc {
                    meta name="twitter:description" content=(d);
                }
            }
            @if let Some(img) = &self.twitter_image {
                meta name="twitter:image" content=(img);
            }
        }
    }
}
pub fn build_seo_router<S>(
profile: &str,
base_url: Option<&str>,
additional_rules: &[String],
) -> Router<S>
where
S: Clone + Send + Sync + 'static,
{
build_seo_router_with_entries(profile, base_url, additional_rules, &[])
}
/// Builds a router serving `/robots.txt` and `/sitemap.xml` for the given
/// sitemap entries.
///
/// * `profile`: forwarded to [`robots_txt`] to pick the default crawl rule.
/// * `base_url`: trailing slashes are trimmed; when present, the robots file
///   advertises `<base_url>/sitemap.xml`.
/// * `additional_rules`: extra lines for the robots file.
/// * `entries`: entries rendered into the sitemap body.
///
/// Both bodies are rendered once, here, and handed to
/// [`build_seo_router_from_bodies`].
pub fn build_seo_router_with_entries<S>(
    profile: &str,
    base_url: Option<&str>,
    additional_rules: &[String],
    entries: &[SitemapEntry],
) -> Router<S>
where
    S: Clone + Send + Sync + 'static,
{
    let root = base_url.map(|url| url.trim_end_matches('/'));
    let sitemap_location = root.map(|root| format!("{root}/sitemap.xml"));

    let robots_body = robots_txt(profile, sitemap_location.as_deref(), additional_rules);
    let sitemap_body = sitemap_xml(entries, root);

    build_seo_router_from_bodies(robots_body, sitemap_body)
}
pub fn build_seo_router_from_bodies<S>(robots_body: String, sitemap_body: String) -> Router<S>
where
S: Clone + Send + Sync + 'static,
{
Router::<S>::new()
.route(
"/robots.txt",
get(move || {
let body = robots_body.clone();
async move {
Response::builder()
.header("Content-Type", "text/plain; charset=utf-8")
.body(Body::from(body))
.unwrap()
}
}),
)
.route(
"/sitemap.xml",
get(move || {
let body = sitemap_body.clone();
async move {
Response::builder()
.header("Content-Type", "application/xml; charset=utf-8")
.body(Body::from(body))
.unwrap()
}
}),
)
}
/// Reports whether any SEO-related setting is present in `seo_cfg`.
///
/// Returns `true` when a base URL is set, when at least one additional robots
/// rule is listed, or when `allow_all` or the robots sitemap URL is set.
pub(crate) const fn has_seo_config(seo_cfg: &crate::config::SeoConfig) -> bool {
    let robots = &seo_cfg.robots;

    if seo_cfg.base_url.is_some() {
        return true;
    }
    if !robots.additional_rules.is_empty() {
        return true;
    }
    robots.allow_all.is_some() || robots.sitemap_url.is_some()
}
/// Resolves the profile string used for robots generation.
///
/// An explicit `allow_all` wins over the raw profile: `Some(true)` yields
/// `"prod"` and `Some(false)` yields `"dev"`. With `None`, `raw_profile` is
/// returned unchanged.
pub(crate) const fn effective_seo_profile(raw_profile: &str, allow_all: Option<bool>) -> &str {
    let Some(allow) = allow_all else {
        return raw_profile;
    };
    if allow { "prod" } else { "dev" }
}
pub(crate) async fn assemble_seo_bodies(
profile: &str,
base_url: Option<&str>,
sitemap_url_override: Option<&str>,
additional_rules: &[String],
sources: &[Arc<dyn SitemapSource>],
static_paths: &[&str],
) -> (String, String) {
let base_url = base_url.map(|u| u.trim_end_matches('/'));
let mut sitemap_entries = Vec::new();
for source in sources {
let mut entries = source.entries().await;
sitemap_entries.append(&mut entries);
}
if let Some(bu) = base_url {
for path in static_paths {
if !path.contains('{') {
sitemap_entries.push(SitemapEntry::new(format!("{bu}{path}")));
}
}
}
let derived_sitemap_url = base_url.map(|b| format!("{b}/sitemap.xml"));
let sitemap_url = sitemap_url_override.or(derived_sitemap_url.as_deref());
let robots_body = robots_txt(profile, sitemap_url, additional_rules);
let sitemap_body = sitemap_xml(&sitemap_entries, base_url);
(robots_body, sitemap_body)
}
/// Renders `robots.txt` and `sitemap.xml` and writes them into `dist_dir`.
///
/// * `base_url`: trailing slashes are trimmed before use.
/// * `sitemap_url_override`: when set, used as the robots `Sitemap:` URL
///   instead of the derived `<base_url>/sitemap.xml`.
///
/// Both bodies are rendered before anything is written. `robots.txt` is
/// written first, then `sitemap.xml`.
///
/// # Errors
///
/// Returns the first I/O error reported by `tokio::fs::write`. If writing
/// `robots.txt` fails, `sitemap.xml` is not attempted.
pub async fn write_seo_files(
    dist_dir: &Path,
    profile: &str,
    base_url: Option<&str>,
    sitemap_url_override: Option<&str>,
    additional_rules: &[String],
    entries: &[SitemapEntry],
) -> Result<(), std::io::Error> {
    let root = base_url.map(|url| url.trim_end_matches('/'));
    let derived_sitemap_url = root.map(|root| format!("{root}/sitemap.xml"));

    let robots = robots_txt(
        profile,
        sitemap_url_override.or(derived_sitemap_url.as_deref()),
        additional_rules,
    );
    let sitemap = sitemap_xml(entries, root);

    let robots_path = dist_dir.join("robots.txt");
    let sitemap_path = dist_dir.join("sitemap.xml");

    tokio::fs::write(robots_path, robots).await?;
    tokio::fs::write(sitemap_path, sitemap).await
}
/// Unit tests for the sitemap/robots helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // --- SitemapEntry builder -------------------------------------------

    #[test]
    fn sitemap_entry_builder() {
        let e = SitemapEntry::new("https://example.com/")
            .lastmod("2026-01-01")
            .changefreq(SitemapChangefreq::Weekly)
            .priority(0.9);
        assert_eq!(e.loc, "https://example.com/");
        assert_eq!(e.lastmod.as_deref(), Some("2026-01-01"));
        assert_eq!(e.changefreq, Some(SitemapChangefreq::Weekly));
        assert!((e.priority.unwrap() - 0.9).abs() < 0.001);
    }

    #[test]
    fn sitemap_entry_priority_clamped() {
        let hi = SitemapEntry::new("https://example.com/").priority(1.5);
        let lo = SitemapEntry::new("https://example.com/").priority(-0.5);
        assert!((hi.priority.unwrap() - 1.0).abs() < 0.001);
        assert!((lo.priority.unwrap() - 0.0).abs() < 0.001);
    }

    // --- XML escaping ------------------------------------------------------

    #[test]
    fn xml_escape_replaces_special_chars() {
        // Each of the five special characters must come back as its entity.
        assert_eq!(
            xml_escape("a&b<c>d\"e'f"),
            "a&amp;b&lt;c&gt;d&quot;e&apos;f"
        );
    }

    // --- robots.txt --------------------------------------------------------

    #[test]
    fn robots_txt_staging_profile_disallows() {
        let txt = robots_txt("staging", None, &[]);
        assert!(txt.contains("Disallow: /"));
        assert!(!txt.contains("Allow: /"));
    }

    // --- has_seo_config ----------------------------------------------------

    #[test]
    fn has_seo_config_false_when_empty() {
        let cfg = crate::config::SeoConfig::default();
        assert!(!has_seo_config(&cfg));
    }

    #[test]
    fn has_seo_config_true_when_base_url_set() {
        let cfg = crate::config::SeoConfig {
            base_url: Some("https://example.com".to_string()),
            ..Default::default()
        };
        assert!(has_seo_config(&cfg));
    }

    #[test]
    fn has_seo_config_true_when_allow_all_set() {
        let cfg = crate::config::SeoConfig {
            robots: crate::config::RobotsConfig {
                allow_all: Some(true),
                ..Default::default()
            },
            ..Default::default()
        };
        assert!(has_seo_config(&cfg));
    }

    #[test]
    fn has_seo_config_true_when_sitemap_url_set() {
        let cfg = crate::config::SeoConfig {
            robots: crate::config::RobotsConfig {
                sitemap_url: Some("https://example.com/sitemap.xml".to_string()),
                ..Default::default()
            },
            ..Default::default()
        };
        assert!(has_seo_config(&cfg));
    }

    #[test]
    fn has_seo_config_true_when_additional_rules_set() {
        let cfg = crate::config::SeoConfig {
            robots: crate::config::RobotsConfig {
                additional_rules: vec!["Disallow: /admin".to_string()],
                ..Default::default()
            },
            ..Default::default()
        };
        assert!(has_seo_config(&cfg));
    }

    // --- effective_seo_profile --------------------------------------------

    #[test]
    fn effective_seo_profile_respects_allow_all_true() {
        assert_eq!(effective_seo_profile("dev", Some(true)), "prod");
    }

    #[test]
    fn effective_seo_profile_respects_allow_all_false() {
        assert_eq!(effective_seo_profile("prod", Some(false)), "dev");
    }

    #[test]
    fn effective_seo_profile_falls_back_to_raw_when_none() {
        assert_eq!(effective_seo_profile("staging", None), "staging");
    }

    // --- assemble_seo_bodies ------------------------------------------------

    /// Test double that returns a fixed list of entries.
    struct SimpleSitemapSource {
        entries: Vec<SitemapEntry>,
    }

    impl SitemapSource for SimpleSitemapSource {
        fn entries(
            &self,
        ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<SitemapEntry>> + Send + '_>>
        {
            let entries = self.entries.clone();
            Box::pin(async move { entries })
        }
    }

    #[tokio::test]
    async fn assemble_seo_bodies_empty() {
        let (robots, sitemap) = assemble_seo_bodies("prod", None, None, &[], &[], &[]).await;
        assert!(robots.contains("Allow: /"));
        assert!(sitemap.contains("<urlset"));
    }

    #[tokio::test]
    async fn assemble_seo_bodies_collects_source_entries() {
        let source = Arc::new(SimpleSitemapSource {
            entries: vec![SitemapEntry::new("https://example.com/post/1")],
        }) as Arc<dyn SitemapSource>;
        let (_, sitemap) = assemble_seo_bodies(
            "prod",
            Some("https://example.com"),
            None,
            &[],
            &[source],
            &[],
        )
        .await;
        assert!(
            sitemap.contains("https://example.com/post/1"),
            "should include source entry; got:\n{sitemap}"
        );
    }

    #[tokio::test]
    async fn assemble_seo_bodies_includes_static_paths() {
        let (_, sitemap) = assemble_seo_bodies(
            "prod",
            Some("https://example.com"),
            None,
            &[],
            &[],
            &["/about", "/contact"],
        )
        .await;
        assert!(sitemap.contains("https://example.com/about"));
        assert!(sitemap.contains("https://example.com/contact"));
    }

    #[tokio::test]
    async fn assemble_seo_bodies_skips_dynamic_paths() {
        let (_, sitemap) = assemble_seo_bodies(
            "prod",
            Some("https://example.com"),
            None,
            &[],
            &[],
            &["/posts/{slug}"],
        )
        .await;
        assert!(
            !sitemap.contains("/posts/"),
            "should skip paths with params; got:\n{sitemap}"
        );
    }

    #[tokio::test]
    async fn assemble_seo_bodies_uses_sitemap_url_override() {
        let (robots, _) = assemble_seo_bodies(
            "prod",
            Some("https://example.com"),
            Some("https://cdn.example.com/sitemap.xml"),
            &[],
            &[],
            &[],
        )
        .await;
        assert!(
            robots.contains("Sitemap: https://cdn.example.com/sitemap.xml"),
            "should use override url; got:\n{robots}"
        );
    }

    #[tokio::test]
    async fn assemble_seo_bodies_trims_trailing_slash() {
        let (_, sitemap) = assemble_seo_bodies(
            "prod",
            Some("https://example.com/"),
            None,
            &[],
            &[],
            &["/about"],
        )
        .await;
        assert!(
            sitemap.contains("https://example.com/about"),
            "base_url trailing slash should be trimmed; got:\n{sitemap}"
        );
    }
}