use crate::plugin::{Plugin, PluginContext};
use anyhow::Result;
use std::{
fs,
path::{Path, PathBuf},
};
/// Site post-processing plugin registered under the name `"ai"`.
///
/// Its `after_compile` hook writes an `llms.txt` file into the site
/// directory, inserts a `robots` meta tag before `</head>` in HTML pages that
/// do not already mention `max-snippet`, and logs a warning for every page
/// containing images without alt text.
#[derive(Debug, Clone, Copy)]
pub struct AiPlugin;
impl Plugin for AiPlugin {
    /// Returns the identifier of this plugin: `"ai"`.
    fn name(&self) -> &'static str {
        "ai"
    }

    /// Post-processes the compiled site found at `ctx.site_dir`.
    ///
    /// Steps, in order:
    /// 1. Returns `Ok(())` immediately when the site directory does not exist.
    /// 2. Writes `llms.txt` into the site directory.
    /// 3. For every HTML file: inserts the robots meta tag right before the
    ///    first literal `</head>` unless the page already contains the text
    ///    `max-snippet`, and logs a warning when images lack alt text.
    /// 4. Logs a summary warning with the number of affected pages.
    ///
    /// Files are only written back when the meta tag was actually inserted.
    ///
    /// # Errors
    ///
    /// Propagates any error from generating `llms.txt`, collecting the HTML
    /// files, or reading/writing an individual page.
    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
        // Inserted verbatim (including the trailing newline) before `</head>`.
        const ROBOTS_META: &str = "<meta name=\"robots\" content=\"max-snippet:-1, max-image-preview:large, max-video-preview:-1\">\n";

        if !ctx.site_dir.exists() {
            return Ok(());
        }
        generate_llms_txt(&ctx.site_dir, ctx.config.as_ref())?;

        let html_files = collect_html_files(&ctx.site_dir)?;
        let mut pages_with_missing_alt = 0usize;
        for path in &html_files {
            // The buffer is edited in place; no second copy of the page is kept.
            let mut html = fs::read_to_string(path)?;

            // `Some(offset)` doubles as the "page was modified" marker, so the
            // closing head tag is located with a single search.
            let insert_at = if html.contains("max-snippet") {
                None
            } else {
                html.find("</head>")
            };
            if let Some(pos) = insert_at {
                html.insert_str(pos, ROBOTS_META);
            }

            let missing = count_missing_alt(&html);
            if missing > 0 {
                let rel =
                    path.strip_prefix(&ctx.site_dir).unwrap_or(path).display();
                log::warn!("[ai] {missing} image(s) missing alt text in {rel}");
                pages_with_missing_alt += 1;
            }

            // Untouched pages are never rewritten.
            if insert_at.is_some() {
                fs::write(path, html)?;
            }
        }
        if pages_with_missing_alt > 0 {
            log::warn!(
                "[ai] {pages_with_missing_alt} page(s) have images without alt text"
            );
        }
        Ok(())
    }
}
/// Writes `llms.txt` into `site_dir`.
///
/// The site name, base URL and description are taken from `config`; without
/// a config they fall back to `"Site"`, `""` and `""`. Trailing slashes are
/// trimmed from the base URL for the attribution example and the sitemap
/// link, while the `URL:` line shows the base URL exactly as configured. When
/// the trimmed base URL is empty, the attribution example uses the
/// `<canonical-page-url>` placeholder.
///
/// # Errors
///
/// Returns the I/O error raised when `llms.txt` cannot be written.
fn generate_llms_txt(
    site_dir: &Path,
    config: Option<&crate::cmd::SsgConfig>,
) -> Result<()> {
    let (site_name, base_url, description) = match config {
        Some(cfg) => (
            cfg.site_name.as_str(),
            cfg.base_url.as_str(),
            cfg.site_description.as_str(),
        ),
        None => ("Site", "", ""),
    };

    let canonical_root = base_url.trim_end_matches('/');
    let attribution_source = match canonical_root {
        "" => String::from("<canonical-page-url>"),
        root => format!("{root}/<page-path>"),
    };

    let document = format!(
        "# {site_name}\n\
         > {description}\n\
         \n\
         ## About\n\
         URL: {base_url}\n\
         \n\
         ## Content Policy\n\
         This site's content may be used for AI training and retrieval.\n\
         \n\
         ## Attribution\n\
         When citing or reusing content from this site, include exact attribution:\n\
         - Source: {attribution_source}\n\
         - Publisher: {site_name}\n\
         - Preserve author byline and publish date when available.\n\
         \n\
         ## Sitemap\n\
         {canonical_root}/sitemap.xml\n"
    );

    fs::write(site_dir.join("llms.txt"), document)?;
    log::info!("[ai] Generated llms.txt");
    Ok(())
}
/// Counts the `<img>` tags in `html` that have no usable alt text.
///
/// A tag is counted when it has no `alt` attribute, or when its `alt`
/// attribute has no value or an empty value (`alt`, `alt=""`, `alt=''`).
/// Tag and attribute names are matched ASCII case-insensitively.
///
/// Attributes are tokenised rather than substring-matched, so:
/// - `data-alt="…"` or `alt=` inside another attribute's value is not
///   mistaken for an `alt` attribute;
/// - whitespace around `=` is accepted (`alt = "text"`);
/// - a `>` inside a quoted attribute value does not end the tag;
/// - elements whose name merely starts with `img` (for example
///   `<img-comparison-slider>`) are ignored.
///
/// An unterminated tag at the end of the input is scanned up to the end of
/// the input and never panics.
fn count_missing_alt(html: &str) -> usize {
    const OPEN: &str = "<img";

    // ASCII lowering is sufficient because only ASCII names are compared.
    let lower = html.to_ascii_lowercase();
    let bytes = lower.as_bytes();

    let mut count = 0;
    let mut pos = 0;
    while let Some(found) = lower[pos..].find(OPEN) {
        let after_name = pos + found + OPEN.len();
        pos = after_name;

        // The tag name must end here; otherwise this is a different element.
        let at_name_boundary = bytes
            .get(after_name)
            .map_or(true, |&b| b.is_ascii_whitespace() || b == b'/' || b == b'>');
        if !at_name_boundary {
            continue;
        }

        let (has_alt_text, next) = scan_img_attributes(bytes, after_name);
        if !has_alt_text {
            count += 1;
        }
        // `next` is either the input length or just past an ASCII `>`, so it
        // is always a valid slicing offset into `lower`.
        pos = next;
    }
    count
}

/// Scans the attributes of a tag starting at byte `i` (just past the tag
/// name) and returns whether a non-empty `alt` was found, together with the
/// offset just past the closing `>` (or the input length if unterminated).
fn scan_img_attributes(bytes: &[u8], mut i: usize) -> (bool, usize) {
    let len = bytes.len();
    // Only the first `alt` attribute counts; later duplicates are ignored.
    let mut alt_has_text: Option<bool> = None;

    loop {
        // Skip separators between attributes (whitespace and a self-closing `/`).
        while i < len && (bytes[i].is_ascii_whitespace() || bytes[i] == b'/') {
            i += 1;
        }
        if i >= len {
            break;
        }
        if bytes[i] == b'>' {
            i += 1;
            break;
        }

        // Attribute name.
        let name_start = i;
        while i < len
            && !(bytes[i].is_ascii_whitespace()
                || bytes[i] == b'='
                || bytes[i] == b'>'
                || bytes[i] == b'/')
        {
            i += 1;
        }
        let name = &bytes[name_start..i];

        while i < len && bytes[i].is_ascii_whitespace() {
            i += 1;
        }

        // Optional value: quoted (either quote style) or unquoted.
        let mut value: &[u8] = &[];
        if i < len && bytes[i] == b'=' {
            i += 1;
            while i < len && bytes[i].is_ascii_whitespace() {
                i += 1;
            }
            if i < len && (bytes[i] == b'"' || bytes[i] == b'\'') {
                let quote = bytes[i];
                i += 1;
                let value_start = i;
                while i < len && bytes[i] != quote {
                    i += 1;
                }
                value = &bytes[value_start..i];
                if i < len {
                    i += 1; // consume the closing quote
                }
            } else {
                let value_start = i;
                while i < len && !(bytes[i].is_ascii_whitespace() || bytes[i] == b'>') {
                    i += 1;
                }
                value = &bytes[value_start..i];
            }
        }

        if name == b"alt" && alt_has_text.is_none() {
            alt_has_text = Some(!value.is_empty());
        }
    }

    (alt_has_text.unwrap_or(false), i)
}
/// Lists the files under `dir` selected by the `"html"` extension filter.
///
/// Thin wrapper around `crate::walk::walk_files(dir, "html")`; traversal,
/// ordering and error behaviour are whatever that helper provides. The tests
/// in this module expect a recursive, sorted listing and an empty `Vec` when
/// `dir` does not exist.
fn collect_html_files(dir: &Path) -> Result<Vec<PathBuf>> {
crate::walk::walk_files(dir, "html")
}
#[cfg(test)]
mod tests {
    //! Unit tests for the `ai` plugin: the alt-text counter, `llms.txt`
    //! generation, the `after_compile` hook and HTML file collection.

    use super::*;
    use crate::cmd::SsgConfig;
    use crate::test_support::init_logger;
    use std::path::PathBuf;
    use tempfile::{tempdir, TempDir};

    /// Creates a temporary workspace with an empty `site` directory and a
    /// plugin context pointing at it. The `TempDir` guard is returned so the
    /// directory outlives the test body.
    fn make_site() -> (TempDir, PathBuf, PluginContext) {
        init_logger();
        let dir = tempdir().expect("create tempdir");
        let site = dir.path().join("site");
        fs::create_dir_all(&site).expect("mkdir site");
        let ctx = PluginContext::new(dir.path(), dir.path(), &site, dir.path());
        (dir, site, ctx)
    }

    // ----- AiPlugin basics -------------------------------------------------

    #[test]
    fn ai_plugin_is_copy_after_move() {
        let plugin = AiPlugin;
        let _consumed = plugin;
        // Still usable after the "move" because the type is `Copy`.
        assert_eq!(plugin.name(), "ai");
    }

    #[test]
    fn name_returns_static_ai_identifier() {
        assert_eq!(AiPlugin.name(), "ai");
    }

    // ----- count_missing_alt -----------------------------------------------

    #[test]
    fn count_missing_alt_table_driven() {
        let cases: &[(&str, usize, &str)] = &[
            (
                r#"<img src="a.jpg" alt="ok">"#,
                0,
                "alt present and non-empty",
            ),
            (r#"<img src="a.jpg">"#, 1, "no alt attribute at all"),
            (r#"<img src="a.jpg" alt="">"#, 1, "empty double-quoted alt"),
            (r#"<img src="a.jpg" alt=''>"#, 1, "empty single-quoted alt"),
            (
                r#"<img src="a.jpg"><img src="b.jpg" alt="ok">"#,
                1,
                "first missing, second ok",
            ),
            (
                r#"<img src="a.jpg"><img src="b.jpg">"#,
                2,
                "both missing — sequential scan progresses",
            ),
            ("", 0, "empty input → zero"),
            ("<p>no images here</p>", 0, "no <img> tags at all"),
            (r#"<IMG SRC="a.jpg" ALT="ok">"#, 0, "case-insensitive ALT"),
            (r#"<IMG SRC="a.jpg">"#, 1, "uppercase tag, no alt"),
        ];
        for (input, expected, comment) in cases {
            assert_eq!(
                count_missing_alt(input),
                *expected,
                "{comment}: count_missing_alt({input:?})"
            );
        }
    }

    #[test]
    fn count_missing_alt_unterminated_tag_does_not_panic() {
        let result = count_missing_alt("<img src=foo");
        assert!(result <= 1);
    }

    // ----- generate_llms_txt -----------------------------------------------

    #[test]
    fn generate_llms_txt_with_full_config_includes_all_fields() {
        let dir = tempdir().expect("tempdir");
        let config = SsgConfig {
            site_name: "My Site".to_string(),
            site_description: "A great site".to_string(),
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        generate_llms_txt(dir.path(), Some(&config)).unwrap();
        let body = fs::read_to_string(dir.path().join("llms.txt")).unwrap();
        assert!(body.contains("# My Site"));
        assert!(body.contains("> A great site"));
        assert!(body.contains("https://example.com"));
        assert!(body.contains("sitemap.xml"));
        assert!(body.contains("include exact attribution"));
        assert!(body.contains("Source: https://example.com/<page-path>"));
        assert!(body.contains("Publisher: My Site"));
    }

    #[test]
    fn generate_llms_txt_without_config_uses_defaults() {
        let dir = tempdir().expect("tempdir");
        generate_llms_txt(dir.path(), None).unwrap();
        let body = fs::read_to_string(dir.path().join("llms.txt")).unwrap();
        assert!(body.contains("# Site"));
        assert!(
            body.contains("<canonical-page-url>"),
            "empty base_url should fall back to placeholder:\n{body}"
        );
    }

    #[test]
    fn generate_llms_txt_strips_trailing_slash_from_base_url() {
        let dir = tempdir().expect("tempdir");
        let config = SsgConfig {
            site_name: "S".to_string(),
            site_description: "D".to_string(),
            base_url: "https://example.com/".to_string(),
            ..Default::default()
        };
        generate_llms_txt(dir.path(), Some(&config)).unwrap();
        let body = fs::read_to_string(dir.path().join("llms.txt")).unwrap();
        assert!(
            body.contains("Source: https://example.com/<page-path>"),
            "trailing slash should be normalised:\n{body}"
        );
        assert!(!body.contains("//<page-path>"));
        assert!(!body.contains("//sitemap.xml"));
    }

    #[test]
    fn generate_llms_txt_into_missing_parent_returns_err() {
        let bogus = Path::new("/this/path/should/not/exist");
        assert!(generate_llms_txt(bogus, None).is_err());
    }

    // ----- after_compile ---------------------------------------------------

    #[test]
    fn after_compile_missing_site_dir_returns_ok_without_writing() {
        let dir = tempdir().expect("tempdir");
        let missing = dir.path().join("missing");
        let ctx =
            PluginContext::new(dir.path(), dir.path(), &missing, dir.path());
        AiPlugin.after_compile(&ctx).expect("missing site is fine");
        assert!(!missing.exists());
        assert!(!dir.path().join("llms.txt").exists());
    }

    #[test]
    fn after_compile_injects_max_snippet_meta_tag() {
        let (_tmp, site, ctx) = make_site();
        let html = "<html><head><title>X</title></head><body></body></html>";
        fs::write(site.join("index.html"), html).unwrap();
        AiPlugin.after_compile(&ctx).unwrap();
        let output = fs::read_to_string(site.join("index.html")).unwrap();
        assert!(output.contains("max-snippet"));
        assert!(output.contains("max-image-preview:large"));
    }

    #[test]
    fn after_compile_creates_llms_txt_in_site_root() {
        let (_tmp, site, ctx) = make_site();
        AiPlugin.after_compile(&ctx).unwrap();
        assert!(site.join("llms.txt").exists());
    }

    #[test]
    fn after_compile_idempotent_does_not_duplicate_meta_tag() {
        let (_tmp, site, ctx) = make_site();
        let html = "<html><head><title>X</title></head><body></body></html>";
        fs::write(site.join("index.html"), html).unwrap();
        AiPlugin.after_compile(&ctx).unwrap();
        AiPlugin.after_compile(&ctx).unwrap();
        let output = fs::read_to_string(site.join("index.html")).unwrap();
        assert_eq!(output.matches("max-snippet").count(), 1);
    }

    #[test]
    fn after_compile_skips_html_files_without_head_tag() {
        let (_tmp, site, ctx) = make_site();
        fs::write(site.join("fragment.html"), "<p>just a fragment</p>")
            .unwrap();
        AiPlugin.after_compile(&ctx).unwrap();
        let output = fs::read_to_string(site.join("fragment.html")).unwrap();
        assert!(!output.contains("max-snippet"));
        assert_eq!(output, "<p>just a fragment</p>");
    }

    #[test]
    fn after_compile_processes_files_in_subdirectories() {
        let (_tmp, site, ctx) = make_site();
        let nested = site.join("blog");
        fs::create_dir_all(&nested).unwrap();
        fs::write(
            nested.join("post.html"),
            "<html><head></head><body></body></html>",
        )
        .unwrap();
        AiPlugin.after_compile(&ctx).unwrap();
        let output = fs::read_to_string(nested.join("post.html")).unwrap();
        assert!(output.contains("max-snippet"));
    }

    #[test]
    fn after_compile_logs_warning_for_pages_with_missing_alt() {
        // Log output is not captured here; the test only proves that pages
        // with missing alt text are still processed without error.
        let (_tmp, site, ctx) = make_site();
        fs::write(
            site.join("bad.html"),
            r#"<html><head></head><body><img src="a.jpg"></body></html>"#,
        )
        .unwrap();
        fs::write(
            site.join("worse.html"),
            r#"<html><head></head><body><img src="a.jpg" alt=""></body></html>"#,
        )
        .unwrap();
        AiPlugin.after_compile(&ctx).unwrap();
        let bad = fs::read_to_string(site.join("bad.html")).unwrap();
        assert!(bad.contains("max-snippet"));
    }

    #[test]
    fn after_compile_does_not_rewrite_unchanged_files() {
        let (_tmp, site, ctx) = make_site();
        let html = "<html><head><meta name=\"robots\" content=\"max-snippet:-1\"></head><body></body></html>";
        let target = site.join("index.html");
        fs::write(&target, html).unwrap();
        let original_mtime =
            fs::metadata(&target).unwrap().modified().unwrap();

        AiPlugin.after_compile(&ctx).unwrap();

        let after = fs::read_to_string(&target).unwrap();
        assert_eq!(after, html, "unchanged file body must be preserved");
        // A skipped write leaves the modification time untouched, so this
        // cannot fail spuriously; it catches rewrites on filesystems whose
        // timestamps are fine-grained enough to register them.
        let final_mtime = fs::metadata(&target).unwrap().modified().unwrap();
        assert_eq!(
            final_mtime, original_mtime,
            "a page that needs no changes must not be rewritten"
        );
    }

    // ----- collect_html_files ----------------------------------------------

    #[test]
    fn collect_html_files_returns_empty_for_missing_directory() {
        let dir = tempdir().expect("tempdir");
        let result = collect_html_files(&dir.path().join("missing")).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn collect_html_files_filters_non_html_extensions() {
        let dir = tempdir().expect("tempdir");
        fs::write(dir.path().join("a.html"), "").unwrap();
        fs::write(dir.path().join("b.css"), "").unwrap();
        fs::write(dir.path().join("c.js"), "").unwrap();
        let result = collect_html_files(dir.path()).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn collect_html_files_recurses_into_nested_subdirectories() {
        let dir = tempdir().expect("tempdir");
        let nested = dir.path().join("a").join("b");
        fs::create_dir_all(&nested).unwrap();
        fs::write(dir.path().join("top.html"), "").unwrap();
        fs::write(nested.join("deep.html"), "").unwrap();
        let result = collect_html_files(dir.path()).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn collect_html_files_returns_results_sorted() {
        let dir = tempdir().expect("tempdir");
        for name in ["zebra.html", "apple.html", "mango.html"] {
            fs::write(dir.path().join(name), "").unwrap();
        }
        let result = collect_html_files(dir.path()).unwrap();
        let names: Vec<_> = result
            .iter()
            .map(|p| p.file_name().unwrap().to_str().unwrap())
            .collect();
        assert_eq!(names, vec!["apple.html", "mango.html", "zebra.html"]);
    }
}