use crate::plugin::{Plugin, PluginContext};
use anyhow::Result;
use std::{fs, path::Path, process::Command};
/// Runtime configuration for the LLM plugin.
#[derive(Debug, Clone)]
pub struct LlmConfig {
    /// Ollama model name (e.g. "llama3").
    pub model: String,
    /// Base URL of the Ollama server (no trailing path).
    pub endpoint: String,
    /// When true, log what would change but never write files.
    pub dry_run: bool,
    /// Maximum acceptable readability grade level for content.
    pub target_grade: f64,
    /// How many extra simplification prompts to try when output
    /// still exceeds `target_grade`.
    pub max_refinement_attempts: usize,
}
impl Default for LlmConfig {
    /// Local-first defaults: `llama3` on a local Ollama server, real
    /// writes enabled, grade-8 target with a single refinement pass.
    fn default() -> Self {
        let model = String::from("llama3");
        let endpoint = String::from("http://localhost:11434");
        Self {
            model,
            endpoint,
            dry_run: false,
            target_grade: 8.0,
            max_refinement_attempts: 1,
        }
    }
}
/// Plugin that augments compiled output and Markdown content using a
/// locally running LLM (Ollama).
#[derive(Debug)]
pub struct LlmPlugin {
    // Model/endpoint settings and refinement policy.
    config: LlmConfig,
}
impl LlmPlugin {
#[must_use]
pub const fn new(config: LlmConfig) -> Self {
Self { config }
}
}
/// Readability scores for a single Markdown file.
/// Numeric fields are pre-rounded to one decimal place for reporting.
#[derive(Debug, Clone, serde::Serialize)]
pub struct FileAuditResult {
    /// Path relative to the audited content directory.
    pub path: String,
    /// Estimated school grade level of the body text.
    pub grade_level: f64,
    /// 0-100 reading-ease score (higher = easier).
    pub reading_ease: f64,
    /// Mean words per sentence.
    pub avg_sentence_len: f64,
    /// Whether `grade_level` is at or below the audit target.
    pub passes: bool,
}
/// Aggregate result of auditing a content directory.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AuditReport {
    /// Grade threshold files were compared against.
    pub target_grade: f64,
    /// Number of Markdown files scored.
    pub total_files: usize,
    /// Files at or below `target_grade`.
    pub passing: usize,
    /// Files above `target_grade`.
    pub failing: usize,
    /// Per-file details.
    pub results: Vec<FileAuditResult>,
}
/// Outcome of one AI rewrite attempt on a failing file.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AiFixResult {
    /// Path relative to the content directory.
    pub path: String,
    /// Grade level before the attempt.
    pub before_grade: f64,
    /// Grade level after the attempt (equals `before_grade` when skipped).
    pub after_grade: f64,
    /// True when the rewrite strictly lowered the grade level.
    pub improved: bool,
    /// One of: "skipped", "dry-run", "rewritten", "no-improvement".
    pub action: String,
}
/// Summary of an audit-and-fix run across a content directory.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AiFixReport {
    /// Files scored in the initial audit.
    pub total_audited: usize,
    /// Files that exceeded the target grade.
    pub total_failing: usize,
    /// Files whose rewrite improved the grade (includes dry-run "fixes").
    pub total_fixed: usize,
    /// Per-file outcomes for every failing file.
    pub results: Vec<AiFixResult>,
}
impl LlmPlugin {
/// Scans every `.md` file under `content_dir` and scores it against
/// `target_grade`, producing an [`AuditReport`].
///
/// Unreadable files are silently skipped. Scores are rounded to one
/// decimal place for stable report output.
///
/// # Errors
/// Currently never fails; the `Result` keeps the signature uniform with
/// the other audit entry points.
pub fn audit_all(
    content_dir: &Path,
    target_grade: f64,
) -> Result<AuditReport> {
    // Round to one decimal place for reporting.
    let round1 = |v: f64| (v * 10.0).round() / 10.0;
    let files =
        crate::walk::walk_files(content_dir, "md").unwrap_or_default();
    let mut results = Vec::with_capacity(files.len());
    for file in &files {
        let raw = match fs::read_to_string(file) {
            Ok(raw) => raw,
            // Skip files that cannot be read as UTF-8 text.
            Err(_) => continue,
        };
        let body = strip_frontmatter(&raw);
        let lang = extract_frontmatter_lang(&raw);
        let audit = ReadabilityAudit::analyze_with_lang(&body, &lang);
        let rel_path = file
            .strip_prefix(content_dir)
            .unwrap_or(file)
            .to_string_lossy()
            .into_owned();
        results.push(FileAuditResult {
            path: rel_path,
            grade_level: round1(audit.grade_level),
            reading_ease: round1(audit.reading_ease),
            avg_sentence_len: round1(audit.avg_sentence_len),
            // Pass/fail uses the unrounded grade.
            passes: audit.grade_level <= target_grade,
        });
    }
    let passing = results.iter().filter(|r| r.passes).count();
    Ok(AuditReport {
        target_grade,
        total_files: results.len(),
        passing,
        failing: results.len() - passing,
        results,
    })
}
/// Audits all Markdown under `content_dir` and rewrites any file whose
/// grade level exceeds `config.target_grade`, using the configured
/// Ollama model. Returns the number of files actually rewritten
/// (always 0 in dry-run mode or when Ollama is unreachable).
///
/// A rewrite is only persisted when the refined body scores a strictly
/// lower grade level than the original body.
///
/// # Errors
/// Returns an error if the audit fails or writing a rewritten file fails.
pub fn audit_and_fix(
    content_dir: &Path,
    config: &LlmConfig,
) -> Result<usize> {
    if !is_ollama_available(&config.endpoint) {
        log::warn!(
            "[llm] Ollama not reachable at {}, skipping auto-fix",
            config.endpoint
        );
        return Ok(0);
    }
    let report = Self::audit_all(content_dir, config.target_grade)?;
    let failing: Vec<_> =
        report.results.iter().filter(|r| !r.passes).collect();
    if failing.is_empty() {
        log::info!(
            "[llm] All {} file(s) pass grade {:.0}",
            report.total_files,
            config.target_grade
        );
        return Ok(0);
    }
    log::info!(
        "[llm] {} file(s) exceed grade {:.0}, attempting refinement",
        failing.len(),
        config.target_grade
    );
    let mut rewritten = 0usize;
    for result in &failing {
        let path = content_dir.join(&result.path);
        // Fix: skip files that vanished or became unreadable since the
        // audit instead of aborting the whole batch with `?` — this
        // matches the behavior of `audit_and_fix_with_report`.
        let Ok(original) = fs::read_to_string(&path) else {
            continue;
        };
        let (frontmatter_block, body) = split_frontmatter(&original);
        let body_trimmed = body.trim();
        if body_trimmed.is_empty() {
            // Frontmatter-only file: nothing to rewrite.
            continue;
        }
        let prompt = format!(
            "Rewrite this Markdown content at a 6th-grade reading level. \
             Rules:\n\
             - Max 20 words per sentence\n\
             - Max 4 sentences per paragraph\n\
             - Use simple, common words\n\
             - Keep ALL facts, numbers, dates, and code blocks exactly the same\n\
             - Keep ALL Markdown headings (#, ##, ###) and formatting\n\
             - Return ONLY the rewritten Markdown, nothing else\n\n\
             {body_trimmed}"
        );
        if let Some(refined) = generate_with_refinement(
            &config.endpoint,
            &config.model,
            &prompt,
            config.target_grade,
            config.max_refinement_attempts,
        ) {
            // Compare both versions with the same (English) formula so
            // the improvement check is like-for-like.
            let refined_audit = ReadabilityAudit::analyze(&refined);
            let original_audit = ReadabilityAudit::analyze(body_trimmed);
            if refined_audit.grade_level < original_audit.grade_level {
                if config.dry_run {
                    log::info!(
                        "[llm] [dry-run] {}: grade {:.1} → {:.1}",
                        result.path,
                        original_audit.grade_level,
                        refined_audit.grade_level
                    );
                } else {
                    let output =
                        format!("{frontmatter_block}\n{refined}\n");
                    fs::write(&path, output)?;
                    log::info!(
                        "[llm] Rewrote {}: grade {:.1} → {:.1}",
                        result.path,
                        original_audit.grade_level,
                        refined_audit.grade_level
                    );
                    rewritten += 1;
                }
            } else {
                log::warn!(
                    "[llm] Could not improve {}: grade {:.1} (refined: {:.1})",
                    result.path,
                    original_audit.grade_level,
                    refined_audit.grade_level
                );
            }
        }
    }
    Ok(rewritten)
}
/// Audits all Markdown under `content_dir`, asks the LLM to rewrite
/// every file that exceeds `config.target_grade`, and returns a
/// structured [`AiFixReport`] describing the outcome for each file.
///
/// Per-file `action` values:
/// - `"skipped"`: file unreadable, body empty, or the LLM call failed
/// - `"dry-run"`: rewrite improved the grade but `dry_run` suppressed the write
/// - `"rewritten"`: improved rewrite persisted to disk
/// - `"no-improvement"`: LLM output did not lower the grade level
///
/// # Errors
/// Returns an error if the initial audit fails or a file write fails.
pub fn audit_and_fix_with_report(
    content_dir: &Path,
    config: &LlmConfig,
) -> Result<AiFixReport> {
    // No reachable endpoint: return an empty report instead of failing.
    if !is_ollama_available(&config.endpoint) {
        log::warn!(
            "[ai-fix] Ollama not reachable at {}, skipping",
            config.endpoint
        );
        return Ok(AiFixReport {
            total_audited: 0,
            total_failing: 0,
            total_fixed: 0,
            results: vec![],
        });
    }
    let report = Self::audit_all(content_dir, config.target_grade)?;
    let failing: Vec<_> =
        report.results.iter().filter(|r| !r.passes).collect();
    let mut fix_results = Vec::new();
    for result in &failing {
        let path = content_dir.join(&result.path);
        // Unreadable file: record as skipped rather than aborting.
        let Ok(original) = fs::read_to_string(&path) else {
            fix_results.push(AiFixResult {
                path: result.path.clone(),
                before_grade: result.grade_level,
                after_grade: result.grade_level,
                improved: false,
                action: "skipped".to_string(),
            });
            continue;
        };
        let (frontmatter_block, body) = split_frontmatter(&original);
        let body_trimmed = body.trim();
        if body_trimmed.is_empty() {
            // Frontmatter-only file: nothing to rewrite.
            fix_results.push(AiFixResult {
                path: result.path.clone(),
                before_grade: result.grade_level,
                after_grade: result.grade_level,
                improved: false,
                action: "skipped".to_string(),
            });
            continue;
        }
        let prompt = format!(
            "Rewrite this Markdown content at a 6th-grade reading level. \
             Rules:\n\
             - Max 20 words per sentence\n\
             - Max 4 sentences per paragraph\n\
             - Use simple, common words\n\
             - Keep ALL facts, numbers, dates, and code blocks exactly the same\n\
             - Keep ALL Markdown headings (#, ##, ###) and formatting\n\
             - Return ONLY the rewritten Markdown, nothing else\n\n\
             {body_trimmed}"
        );
        if let Some(refined) = generate_with_refinement(
            &config.endpoint,
            &config.model,
            &prompt,
            config.target_grade,
            config.max_refinement_attempts,
        ) {
            // Score both versions with the same (English) formula so the
            // before/after comparison is like-for-like.
            let refined_audit = ReadabilityAudit::analyze(&refined);
            let original_audit = ReadabilityAudit::analyze(body_trimmed);
            if refined_audit.grade_level < original_audit.grade_level {
                if !config.dry_run {
                    let output =
                        format!("{frontmatter_block}\n{refined}\n");
                    fs::write(&path, output)?;
                }
                fix_results.push(AiFixResult {
                    path: result.path.clone(),
                    // Report grades rounded to one decimal place.
                    before_grade: (original_audit.grade_level * 10.0)
                        .round()
                        / 10.0,
                    after_grade: (refined_audit.grade_level * 10.0).round()
                        / 10.0,
                    improved: true,
                    action: if config.dry_run {
                        "dry-run".to_string()
                    } else {
                        "rewritten".to_string()
                    },
                });
            } else {
                fix_results.push(AiFixResult {
                    path: result.path.clone(),
                    before_grade: (original_audit.grade_level * 10.0)
                        .round()
                        / 10.0,
                    after_grade: (refined_audit.grade_level * 10.0).round()
                        / 10.0,
                    improved: false,
                    action: "no-improvement".to_string(),
                });
            }
        } else {
            // LLM call failed entirely.
            fix_results.push(AiFixResult {
                path: result.path.clone(),
                before_grade: result.grade_level,
                after_grade: result.grade_level,
                improved: false,
                action: "skipped".to_string(),
            });
        }
    }
    let total_fixed = fix_results.iter().filter(|r| r.improved).count();
    Ok(AiFixReport {
        total_audited: report.total_files,
        total_failing: failing.len(),
        total_fixed,
        results: fix_results,
    })
}
}
/// Splits content into `(frontmatter, body)` where the frontmatter string
/// keeps its delimiters (`---` or `+++`) and any leading whitespace, and
/// the body is everything after the closing delimiter. When no complete
/// frontmatter block exists, returns `("", content)`.
fn split_frontmatter(content: &str) -> (String, String) {
    let stripped = content.trim_start();
    let ws_len = content.len() - stripped.len();
    for marker in ["---", "+++"] {
        let Some(inner) = stripped.strip_prefix(marker) else {
            continue;
        };
        let Some(close) = inner.find(marker) else {
            // Opening delimiter without a closing one: not frontmatter.
            continue;
        };
        let fm_len = marker.len() + close + marker.len();
        let (fm, body) = stripped.split_at(fm_len);
        // Re-attach whatever whitespace preceded the opening delimiter.
        return (format!("{}{fm}", &content[..ws_len]), body.to_string());
    }
    (String::new(), content.to_string())
}
/// Extracts a language code from YAML-style (`lang: en`) or TOML-style
/// (`lang = "en"`) frontmatter; returns an empty string when none found.
/// Keys checked: `language` and `lang`, colon form first.
fn extract_frontmatter_lang(content: &str) -> String {
    let trimmed = content.trim_start();
    for delim in ["---", "+++"] {
        if let Some(rest) = trimmed.strip_prefix(delim) {
            if let Some(end) = rest.find(delim) {
                let fm = &rest[..end];
                // Pass 1: YAML-style `key: value`.
                for line in fm.lines() {
                    let line = line.trim();
                    for key in ["language:", "lang:"] {
                        if let Some(val) = line.strip_prefix(key) {
                            // Strip surrounding quotes, if any.
                            let val =
                                val.trim().trim_matches('"').trim_matches('\'');
                            if !val.is_empty() {
                                return val.to_string();
                            }
                        }
                    }
                }
                // Pass 2: TOML-style `key = value`.
                // NOTE(review): `starts_with("lang")` also matches other
                // keys beginning with that prefix (e.g. `langdir`) —
                // confirm this looseness is intended.
                for line in fm.lines() {
                    let line = line.trim();
                    for key in ["language", "lang"] {
                        if line.starts_with(key) {
                            if let Some(val) = line.split('=').nth(1) {
                                let val = val
                                    .trim()
                                    .trim_matches('"')
                                    .trim_matches('\'');
                                if !val.is_empty() {
                                    return val.to_string();
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    String::new()
}
/// Returns the content with any leading `---`/`+++` frontmatter block
/// removed; content without a complete frontmatter block is returned
/// unchanged.
fn strip_frontmatter(content: &str) -> String {
    let stripped = content.trim_start();
    ["---", "+++"]
        .iter()
        .find_map(|marker| {
            let inner = stripped.strip_prefix(marker)?;
            let close = inner.find(marker)?;
            // Everything after the closing delimiter is the body.
            Some(inner[close + marker.len()..].to_string())
        })
        .unwrap_or_else(|| content.to_string())
}
impl Plugin for LlmPlugin {
    /// Plugin identifier used in configuration and log prefixes.
    fn name(&self) -> &'static str {
        "llm"
    }
    /// Post-compile pass: for each generated HTML page, optionally adds a
    /// missing/short SEO meta description (plus a JSON-LD description
    /// block) and generates alt text for images lacking it, via the local
    /// Ollama endpoint. No-op when the site directory is absent or Ollama
    /// is unreachable.
    fn after_compile(&self, ctx: &PluginContext) -> Result<()> {
        if !ctx.site_dir.exists() {
            return Ok(());
        }
        if !is_ollama_available(&self.config.endpoint) {
            log::warn!(
                "[llm] Ollama not reachable at {}, skipping AI augmentation",
                self.config.endpoint
            );
            return Ok(());
        }
        let html_files = ctx.get_html_files();
        let mut augmented = 0usize;
        for path in &html_files {
            let html = fs::read_to_string(path)?;
            // Work on a copy; only written back if something changed.
            let mut modified = html.clone();
            if needs_meta_description(&modified) {
                if let Some(desc) = generate_meta_description(
                    &modified,
                    &self.config.model,
                    &self.config.endpoint,
                    self.config.target_grade,
                    self.config.max_refinement_attempts,
                ) {
                    let audit = ReadabilityAudit::analyze(&desc);
                    if self.config.dry_run {
                        // Dry run: log the would-be description and scores.
                        let rel = path
                            .strip_prefix(&ctx.site_dir)
                            .unwrap_or(path)
                            .display();
                        log::info!(
                            "[llm] [dry-run] {rel}: description = {desc}"
                        );
                        log::info!(
                            "[llm] [dry-run] {rel}: grade={:.1}, ease={:.1}, avg_sentence={:.1}",
                            audit.grade_level, audit.reading_ease, audit.avg_sentence_len
                        );
                    } else {
                        modified = inject_meta_description(&modified, &desc);
                        modified = inject_jsonld_description(&modified, &desc);
                    }
                }
            }
            // Mutates `modified` in place (unless dry-run).
            let alt_count = generate_missing_alt_text(
                &mut modified,
                &self.config.model,
                &self.config.endpoint,
                self.config.dry_run,
                path,
                &ctx.site_dir,
            );
            if !self.config.dry_run && modified != html {
                fs::write(path, &modified)?;
                augmented += 1;
            }
            // NOTE(review): a page that already counted above (its alt
            // changes are part of `modified`) is counted again here when
            // alt_count > 0, so `augmented` can double-count a page —
            // confirm this inflation of the log total is intended.
            if alt_count > 0 {
                augmented += 1;
            }
        }
        if augmented > 0 {
            log::info!(
                "[llm] Augmented {augmented} page(s) with model '{}'",
                self.config.model
            );
        }
        Ok(())
    }
}
/// Quick liveness probe: hits `endpoint` with the system `curl` binary
/// (2s timeout). Returns false when curl is missing, times out, or the
/// server responds with an HTTP error.
fn is_ollama_available(endpoint: &str) -> bool {
    let probe = Command::new("curl")
        .args(["-sf", "--max-time", "2", endpoint])
        .output();
    match probe {
        Ok(out) => out.status.success(),
        Err(_) => false,
    }
}
/// Returns true when the page has no `<meta name="description">` tag at
/// all, or has one whose content value (appearing after the name
/// attribute) is shorter than 50 bytes.
fn needs_meta_description(html: &str) -> bool {
    let Some(name_at) = html.find("name=\"description\"") else {
        // No description tag anywhere: definitely needs one.
        return true;
    };
    // Only a `content="` appearing after the name attribute is inspected;
    // a tag written content-first is treated as adequate.
    let Some(rel) = html[name_at..].find("content=\"") else {
        return false;
    };
    let value_start = name_at + rel + "content=\"".len();
    match html[value_start..].find('"') {
        Some(len) => len < 50,
        // Unterminated attribute: leave the page alone.
        None => false,
    }
}
/// Builds a short SEO meta description for a rendered page via the LLM.
/// Returns `None` when the page yields too little text (< 20 bytes of
/// extracted content) or the model call fails.
fn generate_meta_description(
    html: &str,
    model: &str,
    endpoint: &str,
    target_grade: f64,
    max_attempts: usize,
) -> Option<String> {
    // Use at most the first 500 characters of visible page text.
    let excerpt = extract_page_text(html, 500);
    if excerpt.len() < 20 {
        return None;
    }
    let request = format!(
        "Write a concise SEO meta description (120-155 characters) for this page content. \
         Use simple words and short sentences. \
         Return ONLY the description text, no quotes or explanation:\n\n{excerpt}"
    );
    generate_with_refinement(endpoint, model, &request, target_grade, max_attempts)
}
/// Inserts a `<meta name="description">` tag just before `</head>`,
/// HTML-escaping the description so it cannot break out of the attribute.
/// Pages without a `</head>` tag are returned unchanged.
fn inject_meta_description(html: &str, description: &str) -> String {
    // Fix: the replacements previously mapped each character to itself
    // (the escape entities had been lost), so nothing was escaped.
    // `&` must be escaped first so it doesn't re-escape the entities below.
    let escaped = description
        .replace('&', "&amp;")
        .replace('"', "&quot;")
        .replace('<', "&lt;");
    let tag = format!("<meta name=\"description\" content=\"{escaped}\">\n");
    if let Some(pos) = html.find("</head>") {
        let mut result = html.to_string();
        result.insert_str(pos, &tag);
        result
    } else {
        html.to_string()
    }
}
fn generate_missing_alt_text(
html: &mut String,
model: &str,
endpoint: &str,
dry_run: bool,
path: &Path,
site_dir: &Path,
) -> usize {
let mut count = 0;
let mut search_from = 0;
while let Some(start) = html[search_from..].find("<img") {
let abs_start = search_from + start;
let Some(tag_end) = html[abs_start..].find('>') else {
break;
};
let tag_end_abs = abs_start + tag_end + 1;
let tag = &html[abs_start..tag_end_abs];
if !tag.contains("alt=") || tag.contains("alt=\"\"") {
let src = extract_attr(tag, "src").unwrap_or_default();
let prompt = format!(
"Describe this image for an alt text attribute. The image file is named '{}'. \
Return ONLY the alt text (max 125 characters), no quotes:\n",
src
);
if let Some(alt) = call_ollama(endpoint, model, &prompt) {
let alt = alt.trim().replace('"', """);
if dry_run {
let rel =
path.strip_prefix(site_dir).unwrap_or(path).display();
log::info!(
"[llm] [dry-run] {rel}: alt=\"{alt}\" for {src}"
);
} else {
let new_tag = if tag.contains("alt=\"\"") {
tag.replace("alt=\"\"", &format!("alt=\"{alt}\""))
} else {
tag.replace("<img", &format!("<img alt=\"{alt}\""))
};
html.replace_range(abs_start..tag_end_abs, &new_tag);
}
count += 1;
}
}
search_from = tag_end_abs;
}
count
}
/// Extracts up to roughly `max_chars` bytes of visible text from a page,
/// starting at `<main>` (or `<body>`, or the start of the document),
/// skipping tag contents and control characters, with whitespace
/// normalized to single spaces.
fn extract_page_text(html: &str, max_chars: usize) -> String {
    let scan_start = html
        .find("<main")
        .or_else(|| html.find("<body"))
        .unwrap_or(0);
    let mut collected = String::with_capacity(max_chars + 50);
    let mut inside_tag = false;
    for c in html[scan_start..].chars() {
        // Byte-length budget check happens before each push.
        if collected.len() >= max_chars {
            break;
        }
        if c == '<' {
            inside_tag = true;
        } else if c == '>' {
            inside_tag = false;
        } else if !inside_tag && !c.is_control() {
            collected.push(c);
        }
    }
    collected.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Pulls the value of a double-quoted `attr="..."` out of a tag string;
/// `None` when the attribute or its closing quote is absent.
fn extract_attr(tag: &str, attr: &str) -> Option<String> {
    let needle = format!("{attr}=\"");
    let value_start = tag.find(&needle)? + needle.len();
    let value_len = tag[value_start..].find('"')?;
    Some(tag[value_start..value_start + value_len].to_string())
}
/// Language-specific readability formulas supported by the audit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReadabilityFormula {
    /// English: Flesch-Kincaid grade level.
    FleschKincaid,
    /// French: Kandel-Moles adaptation of Flesch.
    KandelMoles,
    /// German: Wiener Sachtextformel.
    WienerSachtextformel,
    /// Italian: Gulpease index.
    Gulpease,
    /// Scandinavian languages: LIX.
    Lix,
    /// Spanish: Fernandez-Huerta.
    FernandezHuerta,
}
impl ReadabilityFormula {
    /// Maps a BCP-47-ish language tag (case-insensitive, `-`/`_` region
    /// separators allowed) to a formula; `None` for unsupported languages.
    #[must_use]
    pub fn from_lang(lang: &str) -> Option<Self> {
        // Only the primary subtag matters: "en-US" -> "en".
        let primary = lang.split(['-', '_']).next().unwrap_or(lang);
        let formula = match primary.to_lowercase().as_str() {
            "en" => Self::FleschKincaid,
            "fr" => Self::KandelMoles,
            "de" => Self::WienerSachtextformel,
            "it" => Self::Gulpease,
            "es" => Self::FernandezHuerta,
            "sv" | "nb" | "nn" | "da" | "no" => Self::Lix,
            _ => return None,
        };
        Some(formula)
    }
}
/// Aggregate readability scores for a block of text.
#[derive(Debug, Clone, Copy)]
pub struct ReadabilityAudit {
    /// Approximate school grade needed to understand the text (>= 0).
    pub grade_level: f64,
    /// 0-100 ease score, clamped; higher means easier to read.
    pub reading_ease: f64,
    /// Mean number of words per sentence.
    pub avg_sentence_len: f64,
}
impl ReadabilityAudit {
#[must_use]
/// Scores text with the (English) Flesch-Kincaid formulas.
/// Empty or sentence-less text is treated as trivially readable.
pub fn analyze(text: &str) -> Self {
    let word_total = count_words(text);
    let sentence_total = count_sentences(text);
    let syllable_total = count_syllables(text);
    if word_total == 0 || sentence_total == 0 {
        return Self {
            grade_level: 0.0,
            reading_ease: 100.0,
            avg_sentence_len: 0.0,
        };
    }
    let wps = word_total as f64 / sentence_total as f64; // words/sentence
    let spw = syllable_total as f64 / word_total as f64; // syllables/word
    // Flesch-Kincaid grade: 0.39*wps + 11.8*spw - 15.59
    let grade = 0.39f64.mul_add(wps, 11.8f64.mul_add(spw, -15.59));
    // Flesch reading ease: 206.835 - 1.015*wps - 84.6*spw
    let ease = (-1.015f64).mul_add(wps, (-84.6f64).mul_add(spw, 206.835));
    Self {
        grade_level: grade.max(0.0),
        reading_ease: ease.clamp(0.0, 100.0),
        avg_sentence_len: wps,
    }
}
#[must_use]
/// Scores text with the formula appropriate for `lang`, falling back to
/// Flesch-Kincaid (English) for empty or unsupported language tags.
/// Non-English formulas derive a rough 0-10+ grade from their native
/// ease/index scale so all results are comparable to a target grade.
pub fn analyze_with_lang(text: &str, lang: &str) -> Self {
    let formula = if lang.is_empty() {
        ReadabilityFormula::FleschKincaid
    } else {
        ReadabilityFormula::from_lang(lang)
            .unwrap_or(ReadabilityFormula::FleschKincaid)
    };
    let words = count_words(text);
    let sentences = count_sentences(text);
    let syllables = count_syllables(text);
    // Letter/digit count; used only by Gulpease.
    let chars: usize = text.chars().filter(|c| c.is_alphanumeric()).count();
    if words == 0 || sentences == 0 {
        // Empty input: trivially readable.
        return Self {
            grade_level: 0.0,
            reading_ease: 100.0,
            avg_sentence_len: 0.0,
        };
    }
    let wps = words as f64 / sentences as f64; // words per sentence
    let spw = syllables as f64 / words as f64; // syllables per word
    match formula {
        ReadabilityFormula::FleschKincaid => Self::analyze(text),
        // French: Kandel-Moles ease = 209 - 1.15*wps - 68*spw,
        // grade approximated as (100 - ease) / 10.
        ReadabilityFormula::KandelMoles => {
            let ease = 68.0f64.mul_add(-spw, 1.15f64.mul_add(-wps, 209.0));
            Self {
                grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
                    .max(0.0),
                reading_ease: ease.clamp(0.0, 100.0),
                avg_sentence_len: wps,
            }
        }
        // German: first Wiener Sachtextformel over word-shape percentages.
        ReadabilityFormula::WienerSachtextformel => {
            let word_list: Vec<&str> = text.split_whitespace().collect();
            let total = word_list.len().max(1) as f64;
            // Percentage of words with three or more syllables.
            let pct_3plus_syl = word_list
                .iter()
                .filter(|w| count_word_syllables(w) >= 3)
                .count() as f64
                / total
                * 100.0;
            // Percentage of words longer than six letters.
            let pct_6plus_char = word_list
                .iter()
                .filter(|w| {
                    w.chars().filter(|c| c.is_alphabetic()).count() > 6
                })
                .count() as f64
                / total
                * 100.0;
            // Percentage of one-syllable words.
            let pct_1syl = word_list
                .iter()
                .filter(|w| count_word_syllables(w) == 1)
                .count() as f64
                / total
                * 100.0;
            // 0.1935*pct3 + 0.1672*wps - 0.1297*pct6 - 0.0327*pct1 - 0.875
            let grade = 0.1935f64.mul_add(
                pct_3plus_syl,
                0.1672f64.mul_add(
                    wps,
                    (-0.1297f64).mul_add(
                        pct_6plus_char,
                        (-0.0327f64).mul_add(pct_1syl, -0.875),
                    ),
                ),
            );
            Self {
                grade_level: grade.max(0.0),
                // Map the 0-20 grade scale onto a 0-100 ease scale.
                reading_ease: grade
                    .clamp(0.0, 20.0)
                    .mul_add(-5.0, 100.0)
                    .clamp(0.0, 100.0),
                avg_sentence_len: wps,
            }
        }
        // Italian: Gulpease = 89 + (300*sentences - 10*chars) / words.
        ReadabilityFormula::Gulpease => {
            let ease = 89.0
                + 10.0f64
                    .mul_add(-(chars as f64), 300.0 * sentences as f64)
                    / words as f64;
            Self {
                grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
                    .max(0.0),
                reading_ease: ease.clamp(0.0, 100.0),
                avg_sentence_len: wps,
            }
        }
        // Scandinavian: LIX = wps + 100 * long_words / total;
        // grade approximated as LIX / 5.
        ReadabilityFormula::Lix => {
            let word_list: Vec<&str> = text.split_whitespace().collect();
            let total = word_list.len().max(1) as f64;
            let long_words = word_list
                .iter()
                .filter(|w| {
                    w.chars().filter(|c| c.is_alphabetic()).count() > 6
                })
                .count() as f64;
            let lix = wps + 100.0 * long_words / total;
            Self {
                grade_level: (lix / 5.0).max(0.0),
                reading_ease: (100.0 - lix).clamp(0.0, 100.0),
                avg_sentence_len: wps,
            }
        }
        // Spanish: Fernandez-Huerta ease = 206.84 - 1.02*wps - 60*spw.
        ReadabilityFormula::FernandezHuerta => {
            let ease =
                1.02f64.mul_add(-wps, (-60.0f64).mul_add(spw, 206.84));
            Self {
                grade_level: ((100.0 - ease.clamp(0.0, 100.0)) / 10.0)
                    .max(0.0),
                reading_ease: ease.clamp(0.0, 100.0),
                avg_sentence_len: wps,
            }
        }
    }
}
}
/// Counts whitespace-delimited words.
fn count_words(text: &str) -> usize {
    let mut total = 0usize;
    for _word in text.split_whitespace() {
        total += 1;
    }
    total
}
/// Counts sentence terminators ('.', '!', '?'), with a floor of 1 so
/// ratios computed from the result never divide by zero.
fn count_sentences(text: &str) -> usize {
    // Scanning bytes is safe here: these ASCII bytes never occur inside
    // multi-byte UTF-8 sequences.
    let terminators = text
        .bytes()
        .filter(|b| matches!(b, b'.' | b'!' | b'?'))
        .count();
    terminators.max(1)
}
/// Sums the estimated syllable counts of all words in the text.
fn count_syllables(text: &str) -> usize {
    let mut total = 0usize;
    for word in text.split_whitespace() {
        total += count_word_syllables(word);
    }
    total
}
/// Estimates the syllable count of a single word by counting vowel
/// groups (a, e, i, o, u, y), with an English silent-trailing-'e'
/// heuristic. Words with no letters count as one syllable.
fn count_word_syllables(word: &str) -> usize {
    let word = word.to_lowercase();
    let letters: Vec<char> =
        word.chars().filter(|c| c.is_alphabetic()).collect();
    if letters.is_empty() {
        return 1;
    }
    let mut groups = 0usize;
    let mut prev_vowel = false;
    for &ch in &letters {
        // Fix: the previous `vowels.contains(&(ch as u8))` truncated each
        // char to u8, so non-ASCII letters (e.g. U+0161) could alias onto
        // ASCII vowel bytes and be miscounted; match on the char instead.
        let is_vowel = matches!(ch, 'a' | 'e' | 'i' | 'o' | 'u' | 'y');
        // Count vowel *groups*, not individual vowels.
        if is_vowel && !prev_vowel {
            groups += 1;
        }
        prev_vowel = is_vowel;
    }
    // Silent trailing 'e' ("make", "time") usually adds no syllable.
    if letters.len() > 2 && letters.last() == Some(&'e') && groups > 1 {
        groups -= 1;
    }
    groups.max(1)
}
/// Calls the model once, then re-prompts up to `max_attempts` times while
/// the output's (Flesch-Kincaid) grade level stays above `target_grade`.
/// A refinement is only adopted when it strictly lowers the grade; the
/// best text seen so far is returned even if the target is never met.
/// Returns `None` only when the initial generation fails.
fn generate_with_refinement(
    endpoint: &str,
    model: &str,
    prompt: &str,
    target_grade: f64,
    max_attempts: usize,
) -> Option<String> {
    let mut text = call_ollama(endpoint, model, prompt)?;
    let mut audit = ReadabilityAudit::analyze(&text);
    for attempt in 0..max_attempts {
        // Good enough: stop refining.
        if audit.grade_level <= target_grade {
            break;
        }
        log::info!(
            "[llm] Grade {:.1} exceeds target {:.1}, refining (attempt {})",
            audit.grade_level,
            target_grade,
            attempt + 1
        );
        let simplify_prompt = format!(
            "Rewrite this text at a 6th-grade reading level. \
            Use short sentences (max 20 words). Use simple words. \
            Keep all facts and numbers exactly the same. \
            Return ONLY the rewritten text:\n\n{text}"
        );
        if let Some(refined) = call_ollama(endpoint, model, &simplify_prompt) {
            let refined_audit = ReadabilityAudit::analyze(&refined);
            // Keep the refinement only if it actually reads easier.
            if refined_audit.grade_level < audit.grade_level {
                text = refined;
                audit = refined_audit;
            }
        }
    }
    Some(text)
}
/// Inserts a schema.org `Article` JSON-LD script with the given
/// description just before `</head>`. Pages that already contain an
/// Article block with a description (or that lack `</head>`) are
/// returned unchanged.
fn inject_jsonld_description(html: &str, description: &str) -> String {
    let already_described = html.contains("\"@type\":\"Article\"")
        && html.contains("\"description\"");
    if already_described {
        return html.to_string();
    }
    // serde_json handles JSON escaping of the description text.
    let payload = serde_json::json!({
        "@context": "https://schema.org",
        "@type": "Article",
        "description": description,
    });
    let script = format!(
        "<script type=\"application/ld+json\">{payload}</script>\n"
    );
    match html.find("</head>") {
        Some(pos) => {
            let mut out = html.to_string();
            out.insert_str(pos, &script);
            out
        }
        None => html.to_string(),
    }
}
/// Sends a single non-streaming generation request to the Ollama HTTP API
/// (`POST /api/generate`) via the system `curl` binary with a 30s timeout.
/// Returns the trimmed, non-empty completion text, or `None` on any
/// transport, HTTP, or parse failure.
fn call_ollama(endpoint: &str, model: &str, prompt: &str) -> Option<String> {
    let url = format!("{}/api/generate", endpoint.trim_end_matches('/'));
    let payload = serde_json::json!({
        "model": model,
        "prompt": prompt,
        "stream": false,
    });
    // -s: silent; -f: fail on HTTP errors so exit status reflects success.
    let output = Command::new("curl")
        .args([
            "-sf",
            "--max-time",
            "30",
            "-X",
            "POST",
            &url,
            "-H",
            "Content-Type: application/json",
            "-d",
            &payload.to_string(),
        ])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let response: serde_json::Value =
        serde_json::from_slice(&output.stdout).ok()?;
    // Ollama returns the full completion in the "response" field.
    response
        .get("response")
        .and_then(|v| v.as_str())
        .map(|s| s.trim().to_string())
        .filter(|s| !s.is_empty())
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
#[test]
fn needs_meta_description_missing() {
assert!(needs_meta_description("<html><head></head></html>"));
}
#[test]
fn needs_meta_description_short() {
let html = r#"<html><head><meta name="description" content="Short"></head></html>"#;
assert!(needs_meta_description(html));
}
#[test]
fn needs_meta_description_adequate() {
let html = r#"<html><head><meta name="description" content="This is a sufficiently long meta description that exceeds fifty characters easily"></head></html>"#;
assert!(!needs_meta_description(html));
}
#[test]
fn inject_meta_description_into_head() {
let html = "<html><head><title>T</title></head><body></body></html>";
let result = inject_meta_description(html, "Test description");
assert!(result.contains("name=\"description\""));
assert!(result.contains("Test description"));
}
#[test]
fn extract_attr_basic() {
assert_eq!(
extract_attr(r#"<img src="photo.jpg" alt="x">"#, "src"),
Some("photo.jpg".to_string())
);
}
#[test]
fn extract_attr_missing() {
assert_eq!(extract_attr(r#"<img src="x.jpg">"#, "alt"), None);
}
#[test]
fn extract_page_text_strips_tags() {
let html = "<body><p>Hello <b>world</b></p></body>";
let text = extract_page_text(html, 100);
assert_eq!(text, "Hello world");
}
#[test]
fn llm_plugin_name() {
let plugin = LlmPlugin::new(LlmConfig::default());
assert_eq!(plugin.name(), "llm");
}
#[test]
fn flesch_kincaid_simple_text() {
let audit = ReadabilityAudit::analyze("The cat sat on the mat.");
assert!(
audit.grade_level < 4.0,
"Simple text should be below grade 4, got {:.1}",
audit.grade_level
);
assert!(audit.reading_ease > 80.0);
}
#[test]
fn flesch_kincaid_complex_text() {
let text = "The implementation of sophisticated cryptographic \
algorithms necessitates comprehensive understanding \
of mathematical foundations. Asymmetric encryption \
protocols demonstrate considerable computational \
overhead compared to symmetric alternatives.";
let audit = ReadabilityAudit::analyze(text);
assert!(
audit.grade_level > 12.0,
"Complex text should be above grade 12, got {:.1}",
audit.grade_level
);
}
#[test]
fn flesch_kincaid_empty_text() {
let audit = ReadabilityAudit::analyze("");
assert!(audit.grade_level.abs() < f64::EPSILON);
assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
}
#[test]
fn syllable_count_known_words() {
assert_eq!(count_word_syllables("cat"), 1);
assert_eq!(count_word_syllables("hello"), 2);
assert_eq!(count_word_syllables("beautiful"), 3);
assert_eq!(count_word_syllables("implementation"), 5);
}
#[test]
fn count_sentences_basic() {
assert_eq!(count_sentences("Hello. World!"), 2);
assert_eq!(count_sentences("One sentence"), 1); assert_eq!(count_sentences("A? B? C!"), 3);
}
#[test]
fn inject_jsonld_adds_article_block() {
let html = "<html><head><title>T</title></head><body></body></html>";
let result = inject_jsonld_description(html, "Test desc");
assert!(result.contains("application/ld+json"));
assert!(result.contains("\"@type\":\"Article\""));
assert!(result.contains("Test desc"));
}
#[test]
fn inject_jsonld_skips_existing() {
let html = r#"<html><head><script type="application/ld+json">{"@type":"Article","description":"Existing"}</script></head></html>"#;
let result = inject_jsonld_description(html, "New desc");
assert!(!result.contains("New desc"));
assert!(result.contains("Existing"));
}
#[test]
fn audit_all_scans_markdown_files() {
let dir = tempfile::tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
fs::write(
content.join("simple.md"),
"---\ntitle: Simple\n---\nThe cat sat on the mat. It was a good day.",
)
.unwrap();
fs::write(
content.join("complex.md"),
"---\ntitle: Complex\n---\n\
The implementation of sophisticated cryptographic algorithms \
necessitates comprehensive understanding of mathematical \
foundations and computational complexity theory.",
)
.unwrap();
let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
assert_eq!(report.total_files, 2);
assert!(report.failing > 0, "complex.md should fail grade 8");
}
#[test]
fn audit_all_empty_dir() {
let dir = tempfile::tempdir().unwrap();
let content = dir.path().join("empty");
fs::create_dir_all(&content).unwrap();
let report = LlmPlugin::audit_all(&content, 8.0).unwrap();
assert_eq!(report.total_files, 0);
assert_eq!(report.failing, 0);
}
#[test]
fn strip_frontmatter_yaml() {
let input = "---\ntitle: Hello\n---\nBody text here.";
let body = strip_frontmatter(input);
assert!(body.contains("Body text here"));
assert!(!body.contains("title:"));
}
#[test]
fn strip_frontmatter_toml() {
let input = "+++\ntitle = \"Hello\"\n+++\nBody text here.";
let body = strip_frontmatter(input);
assert!(body.contains("Body text here"));
assert!(!body.contains("title"));
}
#[test]
fn strip_frontmatter_none() {
let input = "Just plain content.";
assert_eq!(strip_frontmatter(input), input);
}
#[test]
fn split_frontmatter_preserves_delimiters() {
let input = "---\ntitle: Hello\ndate: 2026-01-01\n---\n\n# Body text";
let (fm, body) = split_frontmatter(input);
assert!(fm.starts_with("---"));
assert!(fm.ends_with("---"));
assert!(fm.contains("title: Hello"));
assert!(body.contains("# Body text"));
}
#[test]
fn split_frontmatter_toml_preserves() {
let input = "+++\ntitle = \"Hello\"\n+++\nBody.";
let (fm, body) = split_frontmatter(input);
assert!(fm.starts_with("+++"));
assert!(body.contains("Body."));
}
#[test]
fn split_frontmatter_no_frontmatter() {
let input = "Just plain content.";
let (fm, body) = split_frontmatter(input);
assert!(fm.is_empty());
assert_eq!(body, input);
}
#[test]
fn audit_and_fix_skips_when_ollama_unavailable() {
let dir = tempfile::tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
fs::write(content.join("test.md"), "---\ntitle: T\n---\nSimple text.")
.unwrap();
let config = LlmConfig {
endpoint: "http://localhost:99999".to_string(),
..LlmConfig::default()
};
let result = LlmPlugin::audit_and_fix(&content, &config).unwrap();
assert_eq!(result, 0);
}
#[test]
/// Prints a repo-wide readability summary; purely informational and
/// never fails (directories missing from the checkout are skipped).
fn full_repo_readability_audit() {
    let dirs = [
        ("docs/guide", 15.0),
        ("examples/basic/content", 10.0),
        ("examples/blog/content", 10.0),
        ("examples/docs/content", 13.0),
        ("examples/landing/content", 10.0),
        ("examples/plugins/content", 10.0),
        ("examples/portfolio/content", 10.0),
        ("examples/quickstart/content", 10.0),
        ("examples/content/en", 10.0),
    ];
    let mut total_files = 0usize;
    let mut total_pass = 0usize;
    let mut total_fail = 0usize;
    println!("\n{}", "=".repeat(60));
    println!(" FULL REPOSITORY READABILITY AUDIT");
    println!("{}\n", "=".repeat(60));
    for (dir, target) in &dirs {
        let path = Path::new(dir);
        if !path.exists() {
            continue;
        }
        let report = LlmPlugin::audit_all(path, *target).unwrap();
        if report.total_files == 0 {
            continue;
        }
        // Fix: the closing rule of this header contained mojibake
        // (garbled bytes) — restored the box-drawing dashes.
        println!("── {dir} (target: grade {target:.0}) ──");
        for r in &report.results {
            let status = if r.passes { "PASS" } else { "FAIL" };
            println!(
                " {:.<40} grade {:>5.1} ease {:>5.1} [{status}]",
                r.path, r.grade_level, r.reading_ease
            );
        }
        println!(" → {}/{} pass\n", report.passing, report.total_files);
        total_files += report.total_files;
        total_pass += report.passing;
        total_fail += report.failing;
    }
    println!("{}", "=".repeat(60));
    println!(
        " TOTAL: {total_files} files — {total_pass} pass, {total_fail} fail"
    );
    println!("{}\n", "=".repeat(60));
}
#[test]
fn audit_docs_guide() {
let guide_dir = Path::new("docs/guide");
if !guide_dir.exists() {
return; }
let report = LlmPlugin::audit_all(guide_dir, 17.0).unwrap();
for result in &report.results {
let status = if result.passes { "PASS" } else { "FAIL" };
println!(
"[readability] {}: grade={:.1}, ease={:.1}, avg_sentence={:.1} — {}",
result.path,
result.grade_level,
result.reading_ease,
result.avg_sentence_len,
status
);
}
println!(
"\n[readability] {}/{} files pass (target: grade {:.0})",
report.passing, report.total_files, report.target_grade
);
}
#[test]
fn is_ollama_available_unreachable() {
assert!(!is_ollama_available("http://localhost:99999"));
}
#[test]
fn call_ollama_unreachable_returns_none() {
assert!(call_ollama("http://localhost:99999", "llama3", "hi").is_none());
}
#[test]
fn needs_meta_description_with_content_attr_first() {
let html = r#"<meta content="Decent length description that is more than fifty characters long enough" name="description">"#;
assert!(!needs_meta_description(html));
}
#[test]
fn inject_meta_description_no_head() {
let html = "<html><body>No head tag</body></html>";
let result = inject_meta_description(html, "desc");
assert_eq!(result, html); }
#[test]
fn inject_jsonld_no_head() {
let html = "<html><body>No head</body></html>";
let result = inject_jsonld_description(html, "desc");
assert_eq!(result, html);
}
#[test]
fn extract_page_text_no_body() {
let html = "just plain text no tags";
let text = extract_page_text(html, 100);
assert_eq!(text, "just plain text no tags");
}
#[test]
fn extract_page_text_truncates() {
let html = "<body><p>word </p></body>";
let text = extract_page_text(html, 3);
assert!(text.len() <= 5);
}
#[test]
fn generate_missing_alt_text_no_images() {
let mut html = "<html><body><p>No images</p></body></html>".to_string();
let count = generate_missing_alt_text(
&mut html,
"llama3",
"http://localhost:99999",
true,
Path::new("test.html"),
Path::new("."),
);
assert_eq!(count, 0);
}
#[test]
fn readability_audit_single_word() {
let audit = ReadabilityAudit::analyze("Hello");
assert!(audit.grade_level >= 0.0);
assert!(audit.avg_sentence_len >= 0.0);
}
#[test]
fn count_word_syllables_empty() {
assert_eq!(count_word_syllables(""), 1);
}
#[test]
fn count_word_syllables_numbers() {
assert_eq!(count_word_syllables("123"), 1);
}
#[test]
fn split_frontmatter_unclosed() {
let input = "---\ntitle: Hello\nNo closing delimiter";
let (fm, body) = split_frontmatter(input);
assert!(fm.is_empty());
assert_eq!(body, input);
}
#[test]
fn llm_plugin_skips_missing_site_dir() {
let plugin = LlmPlugin::new(LlmConfig::default());
let ctx = PluginContext::new(
Path::new("/tmp/c"),
Path::new("/tmp/b"),
Path::new("/nonexistent/site"),
Path::new("/tmp/t"),
);
assert!(plugin.after_compile(&ctx).is_ok());
}
#[test]
fn config_defaults_readability() {
let config = LlmConfig::default();
assert!((config.target_grade - 8.0).abs() < f64::EPSILON);
assert_eq!(config.max_refinement_attempts, 1);
}
#[test]
fn llm_plugin_skips_when_ollama_unavailable() {
let plugin = LlmPlugin::new(LlmConfig {
endpoint: "http://localhost:99999".to_string(),
..LlmConfig::default()
});
let dir = tempfile::tempdir().unwrap();
let site = dir.path().join("site");
fs::create_dir_all(&site).unwrap();
fs::write(site.join("index.html"), "<html><body></body></html>")
.unwrap();
let ctx = PluginContext::new(dir.path(), dir.path(), &site, dir.path());
plugin.after_compile(&ctx).unwrap();
}
// AiFixReport must serialize to JSON with the expected field names.
#[test]
fn ai_fix_report_serializes_to_json() {
    let fixed_entry = AiFixResult {
        path: "docs/guide.md".to_string(),
        before_grade: 12.5,
        after_grade: 7.2,
        improved: true,
        action: "rewritten".to_string(),
    };
    let unimproved_entry = AiFixResult {
        path: "docs/api.md".to_string(),
        before_grade: 14.0,
        after_grade: 13.8,
        improved: false,
        action: "no-improvement".to_string(),
    };
    let report = AiFixReport {
        total_audited: 10,
        total_failing: 3,
        total_fixed: 2,
        results: vec![fixed_entry, unimproved_entry],
    };
    let serialized = serde_json::to_string(&report).unwrap();
    assert!(serialized.contains("\"total_fixed\":2"));
    assert!(serialized.contains("\"action\":\"rewritten\""));
}
// With no reachable LLM endpoint nothing is rewritten, even when the
// file would fail the audit and multiple refinement attempts are allowed.
#[test]
fn ai_fix_report_skips_when_ollama_unavailable() {
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();
    fs::write(
        content_dir.join("test.md"),
        "---\ntitle: T\n---\nThe implementation of sophisticated algorithms.",
    )
    .unwrap();
    let config = LlmConfig {
        endpoint: "http://localhost:99999".to_string(),
        max_refinement_attempts: 3,
        ..LlmConfig::default()
    };
    let report =
        LlmPlugin::audit_and_fix_with_report(&content_dir, &config).unwrap();
    assert_eq!(report.total_fixed, 0);
    assert!(report.results.is_empty());
}
// Language-tag to readability-formula mapping, English variants.
#[test]
fn formula_from_lang_english() {
    for tag in ["en", "en-US"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::FleschKincaid)
        );
    }
}

#[test]
fn formula_from_lang_french() {
    for tag in ["fr", "fr-CA"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::KandelMoles)
        );
    }
}

#[test]
fn formula_from_lang_german() {
    for tag in ["de", "de-AT"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::WienerSachtextformel)
        );
    }
}

#[test]
fn formula_from_lang_italian() {
    assert_eq!(
        ReadabilityFormula::from_lang("it"),
        Some(ReadabilityFormula::Gulpease)
    );
}

// Lix covers the Scandinavian tags (Swedish, Norwegian Bokmål, Danish).
#[test]
fn formula_from_lang_swedish() {
    for tag in ["sv", "nb", "da"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::Lix)
        );
    }
}

#[test]
fn formula_from_lang_spanish() {
    assert_eq!(
        ReadabilityFormula::from_lang("es"),
        Some(ReadabilityFormula::FernandezHuerta)
    );
}

// Languages with no dedicated formula map to None.
#[test]
fn formula_from_lang_unsupported() {
    for tag in ["ja", "zh", "ar"] {
        assert_eq!(ReadabilityFormula::from_lang(tag), None);
    }
}
// Short declarative French should score as easily readable
// under the Kandel-Moles formula.
#[test]
fn kandel_moles_simple_french() {
    let text = "Le chat est sur le tapis. Il fait beau. Le soleil brille.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "fr");
    assert!(
        audit.reading_ease > 50.0,
        "Simple French should be readable, got {:.1}",
        audit.reading_ease
    );
}

// Simple German should land below grade 15 on the Wiener Sachtextformel.
#[test]
fn wiener_simple_german() {
    let text = "Die Katze sitzt auf der Matte. Es ist ein guter Tag. Die Sonne scheint.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "de");
    assert!(
        audit.grade_level < 15.0,
        "Simple German got grade {:.1}",
        audit.grade_level
    );
}

// Simple Italian should score reasonably high on the Gulpease index.
#[test]
fn gulpease_simple_italian() {
    let text = "Il gatto si siede sul tappeto. Il sole splende. Oggi è una bella giornata.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "it");
    assert!(
        audit.reading_ease > 40.0,
        "Simple Italian got ease {:.1}",
        audit.reading_ease
    );
}

// LIX sanity check for simple Swedish: non-negative grade, positive ease.
#[test]
fn lix_simple_swedish() {
    let text = "Katten sitter på mattan. Solen skiner. Det är en fin dag.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "sv");
    assert!(audit.grade_level >= 0.0);
    assert!(audit.reading_ease > 0.0);
}

// Simple Spanish should rate as easy under Fernandez-Huerta.
#[test]
fn fernandez_huerta_simple_spanish() {
    let text = "El gato está en la mesa. El sol brilla. Es un buen día.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "es");
    assert!(
        audit.reading_ease > 50.0,
        "Simple Spanish got ease {:.1}",
        audit.reading_ease
    );
}
// An empty language tag produces the same score as plain analyze(),
// i.e. the English formula is used by default.
#[test]
fn analyze_with_lang_empty_defaults_to_english() {
    let text = "The cat sat on the mat.";
    let english = ReadabilityAudit::analyze(text);
    let tagged = ReadabilityAudit::analyze_with_lang(text, "");
    assert!((english.grade_level - tagged.grade_level).abs() < f64::EPSILON);
}

// A language without a dedicated formula falls back to the default
// (same result as plain analyze()).
#[test]
fn analyze_with_lang_unsupported_falls_back() {
    let text = "The cat sat on the mat.";
    let english = ReadabilityAudit::analyze(text);
    let tagged = ReadabilityAudit::analyze_with_lang(text, "ja");
    assert!((english.grade_level - tagged.grade_level).abs() < f64::EPSILON);
}
// Language extraction from YAML frontmatter, long key.
#[test]
fn extract_frontmatter_lang_yaml() {
    let doc = "---\ntitle: Hello\nlanguage: fr\n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "fr");
}

// Language extraction from YAML frontmatter, short "lang" key.
#[test]
fn extract_frontmatter_lang_yaml_short() {
    let doc = "---\ntitle: Hello\nlang: de\n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "de");
}

// Language extraction from TOML frontmatter.
#[test]
fn extract_frontmatter_lang_toml() {
    let doc = "+++\ntitle = \"Hello\"\nlanguage = \"it\"\n+++\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "it");
}

// No language key present: empty string is returned.
#[test]
fn extract_frontmatter_lang_missing() {
    let doc = "---\ntitle: Hello\n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "");
}

// No frontmatter at all: empty string is returned.
#[test]
fn extract_frontmatter_lang_no_frontmatter() {
    let doc = "Just plain text.";
    assert_eq!(extract_frontmatter_lang(doc), "");
}
// audit_all picks up the frontmatter language and still counts the file.
#[test]
fn audit_all_respects_language() {
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();
    fs::write(
        content_dir.join("french.md"),
        "---\ntitle: Bonjour\nlanguage: fr\n---\nLe chat est sur le tapis. Il fait beau.",
    )
    .unwrap();
    let report = LlmPlugin::audit_all(&content_dir, 8.0).unwrap();
    assert_eq!(report.total_files, 1);
}
#[test]
fn kandel_moles_empty_text() {
let audit = ReadabilityAudit::analyze_with_lang("", "fr");
assert!(audit.grade_level.abs() < f64::EPSILON);
assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
assert!(audit.avg_sentence_len.abs() < f64::EPSILON);
}
#[test]
fn wiener_empty_text() {
let audit = ReadabilityAudit::analyze_with_lang("", "de");
assert!(audit.grade_level.abs() < f64::EPSILON);
assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
}
#[test]
fn gulpease_empty_text() {
let audit = ReadabilityAudit::analyze_with_lang("", "it");
assert!(audit.grade_level.abs() < f64::EPSILON);
assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
}
#[test]
fn lix_empty_text() {
let audit = ReadabilityAudit::analyze_with_lang("", "sv");
assert!(audit.grade_level.abs() < f64::EPSILON);
assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
}
#[test]
fn fernandez_huerta_empty_text() {
let audit = ReadabilityAudit::analyze_with_lang("", "es");
assert!(audit.grade_level.abs() < f64::EPSILON);
assert!((audit.reading_ease - 100.0).abs() < f64::EPSILON);
}
// Single-word inputs must not panic and must produce sane scores
// for every language-specific formula.
#[test]
fn kandel_moles_single_word() {
    let result = ReadabilityAudit::analyze_with_lang("Bonjour", "fr");
    assert!(result.grade_level >= 0.0);
    assert!(result.reading_ease >= 0.0);
    assert!(result.avg_sentence_len >= 1.0);
}

#[test]
fn wiener_single_word() {
    let result = ReadabilityAudit::analyze_with_lang("Hallo", "de");
    assert!(result.grade_level >= 0.0);
    assert!(result.avg_sentence_len >= 1.0);
}

#[test]
fn gulpease_single_word() {
    let result = ReadabilityAudit::analyze_with_lang("Ciao", "it");
    assert!(result.grade_level >= 0.0);
    assert!(result.avg_sentence_len >= 1.0);
}

#[test]
fn lix_single_word() {
    let result = ReadabilityAudit::analyze_with_lang("Hej", "sv");
    assert!(result.grade_level >= 0.0);
}

#[test]
fn fernandez_huerta_single_word() {
    let result = ReadabilityAudit::analyze_with_lang("Hola", "es");
    assert!(result.grade_level >= 0.0);
}
// Dense academic French should yield a positive grade and a multi-word
// average sentence length.
#[test]
fn kandel_moles_long_text() {
    let text = "Le développement de nouvelles infrastructures \
        technologiques nécessite une compréhension \
        approfondie des systèmes complexes. \
        Les algorithmes sophistiqués démontrent \
        une efficacité considérable. \
        La modernisation progressive des architectures \
        informatiques représente un défi majeur.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "fr");
    assert!(audit.grade_level > 0.0);
    assert!(audit.reading_ease >= 0.0);
    assert!(audit.avg_sentence_len > 1.0);
}

// Dense academic German under the Wiener Sachtextformel.
#[test]
fn wiener_long_text() {
    let text = "Die Implementierung fortschrittlicher kryptografischer \
        Algorithmen erfordert umfassendes Verständnis \
        mathematischer Grundlagen. Asymmetrische \
        Verschlüsselungsprotokolle weisen erheblichen \
        Rechenaufwand auf. Die systematische Optimierung \
        komplexer Datenstrukturen bleibt herausfordernd.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "de");
    assert!(audit.grade_level > 0.0);
    assert!(audit.avg_sentence_len > 1.0);
}

// Dense academic Italian under the Gulpease index.
#[test]
fn gulpease_long_text() {
    let text = "L'implementazione di algoritmi crittografici sofisticati \
        richiede una comprensione approfondita dei fondamenti \
        matematici. I protocolli di crittografia asimmetrica \
        dimostrano un considerevole sovraccarico computazionale. \
        L'ottimizzazione sistematica delle strutture dati \
        complesse rimane impegnativa.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "it");
    assert!(audit.grade_level > 0.0);
    assert!(audit.avg_sentence_len > 1.0);
}

// Dense academic Swedish under LIX.
#[test]
fn lix_long_text() {
    let text = "Implementeringen av avancerade kryptografiska algoritmer \
        kräver omfattande förståelse av matematiska grunder. \
        Asymmetriska krypteringsprotokoll uppvisar betydande \
        beräkningsbelastning. Systematisk optimering av komplexa \
        datastrukturer förblir utmanande.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "sv");
    assert!(audit.grade_level > 0.0);
    assert!(audit.avg_sentence_len > 1.0);
}

// Dense academic Spanish under Fernandez-Huerta.
#[test]
fn fernandez_huerta_long_text() {
    let text =
        "La implementación de algoritmos criptográficos sofisticados \
        requiere una comprensión profunda de los fundamentos \
        matemáticos. Los protocolos de cifrado asimétrico \
        demuestran una considerable sobrecarga computacional. \
        La optimización sistemática de estructuras de datos \
        complejas sigue siendo un desafío.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "es");
    assert!(audit.grade_level > 0.0);
    assert!(audit.avg_sentence_len > 1.0);
}
// German text mixing short and polysyllabic words: ease must stay
// within the clamped [0, 100] range.
#[test]
fn wiener_mixed_syllable_words() {
    let text = "Ich bin gut. Das Haus ist sehr interessant. \
        Die Universität hat viele Studenten.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "de");
    assert!(audit.grade_level >= 0.0);
    assert!(audit.reading_ease >= 0.0);
    assert!(audit.reading_ease <= 100.0);
}

// Swedish text mixing short and long words: LIX output stays in range.
#[test]
fn lix_mixed_word_lengths() {
    let text = "En bok om programmering. \
        Datavetenskapliga beräkningar kräver noggrannhet.";
    let audit = ReadabilityAudit::analyze_with_lang(text, "sv");
    assert!(audit.grade_level > 0.0);
    assert!(audit.reading_ease >= 0.0);
    assert!(audit.reading_ease <= 100.0);
}
// A quoted TOML value keeps its region suffix intact.
#[test]
fn extract_frontmatter_lang_toml_with_quotes() {
    let doc = "+++\ntitle = \"Hello\"\nlanguage = \"en-US\"\n+++\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "en-US");
}

// When both "language" and "lang" appear, the first key wins.
#[test]
fn extract_frontmatter_lang_first_wins() {
    let doc = "---\nlanguage: fr\nlang: de\n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "fr");
}

// Trailing whitespace around the value is trimmed.
#[test]
fn extract_frontmatter_lang_whitespace_around_value() {
    let doc = "---\nlanguage: es \n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "es");
}

// Double-quoted YAML values are unwrapped.
#[test]
fn extract_frontmatter_lang_yaml_quoted_value() {
    let doc = "---\nlanguage: \"de\"\n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "de");
}

// Single-quoted YAML values are unwrapped too.
#[test]
fn extract_frontmatter_lang_single_quoted() {
    let doc = "---\nlanguage: 'it'\n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "it");
}

// A key with no value yields the empty string.
#[test]
fn extract_frontmatter_lang_empty_value() {
    let doc = "---\nlanguage: \n---\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "");
}

// TOML frontmatter with the short "lang" key.
#[test]
fn extract_frontmatter_lang_toml_lang_key() {
    let doc = "+++\nlang = \"sv\"\n+++\nBody.";
    assert_eq!(extract_frontmatter_lang(doc), "sv");
}
// With a very lenient target grade every file passes the audit, so
// nothing needs fixing even though the endpoint is unreachable.
#[test]
fn audit_and_fix_with_report_all_passing() {
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();
    fs::write(
        content_dir.join("simple.md"),
        "---\ntitle: Simple\n---\nThe cat sat. It was good.",
    )
    .unwrap();
    let config = LlmConfig {
        endpoint: "http://localhost:99999".to_string(),
        target_grade: 20.0,
        ..LlmConfig::default()
    };
    let report =
        LlmPlugin::audit_and_fix_with_report(&content_dir, &config).unwrap();
    assert_eq!(report.total_fixed, 0);
}

// An empty content directory produces an all-zero report.
#[test]
fn audit_and_fix_with_report_empty_dir() {
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("empty_content");
    fs::create_dir_all(&content_dir).unwrap();
    let config = LlmConfig {
        endpoint: "http://localhost:99999".to_string(),
        ..LlmConfig::default()
    };
    let report =
        LlmPlugin::audit_and_fix_with_report(&content_dir, &config).unwrap();
    assert_eq!(report.total_audited, 0);
    assert_eq!(report.total_failing, 0);
    assert!(report.results.is_empty());
}
// A file that is all frontmatter (empty body) still counts as one
// audited file and passes.
#[test]
fn audit_all_file_with_empty_body() {
    let tmp = tempfile::tempdir().unwrap();
    let content_dir = tmp.path().join("content");
    fs::create_dir_all(&content_dir).unwrap();
    fs::write(content_dir.join("empty_body.md"), "---\ntitle: T\n---\n")
        .unwrap();
    let report = LlmPlugin::audit_all(&content_dir, 8.0).unwrap();
    assert_eq!(report.total_files, 1);
    assert_eq!(report.passing, 1);
}
// A description tag without any content attribute does not trigger
// generation.
#[test]
fn needs_meta_description_no_content_attr() {
    let markup = r#"<meta name="description">"#;
    assert!(!needs_meta_description(markup));
}

// The description tag is found even among other meta tags.
#[test]
fn needs_meta_description_multiple_meta_tags() {
    let markup = r#"<meta name="author" content="Alice"><meta name="description" content="This is a sufficiently long description that is more than fifty characters long">"#;
    assert!(!needs_meta_description(markup));
}

// An empty content attribute means a description is still needed.
#[test]
fn needs_meta_description_empty_content() {
    let markup = r#"<meta name="description" content="">"#;
    assert!(needs_meta_description(markup));
}
// These tests pin the HTML-escaping of injected meta descriptions.
// FIX: the expected substrings previously contained the raw characters
// (`&`, `"`, `<`) instead of their escaped entity forms — an assertion
// that can never hold once escaping works, and the quotes variant was
// not even valid Rust (an unescaped `"` terminated the string literal).
// The expectations now assert the escaped entities.
#[test]
fn inject_meta_description_escapes_ampersand() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, "Tom & Jerry");
    assert!(result.contains("Tom &amp; Jerry"));
}

#[test]
fn inject_meta_description_escapes_quotes() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, r#"A "great" page"#);
    assert!(result.contains("A &quot;great&quot; page"));
}

#[test]
fn inject_meta_description_escapes_angle_brackets() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, "x < y");
    assert!(result.contains("x &lt; y"));
}

#[test]
fn inject_meta_description_all_special_chars() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_meta_description(html, r#"A & B "C" <D>"#);
    // Checked up to "&lt;D" so the test holds whether or not `>` is
    // also escaped (escaping `>` in attribute values is optional).
    assert!(result.contains("A &amp; B &quot;C&quot; &lt;D"));
}
// When a <main> element exists, only its text is extracted; content
// outside it is dropped.
#[test]
fn extract_page_text_with_main_tag() {
    let html = "<html><body><div>ignored</div><main><p>Main content here.</p></main></body></html>";
    let text = extract_page_text(html, 500);
    assert!(text.contains("Main content here"));
    assert!(!text.contains("ignored"));
}

// Extraction honors the length budget; a small slack above the limit
// is tolerated (e.g. for a word boundary or ellipsis).
#[test]
fn extract_page_text_large_truncated() {
    let long_body = "word ".repeat(200);
    let html = format!("<body><p>{long_body}</p></body>");
    let text = extract_page_text(&html, 50);
    assert!(text.len() <= 60);
}

// Control characters embedded in the markup are stripped from the
// extracted text.
#[test]
fn extract_page_text_strips_control_chars() {
    let html = "<body>Hello\x00\x01World</body>";
    let text = extract_page_text(html, 100);
    assert_eq!(text, "HelloWorld");
}

// Text inside nested inline elements is still collected.
#[test]
fn extract_page_text_nested_tags() {
    let html = "<body><div><span>A</span> <em>B</em></div></body>";
    let text = extract_page_text(html, 100);
    assert!(text.contains('A'));
    assert!(text.contains('B'));
}
// Empty alt attribute: with the unreachable endpoint no replacement
// text can be produced, so the reported count is 0.
// NOTE(review): the fourth argument appears to be the dry-run flag
// (cf. LlmConfig::dry_run) — confirm against the function signature.
#[test]
fn generate_missing_alt_text_empty_alt() {
    let mut html =
        r#"<html><body><img src="photo.jpg" alt=""></body></html>"#
            .to_string();
    let count = generate_missing_alt_text(
        &mut html,
        "llama3",
        "http://localhost:99999",
        false,
        Path::new("test.html"),
        Path::new("."),
    );
    assert_eq!(count, 0);
}

// A malformed <img> tag (missing closing bracket) must not panic and
// yields no generated alt text.
#[test]
fn generate_missing_alt_text_missing_closing_bracket() {
    let mut html =
        "<html><body><img src=\"photo.jpg\"</body></html>".to_string();
    let count = generate_missing_alt_text(
        &mut html,
        "llama3",
        "http://localhost:99999",
        false,
        Path::new("test.html"),
        Path::new("."),
    );
    assert_eq!(count, 0);
}

// A mix of good, missing, and empty alt attributes, run in dry-run
// mode against the unreachable endpoint: nothing is counted.
#[test]
fn generate_missing_alt_text_mixed_images() {
    let mut html = r#"<html><body>
        <img src="a.jpg" alt="Good alt">
        <img src="b.jpg">
        <img src="c.jpg" alt="">
        </body></html>"#
        .to_string();
    let count = generate_missing_alt_text(
        &mut html,
        "llama3",
        "http://localhost:99999",
        true,
        Path::new("test.html"),
        Path::new("."),
    );
    assert_eq!(count, 0);
}

// An image that already has alt text is never touched.
#[test]
fn generate_missing_alt_text_with_alt_present() {
    let mut html =
        r#"<html><body><img src="x.jpg" alt="Has alt text"></body></html>"#
            .to_string();
    let count = generate_missing_alt_text(
        &mut html,
        "llama3",
        "http://localhost:99999",
        false,
        Path::new("test.html"),
        Path::new("."),
    );
    assert_eq!(count, 0);
}
// Underscore-separated locale tags resolve like hyphenated ones.
#[test]
fn formula_from_lang_underscore_separator() {
    assert_eq!(
        ReadabilityFormula::from_lang("en_US"),
        Some(ReadabilityFormula::FleschKincaid)
    );
    assert_eq!(
        ReadabilityFormula::from_lang("de_DE"),
        Some(ReadabilityFormula::WienerSachtextformel)
    );
}

// Both Norwegian variants (Nynorsk and the generic "no") map to Lix.
#[test]
fn formula_from_lang_norwegian_variants() {
    for tag in ["nn", "no"] {
        assert_eq!(
            ReadabilityFormula::from_lang(tag),
            Some(ReadabilityFormula::Lix)
        );
    }
}
// Pins the remaining LlmConfig defaults (model, endpoint, dry_run).
#[test]
fn llm_config_default_values() {
    let defaults = LlmConfig::default();
    assert_eq!(defaults.model, "llama3");
    assert_eq!(defaults.endpoint, "http://localhost:11434");
    assert!(!defaults.dry_run);
}

// Debug output names the type and includes the configured model.
#[test]
fn llm_plugin_debug_impl() {
    let plugin = LlmPlugin::new(LlmConfig::default());
    let rendered = format!("{plugin:?}");
    assert!(rendered.contains("LlmPlugin"));
    assert!(rendered.contains("llama3"));
}
// Frontmatter is still recognized when preceded by leading whitespace.
#[test]
fn split_frontmatter_leading_whitespace() {
    let source = " ---\ntitle: Hello\n---\nBody.";
    let (frontmatter, rest) = split_frontmatter(source);
    assert!(frontmatter.contains("title: Hello"));
    assert!(rest.contains("Body."));
}

// An unterminated +++ block yields no frontmatter; the whole input
// passes through as the body.
#[test]
fn split_frontmatter_toml_unclosed() {
    let source = "+++\ntitle = \"Hello\"\nNo closing delimiter";
    let (frontmatter, rest) = split_frontmatter(source);
    assert!(frontmatter.is_empty());
    assert_eq!(rest, source);
}
// FileAuditResult serializes with the expected JSON field names.
#[test]
fn file_audit_result_serializes() {
    let entry = FileAuditResult {
        path: "test.md".to_string(),
        grade_level: 7.5,
        reading_ease: 65.0,
        avg_sentence_len: 12.0,
        passes: true,
    };
    let serialized = serde_json::to_string(&entry).unwrap();
    assert!(serialized.contains("\"path\":\"test.md\""));
    assert!(serialized.contains("\"passes\":true"));
}

// AuditReport serializes with the expected JSON field names.
#[test]
fn audit_report_serializes() {
    let summary = AuditReport {
        target_grade: 8.0,
        total_files: 2,
        passing: 1,
        failing: 1,
        results: Vec::new(),
    };
    let serialized = serde_json::to_string(&summary).unwrap();
    assert!(serialized.contains("\"target_grade\":8.0"));
    assert!(serialized.contains("\"total_files\":2"));
}
// JSON-LD injection should emit an application/ld+json script tag
// containing the description.
// NOTE(review): `&` needs no escaping in JSON, so checking for the raw
// "Tom & Jerry" substring is plausible here (unlike the HTML meta tag
// tests) — confirm the description is not additionally HTML-escaped
// inside the <script> element.
#[test]
fn inject_jsonld_with_special_chars() {
    let html = "<html><head></head><body></body></html>";
    let result = inject_jsonld_description(html, "Tom & Jerry's \"show\"");
    assert!(result.contains("application/ld+json"));
    assert!(result.contains("Tom & Jerry"));
}
// Words with several separated vowel groups count multiple syllables.
#[test]
fn count_syllables_multiple_vowel_groups() {
    assert!(count_word_syllables("beautiful") >= 2);
}

// Consecutive vowels collapse into a single syllable group.
#[test]
fn count_syllables_consecutive_vowels() {
    assert_eq!(count_word_syllables("queue"), 1);
}

// A vowel-free word is floored at one syllable.
#[test]
fn count_syllables_all_consonants() {
    assert_eq!(count_word_syllables("rhythm"), 1);
}

// Whole-sentence syllable total covers every word.
#[test]
fn count_syllables_text_total() {
    assert!(count_syllables("The cat sat on the mat.") >= 6);
}

// Word counting across empty, blank, single, and multi-word inputs.
#[test]
fn count_words_basic() {
    let cases = [("one two three", 3), ("", 0), (" ", 0), ("single", 1)];
    for (input, expected) in cases {
        assert_eq!(count_words(input), expected);
    }
}
// Very short text can drive raw readability formulas negative; the
// reported grade and ease must be clamped to [0, 100].
#[test]
fn readability_grade_never_negative() {
    let scores = ReadabilityAudit::analyze("Hi.");
    assert!(scores.grade_level >= 0.0);
    assert!(scores.reading_ease >= 0.0);
    assert!(scores.reading_ease <= 100.0);
}

// Extremely simple text must not push reading ease above 100.
#[test]
fn readability_ease_clamped_to_100() {
    let scores = ReadabilityAudit::analyze("Go. Do. Be.");
    assert!(scores.reading_ease <= 100.0);
    assert!(scores.reading_ease >= 0.0);
}
}