use std::fs;
use std::path::{Path, PathBuf};
use serde::Serialize;
use crate::lint::relative_path;
use crate::sources::atomic::write_atomic;
use crate::vault::{CODE_ROOT, KNOWLEDGE_ROOT};
use crate::{ScopeIdentity, WikiError};
const AUTHORED_ROOTS: &[&str] = &[CODE_ROOT, KNOWLEDGE_ROOT];
const AUTHORED_ROOT_FILES: &[&str] = &["_index.md", "log.md"];
#[derive(Debug, Serialize)]
pub struct NormalizeReport {
pub command: &'static str,
pub scope: ScopeIdentity,
pub root: PathBuf,
pub scanned: usize,
pub changed: Vec<PathBuf>,
pub check_only: bool,
}
pub fn run(
vault_root: &Path,
scope: ScopeIdentity,
check_only: bool,
) -> Result<NormalizeReport, WikiError> {
let mut files = Vec::new();
for root_name in AUTHORED_ROOTS {
let root = vault_root.join(root_name);
if root.exists() {
collect_markdown(&root, &mut files)?;
}
}
for file_name in AUTHORED_ROOT_FILES {
let path = vault_root.join(file_name);
if path.is_file() {
files.push(path);
}
}
files.sort();
let mut changed = Vec::new();
for path in &files {
let original = fs::read_to_string(path).map_err(|source| WikiError::Io {
action: "read vault markdown for normalize",
path: Some(path.clone()),
source,
})?;
let normalized = crate::markdown::normalize(&original);
if normalized != original {
if !check_only {
write_atomic(
path,
normalized.as_bytes(),
"write normalized vault markdown",
)?;
}
changed.push(relative_path(vault_root, path));
}
}
Ok(NormalizeReport {
command: "normalize",
scope,
root: vault_root.to_path_buf(),
scanned: files.len(),
changed,
check_only,
})
}
pub fn render_text(report: &NormalizeReport) -> String {
let verb = if report.check_only {
"need normalization"
} else {
"normalized"
};
let mut text = format!(
"Wiki markdown normalize\nScope: {}\nScanned {} authored file(s); {} {}.\n",
report.scope,
report.scanned,
report.changed.len(),
verb,
);
for path in &report.changed {
text.push_str("- ");
text.push_str(&path.display().to_string());
text.push('\n');
}
text
}
pub(crate) fn check_exit_code(report: &NormalizeReport) -> u8 {
if report.check_only && !report.changed.is_empty() {
1
} else {
0
}
}
fn collect_markdown(directory: &Path, files: &mut Vec<PathBuf>) -> Result<(), WikiError> {
let entries = match fs::read_dir(directory) {
Ok(entries) => entries,
Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(()),
Err(source) => {
return Err(WikiError::Io {
action: "read vault directory for normalize",
path: Some(directory.to_path_buf()),
source,
});
}
};
for entry in entries {
let entry = entry.map_err(|source| WikiError::Io {
action: "read vault entry for normalize",
path: Some(directory.to_path_buf()),
source,
})?;
let path = entry.path();
let file_type = entry.file_type().map_err(|source| WikiError::Io {
action: "read vault file type for normalize",
path: Some(path.clone()),
source,
})?;
if file_type.is_dir() {
collect_markdown(&path, files)?;
} else if file_type.is_file() && is_markdown_path(&path) {
files.push(path);
}
}
Ok(())
}
fn is_markdown_path(path: &Path) -> bool {
path.extension()
.and_then(|extension| extension.to_str())
.is_some_and(|extension| {
matches!(extension.to_ascii_lowercase().as_str(), "md" | "markdown")
})
}
#[cfg(test)]
mod tests {
use super::*;
fn write(root: &Path, relative: &str, contents: &str) {
let path = root.join(relative);
std::fs::create_dir_all(path.parent().expect("parent")).expect("create parent");
std::fs::write(path, contents).expect("write fixture");
}
const DIRTY: &str = "# Title \n\n\nBody text \n## Section\nMore text.\n\n\n";
#[test]
fn normalizes_authored_docs_and_leaves_raw_captures_untouched() {
let temp = tempfile::tempdir().expect("tempdir");
let root = temp.path();
write(root, "knowledge/concepts/dirty.md", DIRTY);
write(root, "code/INDEX.md", "# Code \n\n\n");
let raw_bytes = b"# Raw capture \n\n\nverbatim \n";
let raw_path = root.join("raw/src-abc.md");
std::fs::create_dir_all(raw_path.parent().expect("raw parent")).expect("raw dir");
std::fs::write(&raw_path, raw_bytes).expect("write raw");
let report = run(root, ScopeIdentity::topic("ops"), false).expect("normalize runs");
assert!(
report
.changed
.contains(&PathBuf::from("knowledge/concepts/dirty.md")),
"authored doc should be normalized: {:?}",
report.changed
);
let fixed =
std::fs::read_to_string(root.join("knowledge/concepts/dirty.md")).expect("read fixed");
assert_eq!(fixed, crate::markdown::normalize(DIRTY));
assert!(
report.changed.contains(&PathBuf::from("code/INDEX.md")),
"code/** docs are in scope: {:?}",
report.changed
);
assert_eq!(report.scanned, 2, "raw/<id>.md must not be scanned");
assert_eq!(
std::fs::read(&raw_path).expect("read raw"),
raw_bytes,
"raw/<id>.md capture must not be rewritten"
);
assert!(
!report.changed.iter().any(|p| p.starts_with("raw")),
"raw captures must never appear in the changed set"
);
}
#[test]
fn is_idempotent() {
let temp = tempfile::tempdir().expect("tempdir");
let root = temp.path();
write(root, "knowledge/topics/page.md", DIRTY);
let first = run(root, ScopeIdentity::topic("ops"), false).expect("first pass");
assert_eq!(first.changed.len(), 1);
let second = run(root, ScopeIdentity::topic("ops"), false).expect("second pass");
assert!(
second.changed.is_empty(),
"second pass over normalized docs should report no changes: {:?}",
second.changed
);
}
#[test]
fn check_mode_reports_without_writing() {
let temp = tempfile::tempdir().expect("tempdir");
let root = temp.path();
write(root, "knowledge/topics/page.md", DIRTY);
let report = run(root, ScopeIdentity::topic("ops"), true).expect("check pass");
assert_eq!(
report.changed,
vec![PathBuf::from("knowledge/topics/page.md")]
);
assert!(report.check_only);
assert_eq!(
std::fs::read_to_string(root.join("knowledge/topics/page.md")).expect("read page"),
DIRTY
);
}
#[test]
fn normalizes_vault_root_stub_files() {
let temp = tempfile::tempdir().expect("tempdir");
let root = temp.path();
write(root, "_index.md", "# Wiki Index \n\n\n");
write(root, "log.md", "# Log\n\n\n");
let report = run(root, ScopeIdentity::topic("ops"), false).expect("normalize runs");
assert!(report.changed.contains(&PathBuf::from("_index.md")));
assert!(report.changed.contains(&PathBuf::from("log.md")));
}
fn report_with(check_only: bool, changed: Vec<PathBuf>) -> NormalizeReport {
NormalizeReport {
command: "normalize",
scope: ScopeIdentity::topic("ops"),
root: PathBuf::from("/tmp/vault"),
scanned: changed.len(),
changed,
check_only,
}
}
#[test]
fn render_text_summarizes_changed_paths() {
let report = report_with(
true,
vec![PathBuf::from("knowledge/a.md"), PathBuf::from("_index.md")],
);
let text = render_text(&report);
assert!(text.contains("2 need normalization."), "{text}");
assert!(text.contains("- knowledge/a.md"));
assert!(text.contains("- _index.md"));
}
#[test]
fn check_exit_code_gates_only_dirty_check_runs() {
assert_eq!(check_exit_code(&report_with(true, Vec::new())), 0);
assert_eq!(
check_exit_code(&report_with(true, vec![PathBuf::from("knowledge/a.md")])),
1,
);
assert_eq!(
check_exit_code(&report_with(false, vec![PathBuf::from("knowledge/a.md")])),
0,
);
}
}