use normalize_output::OutputFormatter;
use normalize_output::diagnostics::{DiagnosticsReport, Issue, Severity};
use serde::Serialize;
use std::path::Path;
/// Process-wide cache for the compiled regex that extracts backtick-quoted
/// code references from markdown lines. It starts empty and is filled on first
/// use via `get_or_init`, so the pattern is compiled at most once.
static CODE_REF_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
/// A single backtick-quoted reference in a markdown file that could not be
/// resolved to an indexed symbol or to an existing file.
#[derive(Debug, Clone, Serialize, schemars::JsonSchema)]
struct BrokenRef {
/// Path of the markdown file containing the reference (relative to the
/// scanned root when the file lies under it).
file: String,
/// 1-based line number of the reference within `file`.
line: usize,
/// The reference text exactly as captured between the backticks.
reference: String,
/// The whitespace-trimmed source line on which the reference appears.
context: String,
}
/// Result of scanning markdown documentation for broken code references.
#[derive(Debug, Serialize, schemars::JsonSchema)]
pub struct CheckRefsReport {
/// Every reference that failed to resolve, in the order it was encountered.
broken_refs: Vec<BrokenRef>,
/// Number of markdown (`.md`) files discovered by the walk.
files_checked: usize,
/// Number of symbol names loaded from the index and used for lookups.
symbols_indexed: usize,
}
impl OutputFormatter for CheckRefsReport {
    /// Renders the report as plain, newline-separated text.
    ///
    /// The output starts with a title and the file/symbol counters. It is
    /// followed either by a "no broken references" notice or by one entry per
    /// broken reference (`file:line: `reference``). The offending source line
    /// is printed beneath an entry only when it is at most 80 bytes long;
    /// longer context lines are omitted entirely.
    fn format_text(&self) -> String {
        let mut out = vec![
            "Documentation Reference Check".to_string(),
            String::new(),
            format!("Files checked: {}", self.files_checked),
            format!("Symbols indexed: {}", self.symbols_indexed),
            String::new(),
        ];

        if self.broken_refs.is_empty() {
            out.push("No broken references found.".to_string());
            return out.join("\n");
        }

        out.push(format!("Broken references ({}):", self.broken_refs.len()));
        out.push(String::new());
        for broken in &self.broken_refs {
            out.push(format!(
                " {}:{}: `{}`",
                broken.file, broken.line, broken.reference
            ));
            // Only short context lines are echoed; long ones are skipped.
            if broken.context.len() <= 80 {
                out.push(format!(" {}", broken.context));
            }
        }
        out.join("\n")
    }
}
/// Resolves the directory that holds normalize's data for the project `root`.
///
/// Resolution order:
/// 1. If the `NORMALIZE_INDEX_DIR` environment variable is readable and holds
///    an absolute path, that path is returned unchanged.
/// 2. If it holds a relative path, the result is
///    `<data home>/normalize/<relative path>`, where the data home is
///    `XDG_DATA_HOME` when that variable is readable, otherwise
///    `<home dir>/.local/share` (with `.` standing in for the home directory
///    when it cannot be determined).
/// 3. If `NORMALIZE_INDEX_DIR` is unset (or cannot be read as a string), the
///    result is `<root>/.normalize`.
pub fn normalize_dir_for_root(root: &Path) -> std::path::PathBuf {
    let Ok(configured) = std::env::var("NORMALIZE_INDEX_DIR") else {
        return root.join(".normalize");
    };

    let override_path = std::path::PathBuf::from(&configured);
    if override_path.is_absolute() {
        return override_path;
    }

    // Relative overrides are anchored under the per-user data directory.
    let data_home = match std::env::var("XDG_DATA_HOME") {
        Ok(dir) => std::path::PathBuf::from(dir),
        Err(_) => dirs::home_dir()
            .unwrap_or_else(|| std::path::PathBuf::from("."))
            .join(".local/share"),
    };
    data_home.join("normalize").join(override_path)
}
/// Scans every markdown file under `root` for backtick-quoted references and
/// reports those that resolve neither to an indexed symbol nor to a file.
///
/// The symbol set is loaded from `index.sqlite` inside the directory returned
/// by `normalize_dir_for_root`. Markdown files are discovered with the
/// gitignore-aware walker configured by `walk_config`. Lines inside fenced
/// code blocks are skipped, as are references rejected by
/// `is_common_non_symbol`. A reference that `looks_like_file_path` is checked
/// on disk (relative to the markdown file's directory, then relative to
/// `root`, with `::` mapped to `/`); any other reference is looked up in the
/// symbol set.
///
/// # Errors
///
/// Returns an error string when the index cannot be opened, or when no symbols
/// are available. A failed symbol query is logged as a warning and then
/// treated as an empty index, so it also ends in the "no symbols" error.
pub async fn build_check_refs_report(
    root: &Path,
    walk_config: &normalize_rules_config::WalkConfig,
) -> Result<CheckRefsReport, String> {
    let index_path = normalize_dir_for_root(root).join("index.sqlite");
    let index = normalize_facts::FileIndex::open(&index_path, root)
        .await
        .map_err(|e| format!("Failed to open index: {e}"))?;

    let all_symbols = index.all_symbol_names().await.unwrap_or_else(|e| {
        tracing::warn!(
            "normalize-native-rules: failed to query symbol names: {}",
            e
        );
        std::collections::HashSet::new()
    });
    if all_symbols.is_empty() {
        return Err("No symbols indexed. Run: normalize structure rebuild".to_string());
    }

    let md_files: Vec<_> = crate::walk::gitignore_walk(root, walk_config)
        .filter_map(|entry| {
            let path = entry.path();
            let is_markdown = path.extension().and_then(|ext| ext.to_str()) == Some("md");
            is_markdown.then(|| path.to_path_buf())
        })
        .collect();
    if md_files.is_empty() {
        return Ok(CheckRefsReport {
            broken_refs: Vec::new(),
            files_checked: 0,
            symbols_indexed: all_symbols.len(),
        });
    }

    let code_ref_re = CODE_REF_RE.get_or_init(|| {
        regex::Regex::new(r"`([A-Z][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*)`").unwrap()
    });

    let mut broken_refs: Vec<BrokenRef> = Vec::new();
    for md_file in &md_files {
        // Unreadable files are skipped but still count towards `files_checked`.
        let Ok(content) = std::fs::read_to_string(md_file) else {
            continue;
        };
        let rel_path = md_file
            .strip_prefix(root)
            .unwrap_or(md_file)
            .display()
            .to_string();
        let md_dir = md_file.parent().unwrap_or(root);

        let mut in_code_block = false;
        for (idx, line) in content.lines().enumerate() {
            let trimmed = line.trim();
            // A line opening with a triple-backtick fence toggles code-block state.
            if trimmed.starts_with("```") {
                in_code_block = !in_code_block;
                continue;
            }
            if in_code_block {
                continue;
            }

            for cap in code_ref_re.captures_iter(line) {
                let reference = &cap[1];
                if is_common_non_symbol(reference) {
                    continue;
                }

                let is_broken = if looks_like_file_path(reference) {
                    let as_path = reference.replace("::", "/");
                    !md_dir.join(&as_path).exists() && !root.join(&as_path).exists()
                } else {
                    !all_symbols.contains(reference)
                };

                if is_broken {
                    broken_refs.push(BrokenRef {
                        file: rel_path.clone(),
                        line: idx + 1,
                        reference: reference.to_string(),
                        context: trimmed.to_string(),
                    });
                }
            }
        }
    }

    Ok(CheckRefsReport {
        broken_refs,
        files_checked: md_files.len(),
        symbols_indexed: all_symbols.len(),
    })
}
impl From<CheckRefsReport> for DiagnosticsReport {
fn from(report: CheckRefsReport) -> Self {
DiagnosticsReport {
issues: report
.broken_refs
.into_iter()
.map(|r| Issue {
file: r.file,
line: Some(r.line),
column: None,
end_line: None,
end_column: None,
rule_id: "broken-ref".into(),
message: if looks_like_file_path(&r.reference) {
format!("broken file link `{}`", r.reference)
} else {
format!("unknown symbol `{}`", r.reference)
},
severity: Severity::Warning,
source: "check-refs".into(),
related: vec![],
suggestion: None,
})
.collect(),
files_checked: report.files_checked,
sources_run: vec!["check-refs".into()],
tool_errors: vec![],
daemon_cached: false,
}
}
}
/// Heuristically decides whether `s` names a file rather than a symbol.
///
/// Returns `true` when the text after the last `.` is one to five bytes long
/// and consists solely of ASCII lowercase letters (e.g. `README.md`). Returns
/// `false` when `s` has no `.` or when the suffix is empty, too long, or
/// contains any other character.
fn looks_like_file_path(s: &str) -> bool {
    s.rsplit_once('.').is_some_and(|(_, ext)| {
        (1..=5).contains(&ext.len()) && ext.bytes().all(|b| b.is_ascii_lowercase())
    })
}
/// Returns `true` for backtick-quoted text that should not be treated as a
/// project symbol reference.
///
/// Three kinds of input are rejected:
/// - strings shorter than two bytes (including the empty string),
/// - strings made up only of uppercase characters and underscores
///   (e.g. `MAX_SIZE`),
/// - an exact match against a fixed list of comment markers, literals and
///   standard-library / serde names.
fn is_common_non_symbol(s: &str) -> bool {
    const WELL_KNOWN: &[&str] = &[
        "TODO",
        "FIXME",
        "NOTE",
        "HACK",
        "XXX",
        "BUG",
        "OK",
        "Err",
        "Ok",
        "None",
        "Some",
        "True",
        "False",
        "String",
        "Vec",
        "Option",
        "Result",
        "Box",
        "Arc",
        "Rc",
        "HashMap",
        "HashSet",
        "BTreeMap",
        "BTreeSet",
        "PathBuf",
        "Path",
        "File",
        "Read",
        "Write",
        "Debug",
        "Clone",
        "Copy",
        "Default",
        "Send",
        "Sync",
        "Serialize",
        "Deserialize",
    ];

    if s.len() < 2 {
        return true;
    }
    if s.chars().all(|c| c == '_' || c.is_uppercase()) {
        return true;
    }
    WELL_KNOWN.contains(&s)
}