normalize_native_rules/
check_refs.rs1use normalize_output::OutputFormatter;
4use normalize_output::diagnostics::{DiagnosticsReport, Issue, Severity};
5use serde::Serialize;
6use std::path::Path;
7
8static CODE_REF_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
9
10#[derive(Debug, Clone, Serialize, schemars::JsonSchema)]
12struct BrokenRef {
13 file: String,
14 line: usize,
15 reference: String,
16 context: String,
17}
18
19#[derive(Debug, Serialize, schemars::JsonSchema)]
21pub struct CheckRefsReport {
22 broken_refs: Vec<BrokenRef>,
23 files_checked: usize,
24 symbols_indexed: usize,
25}
26
27impl OutputFormatter for CheckRefsReport {
28 fn format_text(&self) -> String {
29 let mut lines = Vec::new();
30 lines.push("Documentation Reference Check".to_string());
31 lines.push(String::new());
32 lines.push(format!("Files checked: {}", self.files_checked));
33 lines.push(format!("Symbols indexed: {}", self.symbols_indexed));
34 lines.push(String::new());
35
36 if self.broken_refs.is_empty() {
37 lines.push("No broken references found.".to_string());
38 } else {
39 lines.push(format!("Broken references ({}):", self.broken_refs.len()));
40 lines.push(String::new());
41 for r in &self.broken_refs {
42 lines.push(format!(" {}:{}: `{}`", r.file, r.line, r.reference));
43 if r.context.len() <= 80 {
44 lines.push(format!(" {}", r.context));
45 }
46 }
47 }
48
49 lines.join("\n")
50 }
51}
52
53pub fn normalize_dir_for_root(root: &Path) -> std::path::PathBuf {
63 if let Ok(index_dir) = std::env::var("NORMALIZE_INDEX_DIR") {
64 let path = std::path::PathBuf::from(&index_dir);
65 if path.is_absolute() {
66 return path;
67 }
68 let data_home = std::env::var("XDG_DATA_HOME")
69 .map(std::path::PathBuf::from)
70 .unwrap_or_else(|_| {
71 dirs::home_dir()
72 .unwrap_or_else(|| std::path::PathBuf::from("."))
73 .join(".local/share")
74 });
75 return data_home.join("normalize").join(path);
76 }
77 root.join(".normalize")
78}
79
80pub async fn build_check_refs_report(
82 root: &Path,
83 walk_config: &normalize_rules_config::WalkConfig,
84) -> Result<CheckRefsReport, String> {
85 let db_path = normalize_dir_for_root(root).join("index.sqlite");
87 let idx = normalize_facts::FileIndex::open(&db_path, root)
88 .await
89 .map_err(|e| format!("Failed to open index: {e}"))?;
90
91 let all_symbols = match idx.all_symbol_names().await {
93 Ok(syms) => syms,
94 Err(e) => {
95 tracing::warn!(
96 "normalize-native-rules: failed to query symbol names: {}",
97 e
98 );
99 std::collections::HashSet::new()
100 }
101 };
102
103 if all_symbols.is_empty() {
104 return Err("No symbols indexed. Run: normalize structure rebuild".to_string());
105 }
106
107 let md_files: Vec<_> = crate::walk::gitignore_walk(root, walk_config)
109 .filter(|e| e.path().extension().and_then(|s| s.to_str()) == Some("md"))
110 .map(|e| e.path().to_path_buf())
111 .collect();
112
113 if md_files.is_empty() {
114 return Ok(CheckRefsReport {
115 broken_refs: Vec::new(),
116 files_checked: 0,
117 symbols_indexed: all_symbols.len(),
118 });
119 }
120
121 let code_ref_re = CODE_REF_RE.get_or_init(|| {
123 regex::Regex::new(r"`([A-Z][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*)`").unwrap()
125 });
126
127 let mut broken_refs: Vec<BrokenRef> = Vec::new();
128
129 for md_file in &md_files {
130 let content = match std::fs::read_to_string(md_file) {
131 Ok(c) => c,
132 Err(_) => continue,
133 };
134
135 let rel_path = md_file
136 .strip_prefix(root)
137 .unwrap_or(md_file)
138 .display()
139 .to_string();
140
141 let md_dir = md_file.parent().unwrap_or(root);
142
143 let mut in_code_block = false;
144 for (line_num, line) in content.lines().enumerate() {
145 if line.trim().starts_with("```") {
146 in_code_block = !in_code_block;
147 continue;
148 }
149 if in_code_block {
150 continue;
151 }
152
153 for cap in code_ref_re.captures_iter(line) {
154 let reference = &cap[1];
155
156 if is_common_non_symbol(reference) {
157 continue;
158 }
159
160 if looks_like_file_path(reference) {
161 let file_path = md_dir.join(reference.replace("::", "/"));
163 if !file_path.exists() && !root.join(reference.replace("::", "/")).exists() {
164 broken_refs.push(BrokenRef {
165 file: rel_path.clone(),
166 line: line_num + 1,
167 reference: reference.to_string(),
168 context: line.trim().to_string(),
169 });
170 }
171 } else if !all_symbols.contains(reference) {
172 broken_refs.push(BrokenRef {
173 file: rel_path.clone(),
174 line: line_num + 1,
175 reference: reference.to_string(),
176 context: line.trim().to_string(),
177 });
178 }
179 }
180 }
181 }
182
183 Ok(CheckRefsReport {
184 broken_refs,
185 files_checked: md_files.len(),
186 symbols_indexed: all_symbols.len(),
187 })
188}
189
190impl From<CheckRefsReport> for DiagnosticsReport {
191 fn from(report: CheckRefsReport) -> Self {
192 DiagnosticsReport {
193 issues: report
194 .broken_refs
195 .into_iter()
196 .map(|r| Issue {
197 file: r.file,
198 line: Some(r.line),
199 column: None,
200 end_line: None,
201 end_column: None,
202 rule_id: "broken-ref".into(),
203 message: if looks_like_file_path(&r.reference) {
204 format!("broken file link `{}`", r.reference)
205 } else {
206 format!("unknown symbol `{}`", r.reference)
207 },
208 severity: Severity::Warning,
209 source: "check-refs".into(),
210 related: vec![],
211 suggestion: None,
212 })
213 .collect(),
214 files_checked: report.files_checked,
215 sources_run: vec!["check-refs".into()],
216 tool_errors: vec![],
217 daemon_cached: false,
218 }
219 }
220}
221
/// Heuristic: treats `s` as a file path when the text after its last `.` is
/// one to five ASCII lowercase letters (for example `README.md`).
///
/// Returns `false` when `s` contains no `.` at all.
fn looks_like_file_path(s: &str) -> bool {
    match s.rsplit_once('.') {
        Some((_, ext)) => {
            matches!(ext.len(), 1..=5) && ext.bytes().all(|b| b.is_ascii_lowercase())
        }
        None => false,
    }
}
234
/// Returns `true` for backtick-quoted words that should not be checked as
/// project symbols.
///
/// That covers the fixed list below (marker words and common standard-library
/// or serde names), anything shorter than two bytes, and anything made up only
/// of uppercase letters and underscores.
fn is_common_non_symbol(s: &str) -> bool {
    const IGNORED: &[&str] = &[
        "TODO",
        "FIXME",
        "NOTE",
        "HACK",
        "XXX",
        "BUG",
        "OK",
        "Err",
        "Ok",
        "None",
        "Some",
        "True",
        "False",
        "String",
        "Vec",
        "Option",
        "Result",
        "Box",
        "Arc",
        "Rc",
        "HashMap",
        "HashSet",
        "BTreeMap",
        "BTreeSet",
        "PathBuf",
        "Path",
        "File",
        "Read",
        "Write",
        "Debug",
        "Clone",
        "Copy",
        "Default",
        "Send",
        "Sync",
        "Serialize",
        "Deserialize",
    ];

    if s.len() < 2 || IGNORED.contains(&s) {
        return true;
    }

    // Constant-style names such as `MAX_SIZE`.
    s.chars().all(|ch| ch == '_' || ch.is_uppercase())
}