1use regex::Regex;
4use serde::Serialize;
5use std::collections::{HashMap, HashSet};
6use std::path::Path;
7
/// A single reference that failed validation.
#[derive(Debug, Clone, Serialize)]
pub struct BrokenReference {
    /// Key of the input-map entry (the file or config name) that contained the reference.
    pub source_file: String,
    /// The link exactly as written in the source, or `@<path>` for a directive reference.
    pub target: String,
    /// Why the reference was rejected.
    pub kind: BrokenRefKind,
}
18
/// The reason a reference was reported as broken.
#[derive(Debug, Clone, Serialize)]
pub enum BrokenRefKind {
    /// The file part of a link resolved to a path that is not in the set of known files.
    FileNotFound,
    /// The target file is known (or the link points into its own file), but the
    /// `#fragment` is not among the headings recorded for that file.
    HeadingNotFound,
    /// An `@path` directive whose path is not in the set of known files.
    DirectiveReference,
}
26
/// Aggregate outcome of an integrity analysis run.
#[derive(Debug, Clone, Serialize)]
pub struct IntegrityResult {
    /// `valid_refs / total_refs`, or `1.0` when no references were checked.
    pub score: f64,
    /// Number of references that were checked; `http://`, `https://` and `mailto:`
    /// links are skipped and not counted.
    pub total_refs: usize,
    /// Number of checked references that passed validation.
    pub valid_refs: usize,
    /// Every reference that failed validation.
    pub broken: Vec<BrokenReference>,
}
39
/// Stateless entry point for the reference-integrity checks; all functionality is
/// exposed through associated functions, so no instance is needed.
pub struct IntegrityAnalyzer;
41
42impl IntegrityAnalyzer {
43 pub fn analyze(
49 file_links: &HashMap<String, Vec<String>>,
50 known_files: &HashSet<String>,
51 file_headings: &HashMap<String, Vec<String>>,
52 ) -> IntegrityResult {
53 let mut total_refs: usize = 0;
54 let mut valid_refs: usize = 0;
55 let mut broken = Vec::new();
56
57 for (source, links) in file_links {
58 let source_dir = Path::new(source)
59 .parent()
60 .map(|p| p.to_string_lossy().to_string())
61 .unwrap_or_default();
62
63 for link in links {
64 if link.starts_with("http://") || link.starts_with("https://") || link.starts_with("mailto:") {
66 continue;
67 }
68
69 total_refs += 1;
70
71 let (file_part, heading_part) = if let Some(idx) = link.find('#') {
73 let file = &link[..idx];
74 let heading = &link[idx + 1..];
75 (file, if heading.is_empty() { None } else { Some(heading) })
76 } else {
77 (link.as_str(), None)
78 };
79
80 let resolved = if file_part.is_empty() {
82 source.clone()
84 } else {
85 Self::resolve_path(&source_dir, file_part)
86 };
87
88 if !file_part.is_empty() && !known_files.contains(&resolved) {
90 broken.push(BrokenReference {
91 source_file: source.clone(),
92 target: link.clone(),
93 kind: BrokenRefKind::FileNotFound,
94 });
95 continue;
96 }
97
98 if let Some(heading) = heading_part {
100 let headings = file_headings.get(&resolved);
101 let heading_exists = headings
102 .map(|hs| hs.iter().any(|h| h == heading))
103 .unwrap_or(false);
104
105 if heading_exists {
106 valid_refs += 1;
107 } else {
108 broken.push(BrokenReference {
109 source_file: source.clone(),
110 target: link.clone(),
111 kind: BrokenRefKind::HeadingNotFound,
112 });
113 }
114 } else {
115 valid_refs += 1;
116 }
117 }
118 }
119
120 Self::check_directive_references(
122 file_links,
123 known_files,
124 &mut total_refs,
125 &mut valid_refs,
126 &mut broken,
127 );
128
129 let score = if total_refs == 0 {
130 1.0 } else {
132 valid_refs as f64 / total_refs as f64
133 };
134
135 IntegrityResult {
136 score,
137 total_refs,
138 valid_refs,
139 broken,
140 }
141 }
142
143 fn check_directive_references(
148 file_links: &HashMap<String, Vec<String>>,
149 known_files: &HashSet<String>,
150 total_refs: &mut usize,
151 valid_refs: &mut usize,
152 broken: &mut Vec<BrokenReference>,
153 ) {
154 let directive_re = Regex::new(r"@([a-zA-Z0-9_./-]+(?:\.[a-zA-Z]{1,6}|/[a-zA-Z0-9_.-]+))").unwrap();
163
164 for (source, links) in file_links {
165 for link in links {
168 if link.starts_with("http://") || link.starts_with("https://") || link.starts_with("mailto:") {
170 continue;
171 }
172 if let Some(cap) = directive_re.captures(link) {
173 let target = cap.get(1).unwrap().as_str();
174 *total_refs += 1;
175
176 if known_files.contains(target) {
177 *valid_refs += 1;
178 } else {
179 broken.push(BrokenReference {
180 source_file: source.clone(),
181 target: format!("@{}", target),
182 kind: BrokenRefKind::DirectiveReference,
183 });
184 }
185 }
186 }
187 }
188 }
189
190 pub fn analyze_directives(
195 config_contents: &HashMap<String, String>,
196 known_files: &HashSet<String>,
197 ) -> Vec<BrokenReference> {
198 let directive_re = Regex::new(r"@([a-zA-Z0-9_./-]+(?:\.[a-zA-Z]{1,6}|/[a-zA-Z0-9_.-]+))").unwrap();
199 let mut broken = Vec::new();
200
201 for (source, content) in config_contents {
202 for cap in directive_re.captures_iter(content) {
203 let target = cap.get(1).unwrap().as_str();
204 if !known_files.contains(target) {
205 broken.push(BrokenReference {
206 source_file: source.clone(),
207 target: format!("@{}", target),
208 kind: BrokenRefKind::DirectiveReference,
209 });
210 }
211 }
212 }
213
214 broken
215 }
216
217 fn resolve_path(base_dir: &str, relative: &str) -> String {
219 if relative.starts_with('/') {
220 relative.trim_start_matches('/').to_string()
222 } else {
223 let base = Path::new(base_dir);
224 let resolved = base.join(relative);
225 let mut parts: Vec<&str> = Vec::new();
227 for component in resolved.components() {
228 match component {
229 std::path::Component::Normal(s) => {
230 parts.push(s.to_str().unwrap_or(""));
231 }
232 std::path::Component::ParentDir => {
233 parts.pop();
234 }
235 std::path::Component::CurDir => {}
236 _ => {}
237 }
238 }
239 parts.join("/")
240 }
241 }
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing floating-point scores.
    const EPS: f64 = 0.01;

    /// Builds the set of known file paths from string literals.
    fn make_known_files(files: &[&str]) -> HashSet<String> {
        let mut known = HashSet::new();
        for name in files {
            known.insert((*name).to_string());
        }
        known
    }

    /// Builds the file -> headings map from `(file, headings)` pairs.
    fn make_headings(entries: &[(&str, &[&str])]) -> HashMap<String, Vec<String>> {
        let mut map = HashMap::new();
        for (file, headings) in entries {
            let owned: Vec<String> = headings.iter().map(|h| h.to_string()).collect();
            map.insert(file.to_string(), owned);
        }
        map
    }

    /// Builds a link map containing a single source file and its link targets.
    fn links_from(source: &str, targets: &[&str]) -> HashMap<String, Vec<String>> {
        let mut map = HashMap::new();
        map.insert(
            source.to_string(),
            targets.iter().map(|t| t.to_string()).collect(),
        );
        map
    }

    /// Builds a config map containing a single file and its text.
    fn config_from(source: &str, content: &str) -> HashMap<String, String> {
        let mut map = HashMap::new();
        map.insert(source.to_string(), content.to_string());
        map
    }

    /// Asserts that the result's score is within `EPS` of `expected`.
    fn assert_score(result: &IntegrityResult, expected: f64) {
        assert!((result.score - expected).abs() < EPS);
    }

    #[test]
    fn test_all_valid_links() {
        let links = links_from("docs/guide.md", &["../README.md", "./api.md"]);
        let files = make_known_files(&["README.md", "docs/guide.md", "docs/api.md"]);
        let headings = HashMap::new();

        let result = IntegrityAnalyzer::analyze(&links, &files, &headings);

        assert_eq!(result.total_refs, 2);
        assert_eq!(result.valid_refs, 2);
        assert_score(&result, 1.0);
        assert!(result.broken.is_empty());
    }

    #[test]
    fn test_broken_file_link() {
        let links = links_from("README.md", &["docs/missing.md"]);
        let files = make_known_files(&["README.md"]);
        let headings = HashMap::new();

        let result = IntegrityAnalyzer::analyze(&links, &files, &headings);

        assert_eq!(result.total_refs, 1);
        assert_eq!(result.valid_refs, 0);
        assert_score(&result, 0.0);
        assert_eq!(result.broken.len(), 1);
        assert!(matches!(result.broken[0].kind, BrokenRefKind::FileNotFound));
    }

    #[test]
    fn test_broken_heading_link() {
        let links = links_from("README.md", &["docs/api.md#nonexistent"]);
        let files = make_known_files(&["README.md", "docs/api.md"]);
        let headings = make_headings(&[("docs/api.md", &["getting-started", "usage"])]);

        let result = IntegrityAnalyzer::analyze(&links, &files, &headings);

        assert_eq!(result.total_refs, 1);
        assert_eq!(result.valid_refs, 0);
        assert_eq!(result.broken.len(), 1);
        assert!(matches!(result.broken[0].kind, BrokenRefKind::HeadingNotFound));
    }

    #[test]
    fn test_valid_heading_link() {
        let links = links_from("README.md", &["docs/api.md#usage"]);
        let files = make_known_files(&["README.md", "docs/api.md"]);
        let headings = make_headings(&[("docs/api.md", &["usage"])]);

        let result = IntegrityAnalyzer::analyze(&links, &files, &headings);

        assert_eq!(result.valid_refs, 1);
        assert_score(&result, 1.0);
    }

    #[test]
    fn test_external_links_skipped() {
        let links = links_from(
            "README.md",
            &[
                "https://example.com",
                "http://example.com",
                "mailto:test@test.com",
            ],
        );
        let files = make_known_files(&["README.md"]);
        let headings = HashMap::new();

        let result = IntegrityAnalyzer::analyze(&links, &files, &headings);

        // External links are never counted, so the score falls back to 1.0.
        assert_eq!(result.total_refs, 0);
        assert_score(&result, 1.0);
    }

    #[test]
    fn test_no_links() {
        let links = HashMap::new();
        let files = make_known_files(&["README.md"]);
        let headings = HashMap::new();

        let result = IntegrityAnalyzer::analyze(&links, &files, &headings);

        assert_score(&result, 1.0);
    }

    #[test]
    fn test_at_reference_valid() {
        let config = config_from("CLAUDE.md", "@AGENTS.md\n@.claude/instructions");
        let files = make_known_files(&["CLAUDE.md", "AGENTS.md", ".claude/instructions"]);

        let broken = IntegrityAnalyzer::analyze_directives(&config, &files);

        assert!(broken.is_empty(), "Valid @references should not be broken");
    }

    #[test]
    fn test_at_reference_broken() {
        let config = config_from("CLAUDE.md", "@AGENTS.md\n@.claude/missing.md");
        let files = make_known_files(&["CLAUDE.md", "AGENTS.md"]);

        let broken = IntegrityAnalyzer::analyze_directives(&config, &files);

        assert_eq!(broken.len(), 1);
        assert!(broken[0].target.contains("missing.md"));
        assert!(matches!(broken[0].kind, BrokenRefKind::DirectiveReference));
    }

    #[test]
    fn test_claude_dir_reference_validation() {
        let config = config_from(
            "CLAUDE.md",
            "Read @.claude/AGENT_FLOW_RULES.md for details",
        );

        // With the referenced file present, nothing is reported.
        let files = make_known_files(&["CLAUDE.md", ".claude/AGENT_FLOW_RULES.md"]);
        let broken = IntegrityAnalyzer::analyze_directives(&config, &files);
        assert!(broken.is_empty());

        // Without it, the same directive is reported once.
        let files_without = make_known_files(&["CLAUDE.md"]);
        let broken2 = IntegrityAnalyzer::analyze_directives(&config, &files_without);
        assert_eq!(broken2.len(), 1);
    }

    #[test]
    fn test_mixed_valid_and_broken() {
        let links = links_from("README.md", &["docs/api.md", "docs/missing.md"]);
        let files = make_known_files(&["README.md", "docs/api.md"]);
        let headings = HashMap::new();

        let result = IntegrityAnalyzer::analyze(&links, &files, &headings);

        assert_eq!(result.total_refs, 2);
        assert_eq!(result.valid_refs, 1);
        assert_score(&result, 0.5);
    }
}