1use normalize_output::diagnostics::{DiagnosticsReport, Issue, RelatedLocation, Severity};
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10
11#[derive(serde::Deserialize, Default)]
14pub struct StaleDocConfig {
15 #[serde(default)]
17 pub min_co_changes: Option<u64>,
18 #[serde(default)]
20 pub min_lag_days: Option<u64>,
21 #[serde(default)]
23 pub doc_patterns: Vec<String>,
24}
25
/// Glob patterns used to recognise documentation files when the configuration
/// supplies none of its own.
const DEFAULT_DOC_PATTERNS: &[&str] = &[
    "**/*.md",
    "**/*.rst",
    "docs/**/*",
];

/// File names that `is_doc_file` never treats as documentation, even when a
/// doc pattern matches their path.
const EXCLUDED_FILENAMES: &[&str] = &[
    "SUMMARY.md",
];
31
32fn is_doc_file(rel_path: &str, patterns: &[glob::Pattern]) -> bool {
34 let file_name = std::path::Path::new(rel_path)
35 .file_name()
36 .map(|n| n.to_string_lossy().into_owned())
37 .unwrap_or_default();
38 if EXCLUDED_FILENAMES.contains(&file_name.as_str()) {
39 return false;
40 }
41 patterns.iter().any(|p| p.matches(rel_path))
42}
43
44fn gix_open(root: &Path) -> Option<gix::Repository> {
46 gix::discover(root).ok()
47}
48
49pub fn git_last_commit_time(root: &Path, rel_path: &str) -> Option<i64> {
54 let repo = gix_open(root)?;
55 let head_id = repo.head_id().ok()?;
56 let walk = head_id
57 .ancestors()
58 .sorting(gix::revision::walk::Sorting::ByCommitTime(
59 gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
60 ))
61 .all()
62 .ok()?;
63
64 for info in walk {
65 let Ok(info) = info else { continue };
66 let Ok(commit) = info.object() else { continue };
67 let Ok(tree) = commit.tree() else { continue };
68 let parent_tree = info
69 .parent_ids()
70 .next()
71 .and_then(|pid| pid.object().ok())
72 .and_then(|obj| obj.into_commit().tree().ok());
73 let changes = match repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None) {
74 Ok(c) => c,
75 Err(_) => continue,
76 };
77 let touches = changes.iter().any(|change| {
78 use gix::object::tree::diff::ChangeDetached;
79 let loc = match change {
80 ChangeDetached::Addition { location, .. }
81 | ChangeDetached::Deletion { location, .. }
82 | ChangeDetached::Modification { location, .. } => location.as_slice(),
83 ChangeDetached::Rewrite {
84 source_location, ..
85 } => source_location.as_slice(),
86 };
87 loc == rel_path.as_bytes()
88 });
89 if touches {
90 return info.commit_time;
91 }
92 }
93 None
94}
95
96pub fn build_stale_doc_report(
107 root: &Path,
108 config: StaleDocConfig,
109 files: Option<&[PathBuf]>,
110) -> DiagnosticsReport {
111 let min_co_changes = config.min_co_changes.unwrap_or(3) as usize;
112 let min_lag_secs = config.min_lag_days.unwrap_or(0) * 86400;
113
114 let raw_patterns: Vec<&str> = if config.doc_patterns.is_empty() {
116 DEFAULT_DOC_PATTERNS.to_vec()
117 } else {
118 config.doc_patterns.iter().map(String::as_str).collect()
119 };
120 let patterns: Vec<glob::Pattern> = raw_patterns
121 .iter()
122 .filter_map(|p| glob::Pattern::new(p).ok())
123 .collect();
124
125 let index_path = root.join(".normalize").join("index.sqlite");
127 if !index_path.exists() {
128 return DiagnosticsReport {
129 issues: vec![],
130 files_checked: 0,
131 sources_run: vec!["stale-doc".into()],
132 tool_errors: vec![normalize_output::diagnostics::ToolFailure {
133 tool: "stale-doc".into(),
134 message: "index not built — run `normalize structure rebuild` to enable stale-doc"
135 .into(),
136 }],
137 daemon_cached: false,
138 };
139 }
140
141 let edges_result = {
143 let db_path = index_path.clone();
144 let root_path = root.to_path_buf();
145 std::thread::Builder::new()
146 .stack_size(8 * 1024 * 1024)
147 .spawn(move || {
148 let rt = tokio::runtime::Builder::new_current_thread()
149 .enable_all()
150 .build()
151 .ok()?;
152 rt.block_on(async {
153 let index = normalize_facts::FileIndex::open(&db_path, &root_path)
154 .await
155 .ok()?;
156 index.query_co_change_edges(min_co_changes).await.ok()?
157 })
158 })
159 .ok()
160 .and_then(|h| h.join().ok())
161 .flatten()
162 };
163
164 let Some(edges) = edges_result else {
165 return DiagnosticsReport {
166 issues: vec![],
167 files_checked: 0,
168 sources_run: vec!["stale-doc".into()],
169 tool_errors: vec![normalize_output::diagnostics::ToolFailure {
170 tool: "stale-doc".into(),
171 message: "co_change_edges table is empty or index could not be read — run `normalize structure rebuild`".into(),
172 }],
173 daemon_cached: false,
174 };
175 };
176
177 if edges.is_empty() {
178 return DiagnosticsReport {
179 issues: vec![],
180 files_checked: 0,
181 sources_run: vec!["stale-doc".into()],
182 tool_errors: vec![normalize_output::diagnostics::ToolFailure {
183 tool: "stale-doc".into(),
184 message: "co_change_edges table is empty — run `normalize structure rebuild` to populate it".into(),
185 }],
186 daemon_cached: false,
187 };
188 }
189
190 let mut doc_to_partners: HashMap<String, Vec<(String, usize)>> = HashMap::new();
193 for (file_a, file_b, count) in &edges {
194 let a_is_doc = is_doc_file(file_a, &patterns);
195 let b_is_doc = is_doc_file(file_b, &patterns);
196 if a_is_doc && !b_is_doc {
199 doc_to_partners
200 .entry(file_a.clone())
201 .or_default()
202 .push((file_b.clone(), *count));
203 } else if b_is_doc && !a_is_doc {
204 doc_to_partners
205 .entry(file_b.clone())
206 .or_default()
207 .push((file_a.clone(), *count));
208 }
209 }
210
211 if doc_to_partners.is_empty() {
212 return DiagnosticsReport {
213 issues: vec![],
214 files_checked: 0,
215 sources_run: vec!["stale-doc".into()],
216 tool_errors: vec![],
217 daemon_cached: false,
218 };
219 }
220
221 let doc_files: Vec<String> = if let Some(explicit_files) = files {
223 let explicit_rel: Vec<String> = explicit_files
224 .iter()
225 .filter_map(|p| {
226 p.strip_prefix(root)
227 .ok()
228 .map(|r| r.to_string_lossy().into_owned())
229 })
230 .collect();
231 doc_to_partners
232 .keys()
233 .filter(|k| explicit_rel.contains(k))
234 .cloned()
235 .collect()
236 } else {
237 doc_to_partners.keys().cloned().collect()
238 };
239
240 let files_checked = doc_files.len();
241
242 let mut commit_time_cache: HashMap<String, Option<i64>> = HashMap::new();
244
245 let mut issues = Vec::new();
246
247 for doc_path in &doc_files {
248 if !root.join(doc_path).exists() {
250 continue;
251 }
252
253 let doc_time = *commit_time_cache
254 .entry(doc_path.clone())
255 .or_insert_with(|| git_last_commit_time(root, doc_path));
256
257 let Some(doc_ts) = doc_time else {
258 continue;
260 };
261
262 let partners = &doc_to_partners[doc_path];
263
264 let mut worst_partner: Option<(&str, usize, i64)> = None; for (partner_path, co_count) in partners {
268 if !root.join(partner_path).exists() {
270 continue;
271 }
272
273 let partner_time = *commit_time_cache
274 .entry(partner_path.clone())
275 .or_insert_with(|| git_last_commit_time(root, partner_path));
276
277 let Some(partner_ts) = partner_time else {
278 continue;
279 };
280
281 if partner_ts <= doc_ts {
282 continue;
283 }
284
285 let lag = (partner_ts - doc_ts) as u64;
286 if lag < min_lag_secs {
287 continue;
288 }
289
290 let is_worse = worst_partner
292 .map(|(_, _, worst_ts)| partner_ts > worst_ts)
293 .unwrap_or(true);
294 if is_worse {
295 worst_partner = Some((partner_path.as_str(), *co_count, partner_ts));
296 }
297 }
298
299 if let Some((partner_path, co_count, partner_ts)) = worst_partner {
300 let lag_days = ((partner_ts - doc_ts) as u64) / 86400;
301 issues.push(Issue {
302 file: doc_path.clone(),
303 line: None,
304 column: None,
305 end_line: None,
306 end_column: None,
307 rule_id: "stale-doc".into(),
308 message: format!(
309 "possibly stale — {partner_path} was updated {lag_days} day{} more recently (last co-changed {co_count} times)",
310 if lag_days == 1 { "" } else { "s" }
311 ),
312 severity: Severity::Warning,
313 source: "stale-doc".into(),
314 related: vec![RelatedLocation {
315 file: partner_path.to_string(),
316 line: None,
317 message: Some(format!("co-changed {co_count} times, updated {lag_days} day{} more recently than doc", if lag_days == 1 { "" } else { "s" })),
318 }],
319 suggestion: Some(format!(
320 "review {doc_path} to ensure it reflects recent changes in {partner_path}"
321 )),
322 });
323 }
324 }
325
326 issues.sort_by(|a, b| a.file.cmp(&b.file));
328
329 DiagnosticsReport {
330 issues,
331 files_checked,
332 sources_run: vec!["stale-doc".into()],
333 tool_errors: vec![],
334 daemon_cached: false,
335 }
336}