Skip to main content

normalize_native_rules/
stale_summary.rs

1use normalize_output::OutputFormatter;
2use normalize_output::diagnostics::{DiagnosticsReport, Issue, Severity};
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5use std::ffi::OsStr;
6use std::path::Path;
7
8/// Open the git repository at or containing `root` using gix.
9fn gix_open(root: &Path) -> Option<gix::Repository> {
10    gix::discover(root).ok()
11}
12
// One directory whose doc file is considered stale. Each value becomes one
// indented line in `StaleSummaryReport::format_text`.
//
// Added notes in this struct are plain `//` comments on purpose: `///` doc
// comments on a `schemars::JsonSchema` type end up in the generated schema.
#[derive(Debug, Serialize, schemars::JsonSchema)]
struct StaleSummary {
    // Directory label printed at the start of the report line.
    dir: String,
    // Count printed as "<n> commits ... since <filename> last updated".
    commits_since_update: usize,
    // NOTE(review): presumably the hash of the last commit that touched the
    // doc file; it is not printed by `format_text` — confirm where it is set.
    last_summary_commit: String,
    /// True if the directory has uncommitted changes not reflected in the doc file.
    has_uncommitted_changes: bool,
    /// The doc filename that was found (e.g. "SUMMARY.md" or "CLAUDE.md").
    filename: String,
}
23
// One directory that has no committed doc file. Each value becomes one
// indented line in `MissingSummaryReport::format_text`.
//
// Added notes in this struct are plain `//` comments on purpose: `///` doc
// comments on a `schemars::JsonSchema` type end up in the generated schema.
#[derive(Debug, Serialize, schemars::JsonSchema)]
struct MissingSummary {
    // Directory label ("." for the repository root).
    dir: String,
    // Number of committed changes counted for this directory; uncommitted
    // changes are reported separately via `has_uncommitted_changes`.
    total_commits: usize,
    /// True if the directory has uncommitted changes with no doc file at all.
    has_uncommitted_changes: bool,
    /// The candidate doc filenames that were checked (none were found).
    filenames: Vec<String>,
}
33
/// Report produced by the `missing-summary` native rule check.
#[derive(Debug, Serialize, schemars::JsonSchema)]
pub struct MissingSummaryReport {
    // Directories flagged as lacking a doc file.
    missing: Vec<MissingSummary>,
    // Number of directories that were evaluated (flagged or not).
    dirs_checked: usize,
    // Commit-count threshold that must be exceeded before a directory is flagged.
    threshold: usize,
}
41
42impl OutputFormatter for MissingSummaryReport {
43    fn format_text(&self) -> String {
44        let mut lines = Vec::new();
45        lines.push("Doc File Presence Check".to_string());
46        lines.push(String::new());
47        lines.push(format!("Directories checked: {}", self.dirs_checked));
48        lines.push(format!("Commit threshold: {}", self.threshold));
49        lines.push(String::new());
50
51        if self.missing.is_empty() {
52            lines.push("All directories have a doc file.".to_string());
53        } else {
54            lines.push(format!("Missing doc file ({}):", self.missing.len()));
55            for m in &self.missing {
56                let candidates = m.filenames.join(" or ");
57                let suffix = if m.has_uncommitted_changes {
58                    format!(
59                        "{} commits + uncommitted changes, no {}",
60                        m.total_commits, candidates
61                    )
62                } else {
63                    format!("{} commits with no {}", m.total_commits, candidates)
64                };
65                lines.push(format!("  {} ({})", m.dir, suffix));
66            }
67        }
68
69        lines.join("\n")
70    }
71}
72
/// Report produced by the `stale-summary` native rule check.
#[derive(Debug, Serialize, schemars::JsonSchema)]
pub struct StaleSummaryReport {
    // Directories whose doc file is considered stale.
    stale: Vec<StaleSummary>,
    // Number of directories that were evaluated; printed as "Directories checked".
    dirs_checked: usize,
    // Staleness threshold in commits; printed as "Staleness threshold: <n> commits".
    threshold: usize,
}
80
81impl OutputFormatter for StaleSummaryReport {
82    fn format_text(&self) -> String {
83        let mut lines = Vec::new();
84        lines.push("Doc File Freshness Check".to_string());
85        lines.push(String::new());
86        lines.push(format!("Directories checked: {}", self.dirs_checked));
87        lines.push(format!("Staleness threshold: {} commits", self.threshold));
88        lines.push(String::new());
89
90        if self.stale.is_empty() {
91            lines.push("All doc files are up to date.".to_string());
92        } else {
93            lines.push(format!("Stale doc file ({}):", self.stale.len()));
94            for s in &self.stale {
95                let suffix = if s.has_uncommitted_changes {
96                    format!(
97                        "{} commits + uncommitted changes since {} last updated",
98                        s.commits_since_update, s.filename
99                    )
100                } else {
101                    format!(
102                        "{} commits since {} last updated",
103                        s.commits_since_update, s.filename
104                    )
105                };
106                lines.push(format!("  {} ({})", s.dir, suffix));
107            }
108        }
109
110        lines.join("\n")
111    }
112}
113
114// --- Incremental cache ---
115
/// One cached entry per directory, keyed by relative dir path.
///
/// Produced by the git-history helpers in this file and persisted inside
/// [`SummaryCache`].
#[derive(Debug, Serialize, Deserialize)]
struct CacheEntry {
    /// Hash of the most recent commit that touched one of the configured doc files
    /// (e.g. SUMMARY.md) in this directory, or `None` if no such commit was found.
    last_summary_commit: Option<String>,
    /// Commits touching this dir (without touching a doc file) that are newer than
    /// `last_summary_commit`, or all such commits if `last_summary_commit` is `None`.
    commits_count: usize,
}
125
/// Cache file stored at `.normalize/cache/summary-freshness.json`.
///
/// Serialized and deserialized as JSON by `save_cache` / `load_cache`.
#[derive(Debug, Serialize, Deserialize)]
struct SummaryCache {
    /// HEAD commit hash when this cache was written. A different current HEAD
    /// means the entries need to be brought up to date before use.
    head: String,
    /// Per-directory statistics, keyed by directory label ("." for the repo root).
    dirs: HashMap<String, CacheEntry>,
}
133
/// Location of the summary-freshness cache file underneath `root`.
fn cache_path(root: &Path) -> std::path::PathBuf {
    let mut location = root.to_path_buf();
    location.push(".normalize/cache/summary-freshness.json");
    location
}
137
138fn load_cache(root: &Path) -> Option<SummaryCache> {
139    let path = cache_path(root);
140    let content = match std::fs::read_to_string(&path) {
141        Ok(c) => c,
142        Err(_) => return None, // missing cache file is normal
143    };
144    match serde_json::from_str(&content) {
145        Ok(c) => Some(c),
146        Err(e) => {
147            tracing::debug!(
148                "normalize-native-rules: corrupt summary cache at {:?}: {}",
149                path,
150                e
151            );
152            None
153        }
154    }
155}
156
157fn save_cache(root: &Path, cache: &SummaryCache) {
158    let dir = root.join(".normalize/cache");
159    let _ = std::fs::create_dir_all(&dir);
160    if let Ok(json) = serde_json::to_string_pretty(cache) {
161        let _ = std::fs::write(cache_path(root), json);
162    }
163}
164
165fn git_head(root: &Path) -> Option<String> {
166    let repo = gix_open(root)?;
167    let id = repo.head_id().ok()?;
168    let s = id.to_hex().to_string();
169    if s.is_empty() { None } else { Some(s) }
170}
171
172/// Compute summary freshness for all directories in a single git history pass.
173///
174/// For each directory label in `dirs`, computes:
175/// - `last_doc_commit`: the most recent commit hash that touched any doc file in that dir
176/// - `commits_count`: the number of commits touching that dir since `last_doc_commit`
177///   (or all commits touching that dir if no doc file has ever been committed)
178///
179/// This replaces the per-directory `git_last_commit` + `git_commit_count` approach,
180/// which required O(dirs × history_length) git tree diffs. A single pass is O(history_length).
181///
182/// `doc_filenames` is the set of doc file basenames (e.g. `["SUMMARY.md", "CLAUDE.md"]`).
183/// `dirs` maps dir_label → (dir_path, is_root) for all directories to track.
184fn git_batch_commit_stats(
185    root: &Path,
186    dirs: &HashMap<String, (String, bool)>,
187    doc_filenames: &[&str],
188) -> HashMap<String, CacheEntry> {
189    let Some(repo) = gix_open(root) else {
190        return HashMap::new();
191    };
192    let Ok(head_id) = repo.head_id() else {
193        return HashMap::new();
194    };
195    let Ok(walk) = head_id
196        .ancestors()
197        .sorting(gix::revision::walk::Sorting::ByCommitTime(
198            gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
199        ))
200        .all()
201    else {
202        return HashMap::new();
203    };
204
205    // Per-dir state: (last_doc_commit, commits_since_doc, found_doc)
206    // We count commits touching the dir BEFORE we've found the last doc commit.
207    // Once we find the doc commit (walking newest-first), we stop counting for that dir.
208    struct DirState {
209        last_doc_commit: Option<String>,
210        commits_since_doc: usize, // commits touching dir before doc commit found
211        doc_found: bool,
212    }
213
214    let mut states: HashMap<&str, DirState> = dirs
215        .keys()
216        .map(|label| {
217            (
218                label.as_str(),
219                DirState {
220                    last_doc_commit: None,
221                    commits_since_doc: 0,
222                    doc_found: false,
223                },
224            )
225        })
226        .collect();
227
228    // Build lookup: dir_label → (rel_dir_prefix, is_root)
229    let dir_info: Vec<(&str, &str, bool)> = dirs
230        .iter()
231        .map(|(label, (rel_dir, is_root))| (label.as_str(), rel_dir.as_str(), *is_root))
232        .collect();
233
234    for info in walk {
235        let Ok(info) = info else { continue };
236        let Ok(commit) = info.object() else { continue };
237        let Ok(tree) = commit.tree() else { continue };
238        let parent_tree = info
239            .parent_ids()
240            .next()
241            .and_then(|pid| pid.object().ok())
242            .and_then(|obj| obj.into_commit().tree().ok());
243        let changes = match repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None) {
244            Ok(c) => c,
245            Err(_) => continue,
246        };
247
248        // Collect changed paths from this commit once.
249        let changed_paths: Vec<Vec<u8>> = changes
250            .iter()
251            .map(|change| {
252                use gix::object::tree::diff::ChangeDetached;
253                let loc: &[u8] = match &change {
254                    ChangeDetached::Addition { location, .. }
255                    | ChangeDetached::Deletion { location, .. }
256                    | ChangeDetached::Modification { location, .. } => location.as_slice(),
257                    ChangeDetached::Rewrite {
258                        source_location, ..
259                    } => source_location.as_slice(),
260                };
261                loc.to_vec()
262            })
263            .collect();
264
265        let commit_sha = info.id.to_hex().to_string();
266
267        for (label, rel_dir, is_root) in &dir_info {
268            // normalize-syntax-allow: rust/unwrap-in-impl - label is from dirs.keys(); states is keyed by same set
269            let state = states.get_mut(*label).unwrap();
270            if state.doc_found {
271                continue; // already resolved this dir
272            }
273
274            // Check if this commit touches the directory.
275            let touches_dir = if *is_root {
276                !changed_paths.is_empty()
277            } else {
278                changed_paths.iter().any(|loc| {
279                    // loc starts with "rel_dir/" (include the slash to avoid false prefix matches)
280                    let prefix = rel_dir.as_bytes();
281                    loc.starts_with(prefix)
282                        && (loc.len() == prefix.len() || loc.get(prefix.len()) == Some(&b'/'))
283                })
284            };
285
286            if !touches_dir {
287                continue;
288            }
289
290            // Check if this commit touches a doc file in this directory.
291            let touches_doc = changed_paths.iter().any(|loc| {
292                let loc_str = std::str::from_utf8(loc).unwrap_or("");
293                doc_filenames.iter().any(|doc| {
294                    if *is_root {
295                        loc_str == *doc
296                    } else {
297                        let expected = format!("{}/{}", rel_dir, doc);
298                        loc_str == expected
299                    }
300                })
301            });
302
303            if touches_doc {
304                // This is the most recent doc commit for this dir.
305                state.last_doc_commit = Some(commit_sha.clone());
306                state.doc_found = true;
307                // commits_since_doc is already the count before this commit — correct.
308            } else {
309                // Commit touches dir but not a doc file — counts as a "stale" commit.
310                state.commits_since_doc += 1;
311            }
312        }
313    }
314
315    // Convert to CacheEntry format.
316    states
317        .into_iter()
318        .map(|(label, state)| {
319            (
320                label.to_string(),
321                CacheEntry {
322                    last_summary_commit: state.last_doc_commit,
323                    commits_count: state.commits_since_doc,
324                },
325            )
326        })
327        .collect()
328}
329
330/// Update `existing` cache entries in-place by walking only the commits between
331/// `since_sha` (exclusive) and the repository HEAD (inclusive).
332///
333/// This avoids re-walking all of git history on every pre-commit run after the first commit
334/// that follows a cache build. Only the new commits are traversed; existing entries are
335/// updated by incrementing `commits_count` for content commits and resetting it for doc commits.
336///
337/// Walking is newest-first (same order as `git_batch_commit_stats`). The stop condition is
338/// reaching `since_sha`. For each directory touched by a new commit:
339/// - If a doc file is touched and no doc commit has been recorded in this incremental walk yet:
340///   set `last_summary_commit` to this commit, reset `commits_count` to 0. This is the newest
341///   doc commit for this directory in the new range.
342/// - If a doc file is touched but a newer doc commit was already recorded in this walk: ignore
343///   (this is an older doc commit, already superseded).
344/// - If only content is touched and no doc commit has been recorded in this walk yet:
345///   increment `commits_count` (this content commit is newer than the last doc commit).
346/// - If only content is touched and a doc commit was already recorded in this walk: ignore
347///   (this commit is older than the new doc commit and its count is already captured in the
348///   existing `commits_count` from the previous full walk).
349///
350/// **Key invariant**: after an incremental update, `CacheEntry.commits_count` is the number
351/// of content commits since `last_summary_commit` — the same semantics as a full walk.
352fn git_incremental_commit_stats(
353    root: &Path,
354    since_sha: &str,
355    existing: &mut HashMap<String, CacheEntry>,
356    dirs: &HashMap<String, (String, bool)>,
357    doc_filenames: &[&str],
358) {
359    let Some(repo) = gix_open(root) else {
360        return;
361    };
362    let Ok(head_id) = repo.head_id() else {
363        return;
364    };
365    let Ok(walk) = head_id
366        .ancestors()
367        .sorting(gix::revision::walk::Sorting::ByCommitTime(
368            gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
369        ))
370        .all()
371    else {
372        return;
373    };
374
375    // Per-dir incremental state: did we already see a new doc commit for this dir?
376    // `new_doc_found` is set the first time we encounter a doc commit in the new range.
377    struct IncrState {
378        new_doc_found: bool,
379    }
380    let mut inc_states: HashMap<&str, IncrState> = dirs
381        .keys()
382        .map(|label| {
383            (
384                label.as_str(),
385                IncrState {
386                    new_doc_found: false,
387                },
388            )
389        })
390        .collect();
391
392    // Build lookup: dir_label → (rel_dir_prefix, is_root)
393    let dir_info: Vec<(&str, &str, bool)> = dirs
394        .iter()
395        .map(|(label, (rel_dir, is_root))| (label.as_str(), rel_dir.as_str(), *is_root))
396        .collect();
397
398    for info in walk {
399        let Ok(info) = info else { continue };
400
401        let commit_sha = info.id.to_hex().to_string();
402        // Stop when we reach the previously cached HEAD (exclusive lower bound).
403        if commit_sha == since_sha {
404            break;
405        }
406
407        let Ok(commit) = info.object() else { continue };
408        let Ok(tree) = commit.tree() else { continue };
409        let parent_tree = info
410            .parent_ids()
411            .next()
412            .and_then(|pid| pid.object().ok())
413            .and_then(|obj| obj.into_commit().tree().ok());
414        let changes = match repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None) {
415            Ok(c) => c,
416            Err(_) => continue,
417        };
418
419        let changed_paths: Vec<Vec<u8>> = changes
420            .iter()
421            .map(|change| {
422                use gix::object::tree::diff::ChangeDetached;
423                let loc: &[u8] = match &change {
424                    ChangeDetached::Addition { location, .. }
425                    | ChangeDetached::Deletion { location, .. }
426                    | ChangeDetached::Modification { location, .. } => location.as_slice(),
427                    ChangeDetached::Rewrite {
428                        source_location, ..
429                    } => source_location.as_slice(),
430                };
431                loc.to_vec()
432            })
433            .collect();
434
435        for (label, rel_dir, is_root) in &dir_info {
436            // normalize-syntax-allow: rust/unwrap-in-impl - label is from dirs.keys(); inc_states is keyed by same set
437            let inc = inc_states.get_mut(*label).unwrap();
438
439            // Check if this commit touches the directory.
440            let touches_dir = if *is_root {
441                !changed_paths.is_empty()
442            } else {
443                changed_paths.iter().any(|loc| {
444                    let prefix = rel_dir.as_bytes();
445                    loc.starts_with(prefix)
446                        && (loc.len() == prefix.len() || loc.get(prefix.len()) == Some(&b'/'))
447                })
448            };
449
450            if !touches_dir {
451                continue;
452            }
453
454            // Check if this commit touches a doc file in this directory.
455            let touches_doc = changed_paths.iter().any(|loc| {
456                let loc_str = std::str::from_utf8(loc).unwrap_or("");
457                doc_filenames.iter().any(|doc| {
458                    if *is_root {
459                        loc_str == *doc
460                    } else {
461                        let expected = format!("{}/{}", rel_dir, doc);
462                        loc_str == expected
463                    }
464                })
465            });
466
467            if touches_doc && !inc.new_doc_found {
468                // Newest doc commit in the new range: reset the entry.
469                inc.new_doc_found = true;
470                let entry = existing.entry(label.to_string()).or_insert(CacheEntry {
471                    last_summary_commit: None,
472                    commits_count: 0,
473                });
474                entry.last_summary_commit = Some(commit_sha.clone());
475                entry.commits_count = 0;
476            } else if !touches_doc && !inc.new_doc_found {
477                // Content commit newer than any new doc commit: increment counter.
478                let entry = existing.entry(label.to_string()).or_insert(CacheEntry {
479                    last_summary_commit: None,
480                    commits_count: 0,
481                });
482                entry.commits_count += 1;
483            }
484            // touches_doc && inc.new_doc_found → older doc commit, ignore.
485            // !touches_doc && inc.new_doc_found → older content commit, ignore.
486        }
487    }
488}
489
/// All paths with uncommitted changes (staged or unstaged), collected once for the whole repo.
///
/// Built once before the directory loop in both report builders; all per-directory checks
/// then become pure in-memory prefix/membership tests — no further git I/O per directory.
///
/// Both sets are empty when no repository can be opened, and each set is empty on its own
/// when collecting it fails.
struct UncommittedChanges {
    /// Paths (relative to repo root) with staged changes (index differs from HEAD).
    ///
    /// Collected by iterating index entries, so only paths still present in the index
    /// can appear here.
    staged: HashSet<String>,
    /// Paths (relative to repo root) with unstaged changes (worktree differs from index).
    unstaged: HashSet<String>,
}
500
501impl UncommittedChanges {
502    /// Build once for the whole repo.  Opens the repository, reads the index and HEAD tree
503    /// for staged changes, then runs a single worktree status walk for unstaged changes.
504    fn load(root: &Path) -> Self {
505        let Some(repo) = gix_open(root) else {
506            return Self {
507                staged: HashSet::new(),
508                unstaged: HashSet::new(),
509            };
510        };
511
512        // Staged: collect all index entries that differ from HEAD.
513        let staged = (|| -> Option<HashSet<String>> {
514            use gix::bstr::ByteSlice;
515            let head_id = repo.head_id().ok()?;
516            let head_commit = head_id.object().ok()?.into_commit();
517            let head_tree = head_commit.tree().ok()?;
518            let index = repo.index_or_empty().ok()?;
519            let mut set = HashSet::new();
520            for entry in index.entries() {
521                let rela = entry.path(&index);
522                let rela_str = rela.to_str_lossy();
523                let head_blob_id = head_tree
524                    .lookup_entry_by_path(rela_str.as_ref())
525                    .ok()
526                    .flatten()
527                    .map(|e| e.id().detach());
528                // Present in index but not HEAD (new file), or different blob id = staged change.
529                if head_blob_id.as_ref() != Some(&entry.id) {
530                    set.insert(rela_str.into_owned());
531                }
532            }
533            Some(set)
534        })()
535        .unwrap_or_default();
536
537        // Unstaged: single status walk over the whole worktree with no path patterns.
538        let unstaged = (|| -> Option<HashSet<String>> {
539            use gix::bstr::ByteSlice;
540            let platform = repo
541                .status(gix::progress::Discard)
542                .ok()?
543                .index_worktree_options_mut(|opts| {
544                    opts.dirwalk_options = None;
545                });
546            let iter = platform
547                .into_index_worktree_iter(Vec::<gix::bstr::BString>::new())
548                .ok()?;
549            let mut set = HashSet::new();
550            for item in iter.flatten() {
551                let rela = item.rela_path().to_str_lossy();
552                set.insert(rela.into_owned());
553            }
554            Some(set)
555        })()
556        .unwrap_or_default();
557
558        Self { staged, unstaged }
559    }
560
561    /// Returns true if any changed file under `rel_dir` (excluding `doc_paths`) exists.
562    ///
563    /// Used to detect content changes that should trigger a doc-freshness warning.
564    fn has_content_changes(&self, rel_dir: &str, doc_paths: &[String]) -> bool {
565        let is_root = rel_dir == ".";
566        let check = |path: &str| -> bool {
567            if !is_root && !path.starts_with(rel_dir) {
568                return false;
569            }
570            !doc_paths.iter().any(|dp| dp.as_str() == path)
571        };
572        self.staged.iter().any(|p| check(p)) || self.unstaged.iter().any(|p| check(p))
573    }
574
575    /// Returns true if the given doc file path itself has uncommitted changes.
576    ///
577    /// Used to skip stale/missing reporting when the doc is already being updated.
578    fn summary_has_changes(&self, summary_path: &str) -> bool {
579        self.staged.contains(summary_path) || self.unstaged.contains(summary_path)
580    }
581}
582
/// Default filenames checked by `stale-summary` and `missing-summary` when none are configured.
///
/// The report builder falls back to this list when it is given an empty `filenames` slice.
pub const DEFAULT_FILENAMES: &[&str] = &["SUMMARY.md"];
585
586/// Returns true if `dir_label` matches any of the `paths` glob patterns.
587///
588/// A leading `./` in `dir_label` is stripped before matching. If `paths` is empty,
589/// returns `true` (the rule applies everywhere).
590fn dir_matches_paths(dir_label: &str, paths: &[String]) -> bool {
591    if paths.is_empty() {
592        return true;
593    }
594    // Normalize: strip leading "./" for matching
595    let label = dir_label.strip_prefix("./").unwrap_or(dir_label);
596    // The root dir "." matches a bare "." pattern only; for non-root dirs we match
597    // the label against each glob pattern.
598    paths.iter().any(|pat| {
599        glob::Pattern::new(pat)
600            .map(|p| p.matches(label))
601            .unwrap_or(false)
602    })
603}
604
605/// Shared directory walker used by both report builders.
606///
607/// Yields `(dir_path, rel_dir_str, rel_dir_git, dir_label)` tuples for every
608/// non-empty directory in the repository tree (after excluding VCS/build dirs).
609fn walk_dirs(
610    root: &Path,
611    walk_config: &normalize_rules_config::WalkConfig,
612) -> Vec<(std::path::PathBuf, String)> {
613    crate::walk::gitignore_walk(root, walk_config)
614        .filter(|e| e.file_type().is_some_and(|ft| ft.is_dir()))
615        .filter(|e| {
616            !e.path()
617                .components()
618                .any(|c| c.as_os_str() == OsStr::new(".git"))
619        })
620        .filter_map(|e| {
621            let dir_path = e.path().to_path_buf();
622            let has_files = std::fs::read_dir(&dir_path)
623                .map(|mut rd| {
624                    rd.any(|e| {
625                        e.map(|e| e.file_type().map(|t| t.is_file()).unwrap_or(false))
626                            .unwrap_or(false)
627                    })
628                })
629                .unwrap_or(false);
630            if !has_files {
631                return None;
632            }
633            let rel = dir_path
634                .strip_prefix(root)
635                .unwrap_or(&dir_path)
636                .to_string_lossy();
637            let label = if rel.is_empty() {
638                ".".to_string()
639            } else {
640                rel.to_string()
641            };
642            Some((dir_path, label))
643        })
644        .collect()
645}
646
647/// Build a [`MissingSummaryReport`] by walking the repository under `root` and checking
648/// each directory for a doc file that is present (committed at least once).
649///
650/// `filenames` lists the candidate doc filenames (e.g. `["SUMMARY.md", "CLAUDE.md"]`).
651/// A directory is compliant when it has **any** of those files (OR semantics).
652/// Pass an empty slice to fall back to [`DEFAULT_FILENAMES`].
653///
654/// `paths` is a list of glob patterns; only directories matching one of the patterns are
655/// checked. An empty `paths` slice means the rule applies to every directory (default behavior).
656///
657/// Directories that have never had a doc file committed are reported as missing when
658/// the total commit count (plus any uncommitted content changes) exceeds `threshold`.
659pub fn build_missing_summary_report(
660    root: &Path,
661    threshold: usize,
662    filenames: &[String],
663    paths: &[String],
664    walk_config: &normalize_rules_config::WalkConfig,
665) -> MissingSummaryReport {
666    let filenames: Vec<&str> = if filenames.is_empty() {
667        DEFAULT_FILENAMES.to_vec()
668    } else {
669        filenames.iter().map(String::as_str).collect()
670    };
671    let mut missing = Vec::new();
672    let mut dirs_checked = 0;
673
674    // Load incremental cache (shared with stale-summary to avoid redundant git calls).
675    let head = git_head(root);
676    let mut cache = load_cache(root);
677
678    // If the cache exists but HEAD has moved, walk only the new commits and update in-place.
679    if let (Some(c), Some(current_head)) = (&mut cache, &head)
680        && c.head != *current_head
681    {
682        // Build the full dirs map (all directories, not just uncached ones) so the
683        // incremental walk can update any entry that was touched by the new commits.
684        let all_dirs: HashMap<String, (String, bool)> = {
685            let dirs_snapshot = walk_dirs(root, walk_config);
686            dirs_snapshot
687                .iter()
688                .map(|(dir_path, dir_label)| {
689                    let rel = dir_path
690                        .strip_prefix(root)
691                        .unwrap_or(dir_path)
692                        .to_string_lossy();
693                    let rel_dir = if rel.is_empty() {
694                        ".".to_string()
695                    } else {
696                        rel.to_string()
697                    };
698                    let is_root = rel_dir == ".";
699                    (dir_label.clone(), (rel_dir, is_root))
700                })
701                .collect()
702        };
703        git_incremental_commit_stats(root, &c.head, &mut c.dirs, &all_dirs, &filenames);
704        c.head = current_head.clone();
705    }
706
707    let mut updated_dirs: HashMap<String, CacheEntry> = HashMap::new();
708
709    let dirs = walk_dirs(root, walk_config);
710
711    // Identify directories not covered by the cache — compute their stats in a single
712    // git history pass rather than one per directory.
713    let uncached_dirs: HashMap<String, (String, bool)> = dirs
714        .iter()
715        .filter(|(_, dir_label)| {
716            dir_matches_paths(dir_label, paths)
717                && cache
718                    .as_ref()
719                    .is_none_or(|c| !c.dirs.contains_key(dir_label))
720        })
721        .map(|(dir_path, dir_label)| {
722            let rel = dir_path
723                .strip_prefix(root)
724                .unwrap_or(dir_path)
725                .to_string_lossy();
726            let rel_dir = if rel.is_empty() {
727                ".".to_string()
728            } else {
729                rel.to_string()
730            };
731            let is_root = rel_dir == ".";
732            (dir_label.clone(), (rel_dir, is_root))
733        })
734        .collect();
735
736    let batch_results = if uncached_dirs.is_empty() {
737        HashMap::new()
738    } else {
739        git_batch_commit_stats(root, &uncached_dirs, &filenames)
740    };
741
742    // Collect all uncommitted changes once before the loop to avoid per-directory git I/O.
743    let uncommitted = UncommittedChanges::load(root);
744
745    for (dir_path, dir_label) in &dirs {
746        // Apply paths filter: skip directories that don't match any configured glob.
747        if !dir_matches_paths(dir_label, paths) {
748            continue;
749        }
750
751        let rel_dir = dir_path
752            .strip_prefix(root)
753            .unwrap_or(dir_path)
754            .to_string_lossy();
755        let rel_dir_git = if rel_dir.is_empty() {
756            ".".to_string()
757        } else {
758            rel_dir.to_string()
759        };
760
761        // Build the relative paths for each candidate filename.
762        let candidate_paths: Vec<String> = filenames
763            .iter()
764            .map(|f| {
765                if rel_dir.is_empty() {
766                    f.to_string()
767                } else {
768                    format!("{}/{}", rel_dir, f)
769                }
770            })
771            .collect();
772
773        // Always re-check for uncommitted content changes (in-memory after the batched load).
774        let content_dirty = uncommitted.has_content_changes(&rel_dir_git, &candidate_paths);
775
776        // If ANY candidate doc file is staged (about to be committed), skip the check.
777        let any_doc_dirty = candidate_paths
778            .iter()
779            .any(|p| uncommitted.summary_has_changes(p));
780        if any_doc_dirty {
781            continue;
782        }
783
784        let (last_summary_commit, commits_count) =
785            if let Some(entry) = cache.as_ref().and_then(|c| c.dirs.get(dir_label)) {
786                (entry.last_summary_commit.clone(), entry.commits_count)
787            } else if let Some(entry) = batch_results.get(dir_label) {
788                (entry.last_summary_commit.clone(), entry.commits_count)
789            } else {
790                (None, 0)
791            };
792
793        updated_dirs.insert(
794            dir_label.clone(),
795            CacheEntry {
796                last_summary_commit: last_summary_commit.clone(),
797                commits_count,
798            },
799        );
800
801        let effective_count = commits_count + usize::from(content_dirty);
802
803        // missing-summary only fires when there is NO committed doc file.
804        if last_summary_commit.is_none() && effective_count > threshold {
805            dirs_checked += 1;
806            missing.push(MissingSummary {
807                dir: dir_label.clone(),
808                total_commits: commits_count,
809                has_uncommitted_changes: content_dirty,
810                filenames: filenames.iter().map(|s| s.to_string()).collect(),
811            });
812        } else {
813            dirs_checked += 1;
814        }
815    }
816
817    // Persist updated cache.
818    if let Some(head_hash) = head {
819        let merged_dirs = if let Some(ref mut old) = cache {
820            old.dirs.extend(updated_dirs);
821            std::mem::take(&mut old.dirs)
822        } else {
823            updated_dirs
824        };
825        save_cache(
826            root,
827            &SummaryCache {
828                head: head_hash,
829                dirs: merged_dirs,
830            },
831        );
832    }
833
834    MissingSummaryReport {
835        missing,
836        dirs_checked,
837        threshold,
838    }
839}
840
841/// Build a [`StaleSummaryReport`] by walking the repository under `root` and checking
842/// each directory for a doc file that is up-to-date.
843///
844/// `filenames` lists the candidate doc filenames (e.g. `["SUMMARY.md", "CLAUDE.md"]`).
845/// A directory is compliant when it has **any** of those files and none of the present
846/// ones are stale (OR semantics). Pass an empty slice to fall back to [`DEFAULT_FILENAMES`].
847///
848/// `paths` is a list of glob patterns; only directories matching one of the patterns are
849/// checked. An empty `paths` slice means the rule applies to every directory (default behavior).
850///
851/// A doc file is considered stale when the number of commits since its last update (plus any
852/// uncommitted content changes in the directory) exceeds `threshold`. Directories without any
853/// matching doc file are NOT reported here — use `build_missing_summary_report` for that.
854pub fn build_stale_summary_report(
855    root: &Path,
856    threshold: usize,
857    filenames: &[String],
858    paths: &[String],
859    walk_config: &normalize_rules_config::WalkConfig,
860) -> StaleSummaryReport {
861    let filenames: Vec<&str> = if filenames.is_empty() {
862        DEFAULT_FILENAMES.to_vec()
863    } else {
864        filenames.iter().map(String::as_str).collect()
865    };
866    let mut stale = Vec::new();
867    let mut dirs_checked = 0;
868
869    // Load incremental cache: if HEAD has moved since the last run, walk only the new commits
870    // and update the cached entries in-place rather than re-walking all of history.
871    let head = git_head(root);
872    let mut cache = load_cache(root);
873
874    if let (Some(c), Some(current_head)) = (&mut cache, &head)
875        && c.head != *current_head
876    {
877        let all_dirs: HashMap<String, (String, bool)> = {
878            let dirs_snapshot = walk_dirs(root, walk_config);
879            dirs_snapshot
880                .iter()
881                .map(|(dir_path, dir_label)| {
882                    let rel = dir_path
883                        .strip_prefix(root)
884                        .unwrap_or(dir_path)
885                        .to_string_lossy();
886                    let rel_dir = if rel.is_empty() {
887                        ".".to_string()
888                    } else {
889                        rel.to_string()
890                    };
891                    let is_root = rel_dir == ".";
892                    (dir_label.clone(), (rel_dir, is_root))
893                })
894                .collect()
895        };
896        git_incremental_commit_stats(root, &c.head, &mut c.dirs, &all_dirs, &filenames);
897        c.head = current_head.clone();
898    }
899
900    let mut updated_dirs: HashMap<String, CacheEntry> = HashMap::new();
901
902    let dirs = walk_dirs(root, walk_config);
903
904    // Identify directories not covered by the cache — compute their stats in a single
905    // git history pass rather than one per directory.
906    let uncached_dirs: HashMap<String, (String, bool)> = dirs
907        .iter()
908        .filter(|(_, dir_label)| {
909            dir_matches_paths(dir_label, paths)
910                && cache
911                    .as_ref()
912                    .is_none_or(|c| !c.dirs.contains_key(dir_label))
913        })
914        .map(|(dir_path, dir_label)| {
915            let rel = dir_path
916                .strip_prefix(root)
917                .unwrap_or(dir_path)
918                .to_string_lossy();
919            let rel_dir = if rel.is_empty() {
920                ".".to_string()
921            } else {
922                rel.to_string()
923            };
924            let is_root = rel_dir == ".";
925            (dir_label.clone(), (rel_dir, is_root))
926        })
927        .collect();
928
929    let batch_results = if uncached_dirs.is_empty() {
930        HashMap::new()
931    } else {
932        git_batch_commit_stats(root, &uncached_dirs, &filenames)
933    };
934
935    // Collect all uncommitted changes once before the loop to avoid per-directory git I/O.
936    let uncommitted = UncommittedChanges::load(root);
937
938    for (dir_path, dir_label) in &dirs {
939        // Apply paths filter: skip directories that don't match any configured glob.
940        if !dir_matches_paths(dir_label, paths) {
941            continue;
942        }
943
944        let rel_dir = dir_path
945            .strip_prefix(root)
946            .unwrap_or(dir_path)
947            .to_string_lossy();
948        let rel_dir_git = if rel_dir.is_empty() {
949            ".".to_string()
950        } else {
951            rel_dir.to_string()
952        };
953
954        dirs_checked += 1;
955
956        // Build the relative paths for each candidate filename.
957        let candidate_paths: Vec<String> = filenames
958            .iter()
959            .map(|f| {
960                if rel_dir.is_empty() {
961                    f.to_string()
962                } else {
963                    format!("{}/{}", rel_dir, f)
964                }
965            })
966            .collect();
967
968        // Always re-check for uncommitted content changes (in-memory after the batched load).
969        // "content_dirty" excludes all candidate doc files from the signal.
970        let content_dirty = uncommitted.has_content_changes(&rel_dir_git, &candidate_paths);
971
972        // If ANY candidate doc file is staged (about to be committed), skip the
973        // staleness check: the pending commit will fix it.
974        let any_doc_dirty = candidate_paths
975            .iter()
976            .any(|p| uncommitted.summary_has_changes(p));
977        if any_doc_dirty {
978            continue;
979        }
980
981        // For OR semantics: find the candidate that has the most recent commit
982        // (smallest commits_since_update). If none have ever been committed,
983        // the directory is treated as missing — skip it here (handled by missing-summary).
984        //
985        // Cache key: dir_label — we store the best result across all candidates.
986        let (last_summary_commit, commits_count) =
987            if let Some(entry) = cache.as_ref().and_then(|c| c.dirs.get(dir_label)) {
988                (entry.last_summary_commit.clone(), entry.commits_count)
989            } else if let Some(entry) = batch_results.get(dir_label) {
990                (entry.last_summary_commit.clone(), entry.commits_count)
991            } else {
992                (None, 0)
993            };
994
995        // Store result for cache write.
996        updated_dirs.insert(
997            dir_label.clone(),
998            CacheEntry {
999                last_summary_commit: last_summary_commit.clone(),
1000                commits_count,
1001            },
1002        );
1003
1004        // Effective change count: committed changes + 1 if there are uncommitted content changes.
1005        let effective_count = commits_count + usize::from(content_dirty);
1006
1007        // Display name: first candidate filename (representative for messages).
1008        let primary_filename = filenames.first().copied().unwrap_or("SUMMARY.md");
1009
1010        // stale-summary only fires when a doc file EXISTS but is stale.
1011        if let Some(last_commit) = last_summary_commit
1012            && effective_count > threshold
1013        {
1014            stale.push(StaleSummary {
1015                dir: dir_label.clone(),
1016                commits_since_update: commits_count,
1017                last_summary_commit: last_commit,
1018                has_uncommitted_changes: content_dirty,
1019                filename: primary_filename.to_string(),
1020            });
1021        }
1022        // If last_summary_commit is None, the directory is missing a doc file entirely.
1023        // That is handled by missing-summary, not stale-summary.
1024    }
1025
1026    // Persist updated cache (merge with existing to preserve entries not visited this run).
1027    if let Some(head_hash) = head {
1028        let merged_dirs = if let Some(ref mut old) = cache {
1029            old.dirs.extend(updated_dirs);
1030            std::mem::take(&mut old.dirs)
1031        } else {
1032            updated_dirs
1033        };
1034        save_cache(
1035            root,
1036            &SummaryCache {
1037                head: head_hash,
1038                dirs: merged_dirs,
1039            },
1040        );
1041    }
1042
1043    StaleSummaryReport {
1044        stale,
1045        dirs_checked,
1046        threshold,
1047    }
1048}
1049
1050impl From<MissingSummaryReport> for DiagnosticsReport {
1051    fn from(report: MissingSummaryReport) -> Self {
1052        let issues: Vec<Issue> = report
1053            .missing
1054            .into_iter()
1055            .map(|m| {
1056                let candidates = m.filenames.join(" or ");
1057                let primary = m
1058                    .filenames
1059                    .first()
1060                    .map(String::as_str)
1061                    .unwrap_or("SUMMARY.md");
1062                let message = if m.has_uncommitted_changes {
1063                    format!(
1064                        "no {} found ({} commits + uncommitted changes touch this directory)",
1065                        candidates, m.total_commits
1066                    )
1067                } else {
1068                    format!(
1069                        "no {} found ({} commits touch this directory)",
1070                        candidates, m.total_commits
1071                    )
1072                };
1073                Issue {
1074                    file: format!("{}/{}", m.dir, primary),
1075                    line: None,
1076                    column: None,
1077                    end_line: None,
1078                    end_column: None,
1079                    rule_id: "missing-summary".into(),
1080                    message,
1081                    severity: Severity::Error,
1082                    source: "missing-summary".into(),
1083                    related: vec![],
1084                    suggestion: Some(format!(
1085                        "add a {} describing this directory's purpose",
1086                        candidates
1087                    )),
1088                }
1089            })
1090            .collect();
1091
1092        DiagnosticsReport {
1093            issues,
1094            files_checked: report.dirs_checked,
1095            sources_run: vec!["missing-summary".into()],
1096            tool_errors: vec![],
1097            daemon_cached: false,
1098        }
1099    }
1100}
1101
1102impl From<StaleSummaryReport> for DiagnosticsReport {
1103    fn from(report: StaleSummaryReport) -> Self {
1104        let threshold = report.threshold;
1105
1106        let issues: Vec<Issue> = report
1107            .stale
1108            .into_iter()
1109            .map(|s| {
1110                let message = if s.has_uncommitted_changes {
1111                    format!(
1112                        "{} commits + uncommitted changes since {} was last updated (threshold: {})",
1113                        s.commits_since_update, s.filename, threshold
1114                    )
1115                } else {
1116                    format!(
1117                        "{} commits since {} was last updated (threshold: {})",
1118                        s.commits_since_update, s.filename, threshold
1119                    )
1120                };
1121                Issue {
1122                    file: format!("{}/{}", s.dir, s.filename),
1123                    line: None,
1124                    column: None,
1125                    end_line: None,
1126                    end_column: None,
1127                    rule_id: "stale-summary".into(),
1128                    message,
1129                    severity: Severity::Error,
1130                    source: "stale-summary".into(),
1131                    related: vec![],
1132                    suggestion: Some(format!(
1133                        "{}/{} should describe the directory's current purpose, key files, and how they fit together",
1134                        s.dir, s.filename
1135                    )),
1136                }
1137            })
1138            .collect();
1139
1140        DiagnosticsReport {
1141            issues,
1142            files_checked: report.dirs_checked,
1143            sources_run: vec!["stale-summary".into()],
1144            tool_errors: vec![],
1145            daemon_cached: false,
1146        }
1147    }
1148}