gobby_code/commands/codewiki/repair.rs
1//! Citation repair: re-anchors a generated vault's `[file:line]` citations
2//! against the current index without regenerating any page. When source files
3//! drift, the cheaper fix than a full LLM regen is to move each citation to its
4//! symbol's current span. This module owns the deterministic, no-LLM repair
5//! routine; the public `codewiki --repair-citations` flag wired in a later leaf
6//! drives [`repair_citations`].
7
8use std::collections::{BTreeMap, BTreeSet};
9use std::path::Path;
10
11use serde::Serialize;
12
13use super::io::{read_codewiki_meta, safe_doc_path, write_doc};
14use super::{CitationResolver, CodewikiIndexSnapshot, reanchor_citations};
15use crate::models::Symbol;
16
17/// Result of a citation-repair run. This is the source-of-truth serialized
18/// shape that the gcode contract freezes for `codewiki --repair-citations`
19/// (Leaf 5 / #876): a stable key set with no `dry_run` mode.
20#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
21pub struct CitationRepairSummary {
22 /// On-disk pages read and checked for stale citations.
23 pub pages_scanned: usize,
24 /// Pages whose citations changed and were rewritten in place.
25 pub pages_repaired: usize,
26 /// Individual citations moved to a symbol's current span.
27 pub citations_repaired: usize,
28 /// Stale citations whose symbol no longer resolves in the current index;
29 /// left in place so a human can investigate rather than silently dropped.
30 pub citations_unresolved: usize,
31}
32
/// Resolver that maps citations written at generation time onto the current
/// index.
///
/// Resolution is a two-step lookup. First the persisted snapshot, which
/// records each symbol's `line_start` as of the generation that wrote the
/// page, tells us *which* symbol a citation named. Then the current index
/// tells us where that symbol lives *now*. Symbol IDs cannot serve as the link
/// between the two: they are UUID5 values over `byte_start` and so change
/// every time a symbol moves. The identity that survives a move is the triple
/// `(file, qualified_name, kind)`.
struct IndexCitationResolver {
    /// Every current symbol span `(line_start, line_end)`, grouped by file.
    /// Backs the "is this citation still accurate?" containment check.
    current_spans: BTreeMap<String, Vec<(usize, usize)>>,
    /// Lookup from the stable identity `(file, qualified_name, kind)` to the
    /// symbol's current `(line_start, line_end)`.
    current_by_identity: BTreeMap<(String, String, String), (usize, usize)>,
    /// Lookup from `(file, snapshot line_start)` to `(qualified_name, kind)`,
    /// populated from the persisted snapshot. When two snapshot symbols share
    /// the same `(file, line_start)` the entry is ambiguous and is left out, so
    /// such a citation stays unresolved instead of being re-anchored to the
    /// wrong symbol.
    snapshot_anchor: BTreeMap<(String, usize), (String, String)>,
}
52
53impl IndexCitationResolver {
54 /// Builds a resolver from the current symbol set and the snapshot persisted
55 /// when the vault was last generated.
56 fn build(symbols: &[Symbol], snapshot: &CodewikiIndexSnapshot) -> Self {
57 let mut current_spans: BTreeMap<String, Vec<(usize, usize)>> = BTreeMap::new();
58 let mut current_by_identity = BTreeMap::new();
59 for symbol in symbols {
60 current_spans
61 .entry(symbol.file_path.clone())
62 .or_default()
63 .push((symbol.line_start, symbol.line_end));
64 current_by_identity.insert(
65 (
66 symbol.file_path.clone(),
67 symbol.qualified_name.clone(),
68 symbol.kind.clone(),
69 ),
70 (symbol.line_start, symbol.line_end),
71 );
72 }
73
74 let mut snapshot_anchor: BTreeMap<(String, usize), (String, String)> = BTreeMap::new();
75 let mut ambiguous: BTreeSet<(String, usize)> = BTreeSet::new();
76 for snap in snapshot.symbols.values() {
77 let key = (snap.file_path.clone(), snap.line_start);
78 if snapshot_anchor
79 .insert(
80 key.clone(),
81 (snap.qualified_name.clone(), snap.kind.clone()),
82 )
83 .is_some()
84 {
85 ambiguous.insert(key);
86 }
87 }
88 for key in ambiguous {
89 snapshot_anchor.remove(&key);
90 }
91
92 Self {
93 current_spans,
94 current_by_identity,
95 snapshot_anchor,
96 }
97 }
98}
99
100impl CitationResolver for IndexCitationResolver {
101 fn is_current(&self, file: &str, line_start: usize, line_end: usize) -> bool {
102 self.current_spans.get(file).is_some_and(|spans| {
103 spans
104 .iter()
105 .any(|(start, end)| *start <= line_start && line_end <= *end)
106 })
107 }
108
109 fn resolve(&self, file: &str, line_start: usize) -> Option<(usize, usize)> {
110 let (qualified_name, kind) = self.snapshot_anchor.get(&(file.to_string(), line_start))?;
111 self.current_by_identity
112 .get(&(file.to_string(), qualified_name.clone(), kind.clone()))
113 .copied()
114 }
115}
116
117/// Re-anchors every generated page's citations against `resolver`, rewriting
118/// only pages whose citations changed. Reads the page set from the vault meta
119/// log and each page from disk; never calls a generator or an LLM.
120fn repair_with_resolver(
121 out_dir: &Path,
122 resolver: &dyn CitationResolver,
123) -> anyhow::Result<CitationRepairSummary> {
124 let meta = read_codewiki_meta(out_dir)?;
125 let mut summary = CitationRepairSummary::default();
126 for doc_path in meta.docs.keys() {
127 let target = safe_doc_path(out_dir, doc_path)?;
128 let content = match std::fs::read_to_string(&target) {
129 Ok(content) => content,
130 // A meta entry without a page on disk is stale bookkeeping, not a
131 // page to repair; skip it.
132 Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue,
133 Err(err) => return Err(err.into()),
134 };
135 summary.pages_scanned += 1;
136 let result = reanchor_citations(&content, resolver);
137 summary.citations_repaired += result.repaired;
138 summary.citations_unresolved += result.unresolved;
139 if result.repaired > 0 && result.text != content {
140 write_doc(out_dir, doc_path, &result.text)?;
141 summary.pages_repaired += 1;
142 }
143 }
144 Ok(summary)
145}
146
147/// Public entry: re-anchors every generated page's citations against the
148/// current `symbols`, using the index snapshot persisted in the vault meta to
149/// identify which symbol each stale citation named. No regeneration, no LLM —
150/// this is the routine the `codewiki --repair-citations` flag (Leaf 5 / #876)
151/// drives. A vault with no persisted snapshot (`unwrap_or_default`) cannot
152/// identify moved symbols, so its stale citations all count as unresolved.
153pub fn repair_citations(
154 out_dir: &Path,
155 symbols: &[Symbol],
156) -> anyhow::Result<CitationRepairSummary> {
157 let snapshot = read_codewiki_meta(out_dir)?
158 .index_snapshot
159 .unwrap_or_default();
160 let resolver = IndexCitationResolver::build(symbols, &snapshot);
161 repair_with_resolver(out_dir, &resolver)
162}