Skip to main content

mati_core/store/
repair.rs

1//! Gotcha index reconciliation engine.
2//!
3//! # Consistency model
4//!
5//! Gotcha mutations write to three locations:
6//!
7//! | Location | Role | Example key |
8//! |----------|------|-------------|
9//! | `gotcha:*` record | **Canonical truth** | `gotcha:never-unwrap` |
10//! | `file:*` payload `.gotcha_keys` | Derived index | `file:src/main.rs` |
11//! | `graph:edge:file:…:has_gotcha:gotcha:…` | Derived index | `graph:edge:file:src/main.rs:has_gotcha:gotcha:never-unwrap` |
12//!
13//! The canonical gotcha record is always written first and fails hard. The
14//! derived indexes (file links and graph edges) are best-effort: if they fail,
15//! the gotcha record still persists and a dirty marker is set so the drift is
16//! visible and repairable.
17//!
18//! This means **links and edges are never authoritative**. They are
19//! materialized views that can be rebuilt entirely from `gotcha:*` records.
20//!
21//! # Dirty markers
22//!
23//! When a best-effort secondary write fails in [`super::gotcha_ops`], the
24//! affected gotcha key is enqueued in a dirty marker record at
25//! `analytics:integrity:gotcha_links`. This marker is:
26//! - read by `mati status` to surface "index drift detected" warnings
27//! - drained by `mati repair --fast` for targeted reconciliation
28//! - cleared by `mati repair` after full reconciliation + verification
29//!
30//! # Repair modes
31//!
32//! - **Full** (`mati repair`): scans all gotcha and file records, diffs
33//!   against desired state, applies repairs, then verifies by re-running the
34//!   diff. Clears the dirty marker only after verification passes. This is the
35//!   only mode that provides a complete integrity guarantee.
36//!
37//! - **Fast** (`mati repair --fast`): drains the dirty-marker queue only.
38//!   Repairs the specific gotcha keys that were flagged. This is an
39//!   optimization, not an integrity proof — it cannot detect drift that wasn't
40//!   caused by a tracked failure (e.g., manual store edits, bugs in other
41//!   write paths).
42//!
43//! - **Check** (`mati repair --check`): read-only diff, no writes. Exits
44//!   non-zero if drift exists. CI-ready.
45//!
46//! # Usage
47//!
48//! ```text
49//! mati repair          # full reconcile + verify
50//! mati repair --check  # detect drift, exit 1 if found (CI)
51//! mati repair --fast   # drain dirty queue only (opportunistic)
52//! mati repair --json   # machine-readable output
53//! ```
54
55use std::collections::{BTreeSet, HashMap};
56use std::time::{SystemTime, UNIX_EPOCH};
57
58use anyhow::Result;
59use serde::{Deserialize, Serialize};
60
61use crate::graph::edges::{Edge, EdgeKind};
62use crate::store::db::Store;
63use crate::store::record::{
64    Category, GotchaRecord, Priority, Record, RecordLifecycle, RecordSource, RecordVersion,
65    StalenessScore,
66};
67
68/// Read-only store access used by the integrity *check* paths, so they can run
69/// against either a direct [`Store`] or a daemon-routed proxy (e.g. the CLI's
70/// `StoreProxy`, which routes reads through the daemon socket). Only the three
71/// read primitives the checks need — all writes stay on `&Store`.
72#[allow(async_fn_in_trait)]
73pub trait RepairReader {
74    async fn get(&self, key: &str) -> Result<Option<Record>>;
75    async fn scan_prefix(&self, prefix: &str) -> Result<Vec<Record>>;
76    async fn scan_keys(&self, prefix: &str) -> Result<Vec<String>>;
77}
78
79impl RepairReader for Store {
80    async fn get(&self, key: &str) -> Result<Option<Record>> {
81        Store::get(self, key).await
82    }
83    async fn scan_prefix(&self, prefix: &str) -> Result<Vec<Record>> {
84        Store::scan_prefix(self, prefix).await
85    }
86    async fn scan_keys(&self, prefix: &str) -> Result<Vec<String>> {
87        Store::scan_keys(self, prefix).await
88    }
89}
90
91fn now_secs() -> u64 {
92    SystemTime::now()
93        .duration_since(UNIX_EPOCH)
94        .unwrap_or_default()
95        .as_secs()
96}
97
98/// Dirty marker key — written when a best-effort secondary write fails.
99pub const DIRTY_MARKER_KEY: &str = "analytics:integrity:gotcha_links";
100
101// ── Report ───────────────────────────────────────────────────────────────────
102
103/// Result of a check or repair operation.
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct RepairReport {
106    pub scanned_gotchas: usize,
107    pub scanned_files: usize,
108    pub missing_file_links: Vec<DriftEntry>,
109    pub stale_file_links: Vec<DriftEntry>,
110    pub missing_edges: Vec<DriftEntry>,
111    pub stale_edges: Vec<DriftEntry>,
112    pub repaired_count: usize,
113    pub verification_passed: bool,
114    pub dirty_marker_cleared: bool,
115}
116
117impl RepairReport {
118    pub fn has_drift(&self) -> bool {
119        !self.missing_file_links.is_empty()
120            || !self.stale_file_links.is_empty()
121            || !self.missing_edges.is_empty()
122            || !self.stale_edges.is_empty()
123    }
124
125    pub fn total_drift(&self) -> usize {
126        self.missing_file_links.len()
127            + self.stale_file_links.len()
128            + self.missing_edges.len()
129            + self.stale_edges.len()
130    }
131}
132
133/// A single drift item — identifies what's wrong and where.
134#[derive(Debug, Clone, Serialize, Deserialize)]
135pub struct DriftEntry {
136    pub gotcha_key: String,
137    pub file_path: String,
138}
139
140/// Dirty marker payload — persisted at `DIRTY_MARKER_KEY`.
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct DirtyMarker {
143    pub dirty: bool,
144    pub dirty_since: u64,
145    pub cause: String,
146    pub affected_keys: Vec<String>,
147    pub last_checked_at: u64,
148    pub last_repaired_at: u64,
149}
150
151impl DirtyMarker {
152    pub fn clean() -> Self {
153        Self {
154            dirty: false,
155            dirty_since: 0,
156            cause: String::new(),
157            affected_keys: vec![],
158            last_checked_at: 0,
159            last_repaired_at: 0,
160        }
161    }
162}
163
164// ── Dirty marker operations ──────────────────────────────────────────────────
165
166/// Mark the gotcha index as dirty after a partial-write failure.
167pub async fn mark_dirty(store: &Store, gotcha_key: &str, cause: &str) {
168    let now = now_secs();
169
170    // Try to read existing marker to preserve history
171    let mut marker = read_dirty_marker(store)
172        .await
173        .unwrap_or_else(DirtyMarker::clean);
174    marker.dirty = true;
175    if marker.dirty_since == 0 {
176        marker.dirty_since = now;
177    }
178    marker.cause = cause.to_string();
179    if !marker.affected_keys.contains(&gotcha_key.to_string()) {
180        marker.affected_keys.push(gotcha_key.to_string());
181    }
182
183    let record = Record {
184        key: DIRTY_MARKER_KEY.to_string(),
185        value: cause.to_string(),
186        payload: serde_json::to_value(&marker).ok(),
187        category: Category::Analytics,
188        priority: Priority::Normal,
189        tags: vec![],
190        created_at: now,
191        updated_at: now,
192        ref_url: None,
193        staleness: StalenessScore::fresh(),
194        lifecycle: RecordLifecycle::Active,
195        version: RecordVersion {
196            device_id: crate::store::stable_device_id(),
197            logical_clock: 1,
198            wall_clock: now,
199        },
200        quality: crate::store::record::QualityScore::layer0_default(),
201        access_count: 0,
202        last_accessed: 0,
203        source: RecordSource::StaticAnalysis,
204        confidence: crate::store::record::ConfidenceScore::for_new_record(
205            &RecordSource::StaticAnalysis,
206        ),
207        gap_analysis_score: 0.0,
208    };
209
210    // Best-effort — don't fail the caller if marker write fails
211    let _ = store.put(DIRTY_MARKER_KEY, &record).await;
212}
213
214/// Read the current dirty marker, if any.
215pub async fn read_dirty_marker<R: RepairReader>(reader: &R) -> Option<DirtyMarker> {
216    reader
217        .get(DIRTY_MARKER_KEY)
218        .await
219        .ok()
220        .flatten()
221        .and_then(|r| r.payload_as::<DirtyMarker>())
222}
223
224/// Check whether the gotcha index is currently marked dirty.
225pub async fn is_dirty<R: RepairReader>(reader: &R) -> bool {
226    read_dirty_marker(reader)
227        .await
228        .map(|m| m.dirty)
229        .unwrap_or(false)
230}
231
232// ── Check ────────────────────────────────────────────────────────────────────
233
234/// Compute the diff between canonical gotcha state and derived indexes.
235/// Does not write anything.
236pub async fn check_gotcha_indexes<R: RepairReader>(reader: &R) -> Result<RepairReport> {
237    // Phase 1: derive desired state from canonical gotcha records
238    let (desired_file_links, desired_edges, scanned_gotchas) = derive_desired_state(reader).await?;
239
240    // Phase 2: diff against actual state
241    let (actual_file_links, scanned_files) = read_actual_file_links(reader).await?;
242    let actual_edges = read_actual_edges(reader).await?;
243
244    let (missing_file_links, stale_file_links) =
245        diff_file_links(&desired_file_links, &actual_file_links);
246    let (missing_edges, stale_edges) = diff_edges(&desired_edges, &actual_edges);
247
248    Ok(RepairReport {
249        scanned_gotchas,
250        scanned_files,
251        missing_file_links,
252        stale_file_links,
253        missing_edges,
254        stale_edges,
255        repaired_count: 0,
256        verification_passed: true, // check-only: no repair to verify
257        dirty_marker_cleared: false,
258    })
259}
260
261// ── Repair ───────────────────────────────────────────────────────────────────
262
263/// Repair mode controls what gets fixed.
264#[derive(Debug, Clone, Copy, PartialEq, Eq)]
265pub enum RepairMode {
266    /// Full scan and reconcile.
267    Full,
268    /// Only drain queued dirty items (fast path).
269    Fast,
270}
271
272/// Reconcile derived indexes to match canonical gotcha state.
273pub async fn repair_gotcha_indexes(store: &Store, mode: RepairMode) -> Result<RepairReport> {
274    let now = now_secs();
275
276    // For fast mode, only repair keys from the dirty marker queue
277    if mode == RepairMode::Fast {
278        return repair_fast(store, now).await;
279    }
280
281    // Phase 1: derive desired state
282    let (desired_file_links, desired_edges, scanned_gotchas) = derive_desired_state(store).await?;
283
284    // Phase 2: diff
285    let (actual_file_links, scanned_files) = read_actual_file_links(store).await?;
286    let actual_edges = read_actual_edges(store).await?;
287
288    let (missing_file_links, stale_file_links) =
289        diff_file_links(&desired_file_links, &actual_file_links);
290    let (missing_edges, stale_edges) = diff_edges(&desired_edges, &actual_edges);
291
292    let total_drift =
293        missing_file_links.len() + stale_file_links.len() + missing_edges.len() + stale_edges.len();
294
295    if total_drift == 0 {
296        // Already clean — clear dirty marker if set
297        clear_dirty_marker(store, now).await;
298        return Ok(RepairReport {
299            scanned_gotchas,
300            scanned_files,
301            missing_file_links: vec![],
302            stale_file_links: vec![],
303            missing_edges: vec![],
304            stale_edges: vec![],
305            repaired_count: 0,
306            verification_passed: true,
307            dirty_marker_cleared: true,
308        });
309    }
310
311    // Phase 3: apply repairs
312
313    // 3a. Rebuild file-record gotcha_keys from desired state
314    let mut repaired = 0usize;
315    for (file_path, desired_keys) in &desired_file_links {
316        let file_key = format!("file:{file_path}");
317        if let Ok(Some(mut record)) = store.get(&file_key).await {
318            let current_keys = extract_gotcha_keys(&record);
319            let desired_sorted: Vec<&String> = desired_keys.iter().collect();
320            let current_sorted: Vec<&String> = current_keys.iter().collect();
321
322            if desired_sorted != current_sorted {
323                set_gotcha_keys(&mut record, desired_keys.iter().cloned().collect());
324                record.updated_at = now;
325                record.version.logical_clock += 1;
326                record.version.wall_clock = now;
327                if store.put(&file_key, &record).await.is_ok() {
328                    repaired += 1;
329                }
330            }
331        }
332    }
333
334    // Also clear gotcha_keys from files that should have none
335    let (actual_file_links_2, _) = read_actual_file_links(store).await?;
336    for (file_path, actual_keys) in &actual_file_links_2 {
337        if !desired_file_links.contains_key(file_path.as_str()) && !actual_keys.is_empty() {
338            let file_key = format!("file:{file_path}");
339            if let Ok(Some(mut record)) = store.get(&file_key).await {
340                set_gotcha_keys(&mut record, vec![]);
341                record.updated_at = now;
342                record.version.logical_clock += 1;
343                record.version.wall_clock = now;
344                if store.put(&file_key, &record).await.is_ok() {
345                    repaired += 1;
346                }
347            }
348        }
349    }
350
351    // 3b. Rebuild graph edges
352    let ts = now.to_le_bytes();
353    for entry in &missing_edges {
354        let file_key = format!("file:{}", entry.file_path);
355        let edge_key = Edge::new(&file_key, EdgeKind::HasGotcha, &entry.gotcha_key).to_key();
356        if store.put_raw(&edge_key, &ts).await.is_ok() {
357            repaired += 1;
358        }
359    }
360    for entry in &stale_edges {
361        let file_key = format!("file:{}", entry.file_path);
362        let edge_key = Edge::new(&file_key, EdgeKind::HasGotcha, &entry.gotcha_key).to_key();
363        if store.delete(&edge_key).await.is_ok() {
364            repaired += 1;
365        }
366    }
367
368    // Phase 4: verify by recomputing diff
369    let verify = check_gotcha_indexes(store).await?;
370    let verification_passed = !verify.has_drift();
371
372    if verification_passed {
373        clear_dirty_marker(store, now).await;
374    }
375
376    Ok(RepairReport {
377        scanned_gotchas,
378        scanned_files,
379        missing_file_links,
380        stale_file_links,
381        missing_edges,
382        stale_edges,
383        repaired_count: repaired,
384        verification_passed,
385        dirty_marker_cleared: verification_passed,
386    })
387}
388
389// ── Fast repair ──────────────────────────────────────────────────────────────
390
391async fn repair_fast(store: &Store, now: u64) -> Result<RepairReport> {
392    let marker = match read_dirty_marker(store).await {
393        Some(m) if m.dirty => m,
394        _ => {
395            return Ok(RepairReport {
396                scanned_gotchas: 0,
397                scanned_files: 0,
398                missing_file_links: vec![],
399                stale_file_links: vec![],
400                missing_edges: vec![],
401                stale_edges: vec![],
402                repaired_count: 0,
403                verification_passed: true,
404                dirty_marker_cleared: false,
405            });
406        }
407    };
408
409    let mut repaired = 0usize;
410    let ts = now.to_le_bytes();
411
412    for gotcha_key in &marker.affected_keys {
413        // Read canonical state for this gotcha
414        let desired_files: Vec<String> = match store.get(gotcha_key).await? {
415            Some(record) if matches!(record.lifecycle, RecordLifecycle::Active) => record
416                .payload_as::<GotchaRecord>()
417                .map(|g| g.affected_files)
418                .unwrap_or_default(),
419            // Tombstoned or missing — desired state is empty
420            _ => vec![],
421        };
422
423        // Repair file links
424        for file_path in &desired_files {
425            let file_key = format!("file:{file_path}");
426            if let Ok(Some(mut record)) = store.get(&file_key).await {
427                let keys = extract_gotcha_keys(&record);
428                if !keys.contains(gotcha_key) {
429                    let mut new_keys = keys;
430                    new_keys.push(gotcha_key.clone());
431                    set_gotcha_keys(&mut record, new_keys);
432                    record.updated_at = now;
433                    record.version.logical_clock += 1;
434                    record.version.wall_clock = now;
435                    if store.put(&file_key, &record).await.is_ok() {
436                        repaired += 1;
437                    }
438                }
439            }
440
441            // Repair edge
442            let file_key = format!("file:{file_path}");
443            let edge_key = Edge::new(&file_key, EdgeKind::HasGotcha, gotcha_key.as_str()).to_key();
444            if store.put_raw(&edge_key, &ts).await.is_ok() {
445                repaired += 1;
446            }
447        }
448
449        // Remove stale links from files that reference this gotcha but are NOT
450        // in the current desired_files. This handles both:
451        // - tombstoned/missing gotchas (desired_files is empty → all refs removed)
452        // - moved gotchas (e.g. affected_files changed from [A,B] to [B,C] → A cleaned)
453        //
454        // Previously, this scan only ran for the tombstoned case, leaving stale
455        // links behind when a gotcha's affected_files changed.
456        {
457            let desired_set: std::collections::HashSet<&str> =
458                desired_files.iter().map(String::as_str).collect();
459            let files = store.scan_prefix("file:").await?;
460            for mut file_record in files {
461                let file_path = file_record
462                    .key
463                    .strip_prefix("file:")
464                    .unwrap_or(&file_record.key);
465                // Skip files that are correctly in desired_files
466                if desired_set.contains(file_path) {
467                    continue;
468                }
469                let keys = extract_gotcha_keys(&file_record);
470                if keys.contains(gotcha_key) {
471                    let new_keys: Vec<String> =
472                        keys.into_iter().filter(|k| k != gotcha_key).collect();
473                    set_gotcha_keys(&mut file_record, new_keys);
474                    file_record.updated_at = now;
475                    file_record.version.logical_clock += 1;
476                    file_record.version.wall_clock = now;
477                    if store.put(&file_record.key, &file_record).await.is_ok() {
478                        repaired += 1;
479                    }
480                }
481                // Also remove stale HasGotcha edge
482                let edge_key =
483                    Edge::new(&file_record.key, EdgeKind::HasGotcha, gotcha_key.as_str()).to_key();
484                let _ = store.delete(&edge_key).await;
485            }
486        }
487    }
488
489    if repaired > 0 {
490        clear_dirty_marker(store, now).await;
491    }
492
493    Ok(RepairReport {
494        scanned_gotchas: marker.affected_keys.len(),
495        scanned_files: 0,
496        missing_file_links: vec![],
497        stale_file_links: vec![],
498        missing_edges: vec![],
499        stale_edges: vec![],
500        repaired_count: repaired,
501        verification_passed: true,
502        dirty_marker_cleared: repaired > 0,
503    })
504}
505
506// ── Internal helpers ─────────────────────────────────────────────────────────
507
508/// Phase 1: build desired state from canonical gotcha records.
509async fn derive_desired_state<R: RepairReader>(
510    reader: &R,
511) -> Result<(
512    HashMap<String, BTreeSet<String>>,
513    BTreeSet<(String, String)>,
514    usize,
515)> {
516    let gotchas = reader.scan_prefix("gotcha:").await?;
517    let scanned = gotchas.len();
518
519    let mut desired_file_links: HashMap<String, BTreeSet<String>> = HashMap::new();
520    let mut desired_edges: BTreeSet<(String, String)> = BTreeSet::new();
521
522    for record in &gotchas {
523        if !matches!(record.lifecycle, RecordLifecycle::Active) {
524            continue;
525        }
526        let Some(gotcha) = record.payload_as::<GotchaRecord>() else {
527            continue;
528        };
529
530        for file_path in &gotcha.affected_files {
531            desired_file_links
532                .entry(file_path.clone())
533                .or_default()
534                .insert(record.key.clone());
535            desired_edges.insert((file_path.clone(), record.key.clone()));
536        }
537    }
538
539    Ok((desired_file_links, desired_edges, scanned))
540}
541
542/// Read actual gotcha_keys from all file records.
543async fn read_actual_file_links<R: RepairReader>(
544    reader: &R,
545) -> Result<(HashMap<String, Vec<String>>, usize)> {
546    let files = reader.scan_prefix("file:").await?;
547    let count = files.len();
548    let mut actual: HashMap<String, Vec<String>> = HashMap::new();
549
550    for record in &files {
551        let path = record
552            .key
553            .strip_prefix("file:")
554            .unwrap_or(&record.key)
555            .to_string();
556        let keys = extract_gotcha_keys(record);
557        if !keys.is_empty() {
558            actual.insert(path, keys);
559        }
560    }
561
562    Ok((actual, count))
563}
564
565/// Read actual HasGotcha edges from the graph edge store.
566async fn read_actual_edges<R: RepairReader>(reader: &R) -> Result<BTreeSet<(String, String)>> {
567    let edge_keys = reader.scan_keys("graph:edge:").await?;
568    let mut actual = BTreeSet::new();
569
570    for key in &edge_keys {
571        if let Some(edge) = Edge::from_key(key) {
572            if edge.kind == EdgeKind::HasGotcha {
573                let file_path = edge
574                    .from
575                    .strip_prefix("file:")
576                    .unwrap_or(&edge.from)
577                    .to_string();
578                actual.insert((file_path, edge.to));
579            }
580        }
581    }
582
583    Ok(actual)
584}
585
586/// Diff file links: compare desired vs actual.
587fn diff_file_links(
588    desired: &HashMap<String, BTreeSet<String>>,
589    actual: &HashMap<String, Vec<String>>,
590) -> (Vec<DriftEntry>, Vec<DriftEntry>) {
591    let mut missing = Vec::new();
592    let mut stale = Vec::new();
593
594    // Find missing links (in desired but not in actual)
595    for (file_path, desired_keys) in desired {
596        let actual_keys: BTreeSet<String> = actual
597            .get(file_path)
598            .map(|v| v.iter().cloned().collect())
599            .unwrap_or_default();
600
601        for key in desired_keys {
602            if !actual_keys.contains(key) {
603                missing.push(DriftEntry {
604                    gotcha_key: key.clone(),
605                    file_path: file_path.clone(),
606                });
607            }
608        }
609    }
610
611    // Find stale links (in actual but not in desired)
612    for (file_path, actual_keys) in actual {
613        let desired_keys = desired.get(file_path);
614        for key in actual_keys {
615            let is_desired = desired_keys.map(|d| d.contains(key)).unwrap_or(false);
616            if !is_desired {
617                stale.push(DriftEntry {
618                    gotcha_key: key.clone(),
619                    file_path: file_path.clone(),
620                });
621            }
622        }
623    }
624
625    (missing, stale)
626}
627
628/// Diff edges: compare desired vs actual.
629fn diff_edges(
630    desired: &BTreeSet<(String, String)>,
631    actual: &BTreeSet<(String, String)>,
632) -> (Vec<DriftEntry>, Vec<DriftEntry>) {
633    let missing: Vec<DriftEntry> = desired
634        .difference(actual)
635        .map(|(file_path, gotcha_key)| DriftEntry {
636            gotcha_key: gotcha_key.clone(),
637            file_path: file_path.clone(),
638        })
639        .collect();
640
641    let stale: Vec<DriftEntry> = actual
642        .difference(desired)
643        .map(|(file_path, gotcha_key)| DriftEntry {
644            gotcha_key: gotcha_key.clone(),
645            file_path: file_path.clone(),
646        })
647        .collect();
648
649    (missing, stale)
650}
651
652fn extract_gotcha_keys(record: &Record) -> Vec<String> {
653    record
654        .payload
655        .as_ref()
656        .and_then(|p| p.get("gotcha_keys"))
657        .and_then(|v| v.as_array())
658        .map(|arr| {
659            arr.iter()
660                .filter_map(|v| v.as_str().map(String::from))
661                .collect()
662        })
663        .unwrap_or_default()
664}
665
666fn set_gotcha_keys(record: &mut Record, keys: Vec<String>) {
667    if let Some(payload) = record.payload.as_mut() {
668        if let Some(obj) = payload.as_object_mut() {
669            obj.insert(
670                "gotcha_keys".into(),
671                serde_json::Value::Array(keys.into_iter().map(serde_json::Value::String).collect()),
672            );
673        }
674    }
675}
676
677/// Remove a single gotcha key from the dirty marker if it is the only key
678/// currently flagged.
679///
680/// Used by [`super::gotcha_ops`] as the "disarm" half of its cancellation
681/// guard: after a successful all-secondary-writes path, the caller pre-armed
682/// `mark_dirty(key)` upfront and now wants to release it. If another caller
683/// has flagged a different key concurrently (or a previous failure left a
684/// key behind), we leave the marker alone — `repair_fast` on the next boot
685/// will reconcile both. This is best-effort and never blocks the caller.
686pub async fn clear_dirty_key_if_solo(store: &Store, gotcha_key: &str) {
687    let Some(mut marker) = read_dirty_marker(store).await else {
688        return;
689    };
690    if !marker.dirty {
691        return;
692    }
693    // Only clear if our key is the *only* dirty one. If other keys are
694    // present, leaving the marker intact is the safe choice — repair will
695    // reconcile our successfully-written derived state as a no-op.
696    let only_ours = marker.affected_keys.len() == 1 && marker.affected_keys[0] == gotcha_key;
697    if !only_ours {
698        return;
699    }
700
701    let now = now_secs();
702    marker.dirty = false;
703    marker.affected_keys.clear();
704    marker.last_repaired_at = now;
705
706    let record = Record {
707        key: DIRTY_MARKER_KEY.to_string(),
708        value: String::new(),
709        payload: serde_json::to_value(&marker).ok(),
710        category: Category::Analytics,
711        priority: Priority::Normal,
712        tags: vec![],
713        created_at: now,
714        updated_at: now,
715        ref_url: None,
716        staleness: StalenessScore::fresh(),
717        lifecycle: RecordLifecycle::Active,
718        version: RecordVersion {
719            device_id: crate::store::stable_device_id(),
720            logical_clock: 1,
721            wall_clock: now,
722        },
723        quality: crate::store::record::QualityScore::layer0_default(),
724        access_count: 0,
725        last_accessed: 0,
726        source: RecordSource::StaticAnalysis,
727        confidence: crate::store::record::ConfidenceScore::for_new_record(
728            &RecordSource::StaticAnalysis,
729        ),
730        gap_analysis_score: 0.0,
731    };
732    let _ = store.put(DIRTY_MARKER_KEY, &record).await;
733}
734
735async fn clear_dirty_marker(store: &Store, now: u64) {
736    if let Some(mut marker) = read_dirty_marker(store).await {
737        marker.dirty = false;
738        marker.affected_keys.clear();
739        marker.last_repaired_at = now;
740
741        let record = Record {
742            key: DIRTY_MARKER_KEY.to_string(),
743            value: String::new(),
744            payload: serde_json::to_value(&marker).ok(),
745            category: Category::Analytics,
746            priority: Priority::Normal,
747            tags: vec![],
748            created_at: now,
749            updated_at: now,
750            ref_url: None,
751            staleness: StalenessScore::fresh(),
752            lifecycle: RecordLifecycle::Active,
753            version: RecordVersion {
754                device_id: crate::store::stable_device_id(),
755                logical_clock: 1,
756                wall_clock: now,
757            },
758            quality: crate::store::record::QualityScore::layer0_default(),
759            access_count: 0,
760            last_accessed: 0,
761            source: RecordSource::StaticAnalysis,
762            confidence: crate::store::record::ConfidenceScore::for_new_record(
763                &RecordSource::StaticAnalysis,
764            ),
765            gap_analysis_score: 0.0,
766        };
767        let _ = store.put(DIRTY_MARKER_KEY, &record).await;
768    }
769}
770
771// ── Tests ────────────────────────────────────────────────────────────────────
772
773#[cfg(test)]
774mod tests {
775    use super::*;
776    use crate::store::record::FileRecord;
777
778    fn make_gotcha(key: &str, files: &[&str]) -> Record {
779        let gotcha = GotchaRecord {
780            rule: "test".into(),
781            reason: "test".into(),
782            severity: Priority::High,
783            affected_files: files.iter().map(|s| s.to_string()).collect(),
784            ref_url: None,
785            discovered_session: 1_000_000,
786            confirmed: true,
787        };
788        Record {
789            key: key.to_string(),
790            value: "test".into(),
791            payload: serde_json::to_value(&gotcha).ok(),
792            category: Category::Gotcha,
793            priority: Priority::High,
794            tags: vec![],
795            created_at: 1_000_000,
796            updated_at: 1_000_000,
797            ref_url: None,
798            staleness: StalenessScore::fresh(),
799            lifecycle: RecordLifecycle::Active,
800            version: RecordVersion {
801                device_id: uuid::Uuid::new_v4(),
802                logical_clock: 1,
803                wall_clock: 1_000_000,
804            },
805            quality: crate::store::record::QualityScore::layer0_default(),
806            access_count: 0,
807            last_accessed: 0,
808            source: RecordSource::DeveloperManual,
809            confidence: crate::store::record::ConfidenceScore::for_new_record(
810                &RecordSource::DeveloperManual,
811            ),
812            gap_analysis_score: 0.0,
813        }
814    }
815
816    fn make_file(path: &str, gotcha_keys: &[&str]) -> Record {
817        let file = FileRecord {
818            path: path.to_string(),
819            purpose: String::new(),
820            entry_points: vec![],
821            imports: vec![],
822            gotcha_keys: gotcha_keys.iter().map(|s| s.to_string()).collect(),
823            decision_keys: vec![],
824            todos: vec![],
825            unsafe_count: 0,
826            unwrap_count: 0,
827            change_frequency: 0,
828            last_author: None,
829            is_hotspot: false,
830            token_cost_estimate: 0,
831            last_modified_session: 0,
832            content_hash: None,
833            line_count: 0,
834            blast_radius: None,
835            propagated_staleness: None,
836        };
837        Record {
838            key: format!("file:{path}"),
839            value: String::new(),
840            payload: serde_json::to_value(&file).ok(),
841            category: Category::File,
842            priority: Priority::Normal,
843            tags: vec![],
844            created_at: 1_000_000,
845            updated_at: 1_000_000,
846            ref_url: None,
847            staleness: StalenessScore::fresh(),
848            lifecycle: RecordLifecycle::Active,
849            version: RecordVersion {
850                device_id: uuid::Uuid::new_v4(),
851                logical_clock: 1,
852                wall_clock: 1_000_000,
853            },
854            quality: crate::store::record::QualityScore::layer0_default(),
855            access_count: 0,
856            last_accessed: 0,
857            source: RecordSource::StaticAnalysis,
858            confidence: crate::store::record::ConfidenceScore::for_new_record(
859                &RecordSource::StaticAnalysis,
860            ),
861            gap_analysis_score: 0.0,
862        }
863    }
864
865    #[tokio::test]
866    async fn check_detects_no_drift_when_consistent() {
867        let dir = tempfile::TempDir::new().unwrap();
868        let store = Store::open(dir.path()).await.unwrap();
869
870        store
871            .put("gotcha:g1", &make_gotcha("gotcha:g1", &["src/a.rs"]))
872            .await
873            .unwrap();
874        store
875            .put("file:src/a.rs", &make_file("src/a.rs", &["gotcha:g1"]))
876            .await
877            .unwrap();
878
879        let edge = Edge::new("file:src/a.rs", EdgeKind::HasGotcha, "gotcha:g1");
880        store
881            .put_raw(&edge.to_key(), &now_secs().to_le_bytes())
882            .await
883            .unwrap();
884
885        let report = check_gotcha_indexes(&store).await.unwrap();
886        assert!(!report.has_drift());
887        assert_eq!(report.scanned_gotchas, 1);
888        assert_eq!(report.scanned_files, 1);
889
890        store.close().await.unwrap();
891    }
892
893    #[tokio::test]
894    async fn check_detects_missing_file_link() {
895        let dir = tempfile::TempDir::new().unwrap();
896        let store = Store::open(dir.path()).await.unwrap();
897
898        store
899            .put("gotcha:g1", &make_gotcha("gotcha:g1", &["src/a.rs"]))
900            .await
901            .unwrap();
902        // File exists but has no gotcha_keys
903        store
904            .put("file:src/a.rs", &make_file("src/a.rs", &[]))
905            .await
906            .unwrap();
907
908        let report = check_gotcha_indexes(&store).await.unwrap();
909        assert!(report.has_drift());
910        assert_eq!(report.missing_file_links.len(), 1);
911        assert_eq!(report.missing_file_links[0].gotcha_key, "gotcha:g1");
912        assert_eq!(report.missing_file_links[0].file_path, "src/a.rs");
913
914        store.close().await.unwrap();
915    }
916
917    #[tokio::test]
918    async fn check_detects_stale_file_link() {
919        let dir = tempfile::TempDir::new().unwrap();
920        let store = Store::open(dir.path()).await.unwrap();
921
922        // No active gotcha, but file still references one
923        store
924            .put("file:src/a.rs", &make_file("src/a.rs", &["gotcha:deleted"]))
925            .await
926            .unwrap();
927
928        let report = check_gotcha_indexes(&store).await.unwrap();
929        assert!(report.has_drift());
930        assert_eq!(report.stale_file_links.len(), 1);
931        assert_eq!(report.stale_file_links[0].gotcha_key, "gotcha:deleted");
932
933        store.close().await.unwrap();
934    }
935
936    #[tokio::test]
937    async fn repair_fixes_missing_links_and_verifies() {
938        let dir = tempfile::TempDir::new().unwrap();
939        let store = Store::open(dir.path()).await.unwrap();
940
941        store
942            .put(
943                "gotcha:g1",
944                &make_gotcha("gotcha:g1", &["src/a.rs", "src/b.rs"]),
945            )
946            .await
947            .unwrap();
948        store
949            .put("file:src/a.rs", &make_file("src/a.rs", &[]))
950            .await
951            .unwrap();
952        store
953            .put("file:src/b.rs", &make_file("src/b.rs", &[]))
954            .await
955            .unwrap();
956
957        let report = repair_gotcha_indexes(&store, RepairMode::Full)
958            .await
959            .unwrap();
960        assert!(report.verification_passed);
961        assert!(report.repaired_count > 0);
962        assert!(report.dirty_marker_cleared);
963
964        // Verify file records now have the right keys
965        let a = store.get("file:src/a.rs").await.unwrap().unwrap();
966        let b = store.get("file:src/b.rs").await.unwrap().unwrap();
967        assert!(extract_gotcha_keys(&a).contains(&"gotcha:g1".to_string()));
968        assert!(extract_gotcha_keys(&b).contains(&"gotcha:g1".to_string()));
969
970        // Verify edges exist
971        let edges = store.scan_keys("graph:edge:").await.unwrap();
972        let edge_a = Edge::new("file:src/a.rs", EdgeKind::HasGotcha, "gotcha:g1").to_key();
973        let edge_b = Edge::new("file:src/b.rs", EdgeKind::HasGotcha, "gotcha:g1").to_key();
974        assert!(edges.contains(&edge_a));
975        assert!(edges.contains(&edge_b));
976
977        store.close().await.unwrap();
978    }
979
980    #[tokio::test]
981    async fn repair_removes_stale_links() {
982        let dir = tempfile::TempDir::new().unwrap();
983        let store = Store::open(dir.path()).await.unwrap();
984
985        // File references a gotcha that doesn't exist
986        store
987            .put("file:src/a.rs", &make_file("src/a.rs", &["gotcha:ghost"]))
988            .await
989            .unwrap();
990
991        let report = repair_gotcha_indexes(&store, RepairMode::Full)
992            .await
993            .unwrap();
994        assert!(report.verification_passed);
995
996        let a = store.get("file:src/a.rs").await.unwrap().unwrap();
997        assert!(extract_gotcha_keys(&a).is_empty());
998
999        store.close().await.unwrap();
1000    }
1001
1002    #[tokio::test]
1003    async fn dirty_marker_lifecycle() {
1004        let dir = tempfile::TempDir::new().unwrap();
1005        let store = Store::open(dir.path()).await.unwrap();
1006
1007        assert!(!is_dirty(&store).await);
1008
1009        mark_dirty(&store, "gotcha:test", "link sync failed").await;
1010        assert!(is_dirty(&store).await);
1011
1012        let marker = read_dirty_marker(&store).await.unwrap();
1013        assert!(marker.dirty);
1014        assert_eq!(marker.affected_keys, vec!["gotcha:test"]);
1015
1016        clear_dirty_marker(&store, now_secs()).await;
1017        assert!(!is_dirty(&store).await);
1018
1019        store.close().await.unwrap();
1020    }
1021
1022    /// Simulates a partial-write failure and verifies the full recovery contract:
1023    /// 1. Canonical gotcha record persists
1024    /// 2. File links are missing (secondary write "failed")
1025    /// 3. Dirty marker is set
1026    /// 4. Repair restores derived state from canonical truth
1027    /// 5. Dirty marker is cleared after verified repair
1028    #[tokio::test]
1029    async fn partial_failure_recovery_contract() {
1030        let dir = tempfile::TempDir::new().unwrap();
1031        let store = Store::open(dir.path()).await.unwrap();
1032
1033        // Seed file records
1034        store
1035            .put("file:src/a.rs", &make_file("src/a.rs", &[]))
1036            .await
1037            .unwrap();
1038        store
1039            .put("file:src/b.rs", &make_file("src/b.rs", &[]))
1040            .await
1041            .unwrap();
1042
1043        // Simulate step 2 succeeding: write the canonical gotcha record directly
1044        let gotcha = make_gotcha("gotcha:partial", &["src/a.rs", "src/b.rs"]);
1045        store.put("gotcha:partial", &gotcha).await.unwrap();
1046
1047        // Simulate step 3 failing: do NOT write file links or edges
1048        // (this is what happens when sync_gotcha_file_links errors out)
1049
1050        // Simulate the failure handler: set dirty marker
1051        mark_dirty(&store, "gotcha:partial", "link sync failed").await;
1052
1053        // ── Verify partial-failure state ──────────────────────────────────
1054
1055        // Canonical record exists
1056        let canonical = store.get("gotcha:partial").await.unwrap();
1057        assert!(canonical.is_some(), "canonical gotcha record must persist");
1058
1059        // File links are missing
1060        let a = store.get("file:src/a.rs").await.unwrap().unwrap();
1061        let b = store.get("file:src/b.rs").await.unwrap().unwrap();
1062        assert!(
1063            extract_gotcha_keys(&a).is_empty(),
1064            "file link should be missing (secondary write failed)"
1065        );
1066        assert!(
1067            extract_gotcha_keys(&b).is_empty(),
1068            "file link should be missing (secondary write failed)"
1069        );
1070
1071        // Dirty marker is set
1072        assert!(is_dirty(&store).await, "dirty marker must be set");
1073        let marker = read_dirty_marker(&store).await.unwrap();
1074        assert!(marker.affected_keys.contains(&"gotcha:partial".to_string()));
1075
1076        // Check detects the drift
1077        let pre = check_gotcha_indexes(&store).await.unwrap();
1078        assert!(pre.has_drift());
1079        assert_eq!(pre.missing_file_links.len(), 2);
1080        assert_eq!(pre.missing_edges.len(), 2);
1081
1082        // ── Repair restores consistency ───────────────────────────────────
1083
1084        let report = repair_gotcha_indexes(&store, RepairMode::Full)
1085            .await
1086            .unwrap();
1087        assert!(report.repaired_count > 0, "repair should fix something");
1088        assert!(
1089            report.verification_passed,
1090            "post-repair verification must pass"
1091        );
1092        assert!(
1093            report.dirty_marker_cleared,
1094            "dirty marker must be cleared after verified repair"
1095        );
1096
1097        // File links now correct
1098        let a2 = store.get("file:src/a.rs").await.unwrap().unwrap();
1099        let b2 = store.get("file:src/b.rs").await.unwrap().unwrap();
1100        assert!(extract_gotcha_keys(&a2).contains(&"gotcha:partial".to_string()));
1101        assert!(extract_gotcha_keys(&b2).contains(&"gotcha:partial".to_string()));
1102
1103        // Edges now exist
1104        let edges = store.scan_keys("graph:edge:").await.unwrap();
1105        let edge_a = Edge::new("file:src/a.rs", EdgeKind::HasGotcha, "gotcha:partial").to_key();
1106        let edge_b = Edge::new("file:src/b.rs", EdgeKind::HasGotcha, "gotcha:partial").to_key();
1107        assert!(edges.contains(&edge_a));
1108        assert!(edges.contains(&edge_b));
1109
1110        // Dirty marker cleared
1111        assert!(!is_dirty(&store).await);
1112
1113        // Re-check confirms no drift remains
1114        let post = check_gotcha_indexes(&store).await.unwrap();
1115        assert!(!post.has_drift());
1116
1117        store.close().await.unwrap();
1118    }
1119
1120    /// Verifies that repair_fast removes stale file links when a gotcha's
1121    /// affected_files changed (e.g. from [A,B] to [B,C]). Previously,
1122    /// repair_fast only cleaned stale links for tombstoned/missing gotchas,
1123    /// leaving file A with a stale reference after a move.
1124    #[tokio::test]
1125    async fn fast_repair_removes_stale_links_on_move() {
1126        let dir = tempfile::TempDir::new().unwrap();
1127        let store = Store::open(dir.path()).await.unwrap();
1128
1129        // Seed file records for A, B, and C
1130        store
1131            .put("file:src/a.rs", &make_file("src/a.rs", &["gotcha:moved"]))
1132            .await
1133            .unwrap();
1134        store
1135            .put("file:src/b.rs", &make_file("src/b.rs", &["gotcha:moved"]))
1136            .await
1137            .unwrap();
1138        store
1139            .put("file:src/c.rs", &make_file("src/c.rs", &[]))
1140            .await
1141            .unwrap();
1142
1143        // Gotcha now targets [B, C] — A is stale
1144        store
1145            .put(
1146                "gotcha:moved",
1147                &make_gotcha("gotcha:moved", &["src/b.rs", "src/c.rs"]),
1148            )
1149            .await
1150            .unwrap();
1151
1152        // Also add a stale edge for A
1153        let stale_edge = Edge::new("file:src/a.rs", EdgeKind::HasGotcha, "gotcha:moved");
1154        store
1155            .put_raw(&stale_edge.to_key(), &now_secs().to_le_bytes())
1156            .await
1157            .unwrap();
1158
1159        // Mark dirty so repair_fast picks it up
1160        mark_dirty(&store, "gotcha:moved", "affected_files changed").await;
1161
1162        // Run fast repair
1163        let report = repair_fast(&store, now_secs()).await.unwrap();
1164        assert!(
1165            report.repaired_count > 0,
1166            "fast repair should fix something"
1167        );
1168        assert!(report.dirty_marker_cleared);
1169
1170        // A should no longer reference the gotcha
1171        let a = store.get("file:src/a.rs").await.unwrap().unwrap();
1172        assert!(
1173            !extract_gotcha_keys(&a).contains(&"gotcha:moved".to_string()),
1174            "stale link on file A should be removed"
1175        );
1176
1177        // B should still reference the gotcha
1178        let b = store.get("file:src/b.rs").await.unwrap().unwrap();
1179        assert!(extract_gotcha_keys(&b).contains(&"gotcha:moved".to_string()));
1180
1181        // C should now reference the gotcha
1182        let c = store.get("file:src/c.rs").await.unwrap().unwrap();
1183        assert!(extract_gotcha_keys(&c).contains(&"gotcha:moved".to_string()));
1184
1185        // Full check should confirm consistency
1186        let check = check_gotcha_indexes(&store).await.unwrap();
1187        assert!(
1188            !check.has_drift(),
1189            "no drift should remain after fast repair: missing_file_links={}, stale_file_links={}, missing_edges={}, stale_edges={}",
1190            check.missing_file_links.len(),
1191            check.stale_file_links.len(),
1192            check.missing_edges.len(),
1193            check.stale_edges.len(),
1194        );
1195
1196        store.close().await.unwrap();
1197    }
1198
1199    /// Fault-injection test for the `mati serve` boot-time auto-drain.
1200    ///
1201    /// Simulates an unclean shutdown that left real drift AND a dirty marker.
1202    /// On reopen, the same `is_dirty + repair_gotcha_indexes(Fast)` sequence
1203    /// that `mcp::server::serve()` runs must clear both. Locks down the
1204    /// contract for the boot-time recovery added alongside the panic hook
1205    /// and explicit shutdown flush.
1206    #[tokio::test]
1207    async fn auto_drain_on_reopen_clears_dirty_marker_and_drift() {
1208        let dir = tempfile::TempDir::new().unwrap();
1209
1210        // Session 1: introduce drift (gotcha now targets [B,C], but file A still
1211        // references it from before, file C has not yet been linked, plus a
1212        // stale edge to A). Mark dirty as if a partial-write recorded the
1213        // failure. Close to simulate the daemon process exiting.
1214        {
1215            let store = Store::open(dir.path()).await.unwrap();
1216            store
1217                .put("file:src/a.rs", &make_file("src/a.rs", &["gotcha:moved"]))
1218                .await
1219                .unwrap();
1220            store
1221                .put("file:src/b.rs", &make_file("src/b.rs", &["gotcha:moved"]))
1222                .await
1223                .unwrap();
1224            store
1225                .put("file:src/c.rs", &make_file("src/c.rs", &[]))
1226                .await
1227                .unwrap();
1228            store
1229                .put(
1230                    "gotcha:moved",
1231                    &make_gotcha("gotcha:moved", &["src/b.rs", "src/c.rs"]),
1232                )
1233                .await
1234                .unwrap();
1235            let stale_edge = Edge::new("file:src/a.rs", EdgeKind::HasGotcha, "gotcha:moved");
1236            store
1237                .put_raw(&stale_edge.to_key(), &now_secs().to_le_bytes())
1238                .await
1239                .unwrap();
1240            mark_dirty(&store, "gotcha:moved", "simulated partial-write").await;
1241
1242            // Sanity: pre-shutdown state really is broken.
1243            let pre = check_gotcha_indexes(&store).await.unwrap();
1244            assert!(pre.has_drift(), "drift must exist before shutdown");
1245            assert!(is_dirty(&store).await, "marker must be set before shutdown");
1246
1247            store.close().await.unwrap();
1248        }
1249
1250        // Session 2: reopen and run the exact sequence `serve()` runs at
1251        // startup. The dirty marker must survive the reopen (it's persisted
1252        // in the knowledge tree), and the Fast drain must clear both the
1253        // marker and the drift.
1254        {
1255            let store = Store::open(dir.path()).await.unwrap();
1256            assert!(
1257                is_dirty(&store).await,
1258                "dirty marker should survive reopen across sessions"
1259            );
1260
1261            let report = repair_gotcha_indexes(&store, RepairMode::Fast)
1262                .await
1263                .unwrap();
1264            assert!(report.repaired_count > 0, "Fast drain must apply repairs");
1265            assert!(
1266                report.dirty_marker_cleared,
1267                "Fast drain must clear the dirty marker on success"
1268            );
1269
1270            assert!(
1271                !is_dirty(&store).await,
1272                "auto-drain should leave no dirty marker behind"
1273            );
1274
1275            let post = check_gotcha_indexes(&store).await.unwrap();
1276            assert!(
1277                !post.has_drift(),
1278                "no drift after auto-drain: missing_file={}, stale_file={}, missing_edge={}, stale_edge={}",
1279                post.missing_file_links.len(),
1280                post.stale_file_links.len(),
1281                post.missing_edges.len(),
1282                post.stale_edges.len(),
1283            );
1284
1285            store.close().await.unwrap();
1286        }
1287    }
1288}