innate_core/kb/repair.rs
1use super::*;
2
/// Outcome of [`KnowledgeBase::repair_traces`].
///
/// All fields are plain counters, so the report derives `PartialEq`/`Eq`:
/// callers and tests can compare two reports directly (for example a dry run
/// against the real run, or a second run against an all-zero delta).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct TraceRepairReport {
    /// `selected`/`retrieved` usage events deleted (daemon recalls that never
    /// injected their knowledge into a model context).
    pub daemon_events_deleted: usize,
    /// `open` episodic logs retired to `discarded` (daemon session traces +
    /// no-answer recalls that would otherwise sit in the open/distill pool).
    pub open_logs_retired: usize,
    /// Total `selected_count` across non-spark chunks before the repair.
    pub selected_before: i64,
    /// Total `selected_count` across non-spark chunks after the repair
    /// (measured inside the transaction, so it is also populated on a dry run).
    pub selected_after: i64,
}
17
18impl KnowledgeBase {
19 /// One-shot data repair for trace pollution that predates the strict
20 /// `selected` = "entered the model context" semantics (Priority 1).
21 ///
22 /// Before the fix, the daemon recalled on every session start, discarded the
23 /// knowledge, and kept only the `trace_id` — yet `recall()` had already
24 /// written per-chunk `selected`/`retrieved` events and an `open` episodic
25 /// log. That inflated `selected_count` (which feeds the curate archive
26 /// heuristic `selected_count >= N AND used_count = 0`) and stuffed the `open`
27 /// pool that drives trace-completion stats. Empty hook recalls did the same.
28 ///
29 /// This repair, in one transaction:
30 /// 1. deletes daemon-sourced `selected`/`retrieved` usage events;
31 /// 2. recomputes `selected_count` from the cleaned facts (curate's formula);
32 /// 3. retires orphaned `open` logs (daemon session traces + no-answer
33 /// recalls whose snapshot selected nothing) to `discarded`/`known_none`.
34 ///
35 /// Idempotent: a second run deletes nothing and recomputes the same counts.
36 /// With `dry_run` the transaction is rolled back and the report reflects what
37 /// *would* change.
38 pub fn repair_traces(&self, dry_run: bool) -> Result<TraceRepairReport> {
39 let sum_selected = || -> Result<i64> {
40 Ok(self.storage.query_chunks_params(
41 "SELECT COALESCE(SUM(selected_count), 0) AS s FROM chunks WHERE origin != 'spark'",
42 rusqlite::params![],
43 )?[0]["s"]
44 .as_i64()
45 .unwrap_or(0))
46 };
47
48 self.storage.begin_immediate()?;
49 let outcome = (|| -> Result<TraceRepairReport> {
50 let selected_before = sum_selected()?;
51
52 // 1. Drop the false daemon selection facts.
53 let daemon_events_deleted = self.storage.conn_execute_count(
54 "DELETE FROM usage_trace
55 WHERE source = 'daemon' AND event IN ('selected', 'retrieved')",
56 rusqlite::params![],
57 )?;
58
59 // 2. Recompute selected_count from the retained facts (mirrors the
60 // curate aggregate: base + post-cutoff live selected events).
61 self.storage.conn_execute(
62 "UPDATE chunks SET
63 selected_count = selected_count_base + COALESCE(
64 (SELECT COUNT(*) FROM usage_trace
65 WHERE chunk_id = chunks.id AND event = 'selected'
66 AND ts > COALESCE(chunks.evidence_cutoff_at, '')), 0)
67 WHERE origin != 'spark'",
68 rusqlite::params![],
69 )?;
70
71 // 3. Retire orphaned open logs: daemon session traces and no-answer
72 // recalls (empty/absent selected snapshot) leave the open pool.
73 let open_logs_retired = self.storage.conn_execute_count(
74 "UPDATE episodic_log
75 SET distill_state = 'discarded',
76 usage_state = CASE WHEN usage_state = 'unknown'
77 THEN 'known_none' ELSE usage_state END
78 WHERE distill_state = 'open'
79 AND (event_source = 'daemon'
80 OR recall_snapshot IS NULL
81 -- No-answer recall: nothing was surfaced. Require BOTH an
82 -- empty selected AND empty sparks list, otherwise a
83 -- spark-only recall (visible empty but sparks shown) would
84 -- be wrongly retired even though it surfaced knowledge.
85 OR (recall_snapshot LIKE '%\"selected\":[]%'
86 AND recall_snapshot LIKE '%\"sparks\":[]%'))",
87 rusqlite::params![],
88 )?;
89
90 let selected_after = sum_selected()?;
91 Ok(TraceRepairReport {
92 daemon_events_deleted,
93 open_logs_retired,
94 selected_before,
95 selected_after,
96 })
97 })();
98
99 match outcome {
100 Ok(report) => {
101 if dry_run {
102 self.storage.rollback()?;
103 } else {
104 self.storage.commit()?;
105 }
106 Ok(report)
107 }
108 Err(e) => {
109 let _ = self.storage.rollback();
110 Err(e)
111 }
112 }
113 }
114}