Skip to main content

innate_core/kb/
repair.rs

1use super::*;
2
/// Summary returned by [`KnowledgeBase::repair_traces`].
///
/// Every field is a plain counter, so `Default` yields an all-zero report.
#[derive(Clone, Debug, Default)]
pub struct TraceRepairReport {
    /// Number of `selected`/`retrieved` usage events removed. These came from
    /// daemon recalls whose knowledge was never injected into a model context.
    pub daemon_events_deleted: usize,
    /// Number of `open` episodic logs moved to `discarded`: daemon session
    /// traces plus no-answer recalls that would otherwise linger in the
    /// open/distill pool.
    pub open_logs_retired: usize,
    /// Sum of `selected_count` over non-spark chunks, measured before the repair.
    pub selected_before: i64,
    /// Sum of `selected_count` over non-spark chunks, measured after the repair.
    pub selected_after: i64,
}
17
18impl KnowledgeBase {
19    /// One-shot data repair for trace pollution that predates the strict
20    /// `selected` = "entered the model context" semantics (Priority 1).
21    ///
22    /// Before the fix, the daemon recalled on every session start, discarded the
23    /// knowledge, and kept only the `trace_id` — yet `recall()` had already
24    /// written per-chunk `selected`/`retrieved` events and an `open` episodic
25    /// log. That inflated `selected_count` (which feeds the curate archive
26    /// heuristic `selected_count >= N AND used_count = 0`) and stuffed the `open`
27    /// pool that drives trace-completion stats. Empty hook recalls did the same.
28    ///
29    /// This repair, in one transaction:
30    ///   1. deletes daemon-sourced `selected`/`retrieved` usage events;
31    ///   2. recomputes `selected_count` from the cleaned facts (curate's formula);
32    ///   3. retires orphaned `open` logs (daemon session traces + no-answer
33    ///      recalls whose snapshot selected nothing) to `discarded`/`known_none`.
34    ///
35    /// Idempotent: a second run deletes nothing and recomputes the same counts.
36    /// With `dry_run` the transaction is rolled back and the report reflects what
37    /// *would* change.
38    pub fn repair_traces(&self, dry_run: bool) -> Result<TraceRepairReport> {
39        let sum_selected = || -> Result<i64> {
40            Ok(self.storage.query_chunks_params(
41                "SELECT COALESCE(SUM(selected_count), 0) AS s FROM chunks WHERE origin != 'spark'",
42                rusqlite::params![],
43            )?[0]["s"]
44                .as_i64()
45                .unwrap_or(0))
46        };
47
48        self.storage.begin_immediate()?;
49        let outcome = (|| -> Result<TraceRepairReport> {
50            let selected_before = sum_selected()?;
51
52            // 1. Drop the false daemon selection facts.
53            let daemon_events_deleted = self.storage.conn_execute_count(
54                "DELETE FROM usage_trace
55                 WHERE source = 'daemon' AND event IN ('selected', 'retrieved')",
56                rusqlite::params![],
57            )?;
58
59            // 2. Recompute selected_count from the retained facts (mirrors the
60            //    curate aggregate: base + post-cutoff live selected events).
61            self.storage.conn_execute(
62                "UPDATE chunks SET
63                   selected_count = selected_count_base + COALESCE(
64                     (SELECT COUNT(*) FROM usage_trace
65                      WHERE chunk_id = chunks.id AND event = 'selected'
66                        AND ts > COALESCE(chunks.evidence_cutoff_at, '')), 0)
67                 WHERE origin != 'spark'",
68                rusqlite::params![],
69            )?;
70
71            // 3. Retire orphaned open logs: daemon session traces and no-answer
72            //    recalls (empty/absent selected snapshot) leave the open pool.
73            let open_logs_retired = self.storage.conn_execute_count(
74                "UPDATE episodic_log
75                 SET distill_state = 'discarded',
76                     usage_state = CASE WHEN usage_state = 'unknown'
77                                        THEN 'known_none' ELSE usage_state END
78                 WHERE distill_state = 'open'
79                   AND (event_source = 'daemon'
80                        OR recall_snapshot IS NULL
81                        -- No-answer recall: nothing was surfaced. Require BOTH an
82                        -- empty selected AND empty sparks list, otherwise a
83                        -- spark-only recall (visible empty but sparks shown) would
84                        -- be wrongly retired even though it surfaced knowledge.
85                        OR (recall_snapshot LIKE '%\"selected\":[]%'
86                            AND recall_snapshot LIKE '%\"sparks\":[]%'))",
87                rusqlite::params![],
88            )?;
89
90            let selected_after = sum_selected()?;
91            Ok(TraceRepairReport {
92                daemon_events_deleted,
93                open_logs_retired,
94                selected_before,
95                selected_after,
96            })
97        })();
98
99        match outcome {
100            Ok(report) => {
101                if dry_run {
102                    self.storage.rollback()?;
103                } else {
104                    self.storage.commit()?;
105                }
106                Ok(report)
107            }
108            Err(e) => {
109                let _ = self.storage.rollback();
110                Err(e)
111            }
112        }
113    }
114}