Skip to main content

innate_core/kb/
appraise.rs

1//! appraise — the critic contract.
2//!
3//! `recall()` is the **actor** side: "which knowledge should I load to act?". `appraise()` is
4//! the **critic** side: "do I have any footing on this candidate answer?". Both ride the *same*
5//! fused score (`w_content·sim_content + w_trigger·sim_trigger + w_confidence·conf +
6//! w_context·context_score + w_activation·activation`, with the pending/anti penalties); appraise
7//! does not introduce a
8//! second scoring path. It only *re-reads* that score as strength + valence and surfaces what to
9//! be careful about.
10//!
11//! Hard value-domain constraint (PRD §2.2 / §5, the lethal-trifecta defence): a [`Verdict`]
12//! carries **no answer text** — no `answer`, `fix`, `corrected_*`. `flagged_points` say "watch
13//! out for X", never "the answer is Y". The synchronous path is pure Rust math — **no LLM**.
14
15use serde::Serialize;
16use serde_json::{json, Value};
17
18use crate::errors::Result;
19use crate::storage::EpisodicLogRow;
20use super::actr_activation;
21use crate::utils::{gen_uuid, utc_now_iso, SanitizeAction};
22
23use super::{anti_trigger_hit, validate_source, KnowledgeBase, Situation, PENDING_RECALL_PENALTY};
24
25// ---------------------------------------------------------------------------
26// Public types — note the absence of any answer-bearing field (enforced by T0.2).
27// ---------------------------------------------------------------------------
28
29/// Polarity of an intuition. Derived, never stored as a column (PRD §3.4).
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
31#[serde(rename_all = "lowercase")]
32pub enum Valence {
33    /// Trigger-hit and positive calibration — "you have footing here".
34    Affirm,
35    /// Anti-trigger hit, failure-origin, or negative context history — "be careful here".
36    Caution,
37    /// Both affirm and caution signals fired.
38    Mixed,
39    /// Nothing resonated meaningfully — stay quiet.
40    Neutral,
41}
42
43/// Strength band, from the fused score against `meta.appraise.tier_*`.
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
45#[serde(rename_all = "lowercase")]
46pub enum Tier {
47    Weak,
48    Medium,
49    Strong,
50}
51
52/// A single thing to be careful about. Comes from a caution-class chunk's `trigger_desc` —
53/// "this kind of situation tends to bite", never a prescribed answer.
54#[derive(Debug, Clone, Serialize)]
55pub struct FlaggedPoint {
56    pub chunk_id: String,
57    /// What to watch for. Sourced from the chunk's existing `trigger_desc`. No answer text.
58    pub summary: String,
59    /// Resonance component (sim_content + sim_trigger, weighted).
60    pub resonance: f64,
61    /// Calibration component (confidence + context_score, weighted).
62    pub calibration: f64,
63    /// Single-chunk fused strength ∈ [0,1].
64    pub strength: f64,
65}
66
67/// One contributing chunk, for explainability.
68#[derive(Debug, Clone, Serialize)]
69pub struct Contributor {
70    pub chunk_id: String,
71    pub valence: Valence,
72    pub strength: f64,
73}
74
75/// The critic's judgement. **No answer-bearing field may ever be added here.**
76#[derive(Debug, Clone, Serialize)]
77pub struct Verdict {
78    pub valence: Valence,
79    /// Aggregate strength ∈ [0,1]; the max fused over contributors.
80    pub strength: f64,
81    pub tier: Tier,
82    pub flagged_points: Vec<FlaggedPoint>,
83    pub contributors: Vec<Contributor>,
84    /// Threads appraise → record so an override can flow back via `record(feedback='down')`.
85    pub trace_id: String,
86}
87
88/// Parameters for [`KnowledgeBase::appraise`].
89#[derive(Debug, Clone, Default)]
90pub struct AppraiseParams<'a> {
91    pub situation: Situation<'a>,
92    /// The candidate answer under judgement. Folded into the resonance embedding to sharpen the
93    /// match (still pure math) when `meta.appraise.candidate_in_embed` is true; always sanitized
94    /// first. Never echoed back in the Verdict.
95    pub candidate: Option<&'a str>,
96    /// Resonance prune floor; default `meta.appraise.min_strength`.
97    pub min_strength: Option<f64>,
98    /// Candidate cap; default `meta.appraise.top`.
99    pub top: Option<usize>,
100    /// Write a recall/episodic trace so a later `record` can flow back. Default true.
101    pub trace: bool,
102    /// Event source written to traces (mcp | sdk | cli | hook | daemon | augmented).
103    pub source: &'a str,
104}
105
106/// Per-candidate scored result with the resonance/calibration decomposition exposed
107/// for explainability. The aggregate uses `fused` — the same number recall ranks on.
108struct ScoredCandidate {
109    chunk_id: String,
110    trigger_desc: String,
111    fused: f64,
112    resonance: f64,
113    calibration: f64,
114    valence: Valence,
115}
116
117impl KnowledgeBase {
118    pub fn appraise(&self, params: AppraiseParams<'_>) -> Result<Verdict> {
119        let AppraiseParams {
120            situation,
121            candidate,
122            min_strength,
123            top,
124            trace,
125            source,
126        } = params;
127        let source = if source.is_empty() { "sdk" } else { source };
128        validate_source(source)?;
129        let min_strength = min_strength.unwrap_or(self.appraise_min_strength);
130        let top = top.unwrap_or(self.appraise_top);
131
132        let trace_id = gen_uuid();
133        let now = utc_now_iso();
134
135        // 1. Sanitize the resonance inputs before they touch the embedder (PRD §5). A Discard
136        //    verdict on either neutralizes that input rather than embedding hostile text.
137        let raw_embed = situation.embed_text();
138        let (embed_clean, embed_action) = self.sanitize_content(&raw_embed);
139        let mut embed_text = if matches!(embed_action, SanitizeAction::Discard) {
140            String::new()
141        } else {
142            embed_clean
143        };
144        // Lowercased text used for anti-trigger matching (situation + candidate).
145        let mut anti_match = embed_text.to_lowercase();
146        if self.appraise_candidate_in_embed {
147            if let Some(cand) = candidate.map(str::trim).filter(|c| !c.is_empty()) {
148                let (cand_clean, cand_action) = self.sanitize_content(cand);
149                if !matches!(cand_action, SanitizeAction::Discard) {
150                    embed_text.push_str("\n[candidate] ");
151                    embed_text.push_str(&cand_clean);
152                    anti_match.push('\n');
153                    anti_match.push_str(&cand_clean.to_lowercase());
154                }
155            }
156        }
157
158        // 2. Resonance embedding + candidate gathering (reuses the recall ANN path).
159        let (q_content, q_trigger) = self
160            .embedding
161            .embed_both(&embed_text)
162            .map_err(|e| crate::errors::InnateError::EmbeddingUnavailable(e.to_string()))?;
163        let mut candidates = self.ann_candidates(&q_content, &q_trigger)?;
164        self.apply_soft_dep_bonus(&mut candidates)?;
165
166        // 3. Calibration path: one context_key for read + the pre-written episodic_log (Spec §5).
167        let context_key = situation.context_key(&self.situation_coarse_keys);
168        let cand_ids: Vec<String> = candidates
169            .values()
170            .filter_map(|info| {
171                info.chunk
172                    .get("id")
173                    .and_then(Value::as_str)
174                    .map(str::to_string)
175            })
176            .collect();
177        let cand_refs: Vec<&str> = cand_ids.iter().map(String::as_str).collect();
178        let ctx_scores = self
179            .storage
180            .context_scores_batch(&cand_refs, &context_key)?;
181
182        // 4. Score every candidate with the *same* fused math as recall, but keep the
183        //    resonance / calibration split for explainability, and derive a valence.
184        let mut scored: Vec<ScoredCandidate> = Vec::with_capacity(candidates.len());
185        for info in candidates.into_values() {
186            let chunk = &info.chunk;
187            let chunk_id = chunk.get("id").and_then(Value::as_str).unwrap_or("");
188            let conf = chunk
189                .get("confidence")
190                .and_then(Value::as_f64)
191                .unwrap_or(0.5);
192            let context_score = ctx_scores.get(chunk_id).copied().unwrap_or(0.0);
193
194            let resonance =
195                self.w_content * info.sim_content as f64 + self.w_trigger * info.sim_trigger as f64;
196            // ACT-R activation (recency × frequency) — same usage-history signal recall fuses;
197            // grouped with calibration since it reflects accumulated use, not query resonance.
198            let used_count = chunk.get("used_count").and_then(Value::as_i64).unwrap_or(0);
199            let last_used_at = chunk.get("last_used_at").and_then(Value::as_str);
200            let activation = actr_activation(used_count, last_used_at, &now);
201            let calibration = self.w_confidence * conf
202                + self.w_context * context_score
203                + self.w_activation * activation;
204            let mut fused = resonance + calibration;
205            if chunk.get("state").and_then(Value::as_str) == Some("pending") {
206                fused *= PENDING_RECALL_PENALTY;
207            }
208            let anti = chunk
209                .get("anti_trigger_desc")
210                .and_then(Value::as_str)
211                .unwrap_or("");
212            let anti_hit = !anti.is_empty() && anti_trigger_hit(&anti_match, anti);
213            if anti_hit {
214                fused *= self.anti_trigger_penalty;
215            }
216
217            // Failure-origin proxy: the heuristic distiller writes "Avoid: …" content and an
218            // anti_trigger_desc for fail-outcome traces; either marks a caution chunk.
219            let content = chunk.get("content").and_then(Value::as_str).unwrap_or("");
220            let fail_origin = content.trim_start().starts_with("Avoid:") || !anti.is_empty();
221            let trigger_hit = info.sim_trigger as f64 >= self.appraise_trigger_hit_min;
222
223            let valence = if anti_hit || fail_origin || context_score < 0.0 {
224                Valence::Caution
225            } else if trigger_hit && calibration > 0.0 {
226                Valence::Affirm
227            } else {
228                Valence::Neutral
229            };
230
231            let trigger_desc = chunk
232                .get("trigger_desc")
233                .and_then(Value::as_str)
234                .filter(|s| !s.is_empty())
235                .map(str::to_string)
236                .unwrap_or_else(|| {
237                    content
238                        .lines()
239                        .next()
240                        .unwrap_or("")
241                        .chars()
242                        .take(120)
243                        .collect()
244                });
245
246            scored.push(ScoredCandidate {
247                chunk_id: chunk_id.to_string(),
248                trigger_desc,
249                fused: fused.clamp(0.0, 1.0),
250                resonance,
251                calibration,
252                valence,
253            });
254        }
255        scored.sort_by(|a, b| b.fused.partial_cmp(&a.fused).unwrap_or(std::cmp::Ordering::Equal));
256        // Resonance prune (Spec §3.1: min_strength is the resonance lower bound). Sub-threshold
257        // contributors are noise — they must not set strength/tier/valence, otherwise an
258        // unrelated situation reads as weak-caution and silence_rate becomes dishonest. The floor
259        // is the single gate for strength, tier, valence, contributors *and* flagged_points.
260        scored.retain(|s| s.fused >= min_strength);
261        scored.truncate(top);
262
263        // 5. Aggregate: strength = max fused over surviving contributors; valence by max-affirm
264        //    vs max-caution. flagged_points = the caution survivors.
265        let max_for = |v: Valence| -> f64 {
266            scored
267                .iter()
268                .filter(|s| s.valence == v)
269                .map(|s| s.fused)
270                .fold(0.0_f64, f64::max)
271        };
272        let s_affirm = max_for(Valence::Affirm);
273        let s_caution = max_for(Valence::Caution);
274        let strength = scored.iter().map(|s| s.fused).fold(0.0_f64, f64::max);
275
276        let valence = match (s_affirm > 0.0, s_caution > 0.0) {
277            (true, true) => Valence::Mixed,
278            (false, true) => Valence::Caution,
279            (true, false) => Valence::Affirm,
280            (false, false) => Valence::Neutral,
281        };
282        let tier = if strength >= self.appraise_tier_strong {
283            Tier::Strong
284        } else if strength >= self.appraise_tier_weak {
285            Tier::Medium
286        } else {
287            Tier::Weak
288        };
289
290        let flagged_points: Vec<FlaggedPoint> = scored
291            .iter()
292            .filter(|s| s.valence == Valence::Caution && s.fused >= min_strength)
293            .map(|s| FlaggedPoint {
294                chunk_id: s.chunk_id.clone(),
295                summary: s.trigger_desc.clone(),
296                resonance: s.resonance,
297                calibration: s.calibration,
298                strength: s.fused,
299            })
300            .collect();
301        let contributors: Vec<Contributor> = scored
302            .iter()
303            .map(|s| Contributor {
304                chunk_id: s.chunk_id.clone(),
305                valence: s.valence,
306                strength: s.fused,
307            })
308            .collect();
309
310        let verdict = Verdict {
311            valence,
312            strength,
313            tier,
314            flagged_points,
315            contributors,
316            trace_id: trace_id.clone(),
317        };
318
319        // 6. Trace — same shape/timing as recall so a later record(trace_id, …) UPDATEs the
320        //    same episodic_log row and flows the override back through confidence_evidence.
321        if trace {
322            self.write_appraise_trace(&trace_id, &context_key, &raw_embed, &scored, &verdict, source, &now)?;
323        }
324
325        Ok(verdict)
326    }
327
328    #[allow(clippy::too_many_arguments)]
329    fn write_appraise_trace(
330        &self,
331        trace_id: &str,
332        context_key: &str,
333        situation_text: &str,
334        scored: &[ScoredCandidate],
335        verdict: &Verdict,
336        source: &str,
337        now: &str,
338    ) -> Result<()> {
339        let lib_id = self.storage.lib_id()?;
340        self.storage.begin_immediate()?;
341        let result = (|| -> Result<()> {
342            for (rank, s) in scored.iter().enumerate() {
343                let sim = Some(s.fused);
344                self.storage.insert_usage_trace(
345                    trace_id,
346                    Some(&s.chunk_id),
347                    "retrieved",
348                    1.0,
349                    sim,
350                    Some("appraise"),
351                    None,
352                    Some((rank + 1) as i64),
353                    None,
354                    source,
355                    now,
356                )?;
357                // Mark contributors 'selected' too: the critic leaned on them, so they must be
358                // attributable for `record(feedback=…)` to flow an override back (Spec §5).
359                self.storage.insert_usage_trace(
360                    trace_id,
361                    Some(&s.chunk_id),
362                    "selected",
363                    1.0,
364                    sim,
365                    Some("appraise"),
366                    None,
367                    Some((rank + 1) as i64),
368                    None,
369                    source,
370                    now,
371                )?;
372            }
373            // The verdict is persisted in recall_snapshot (free-form TEXT, no schema change) so the
374            // honesty metrics in inspect() can bucket by tier/valence and join the later outcome.
375            let contributor_ids: Vec<&String> = scored.iter().map(|s| &s.chunk_id).collect();
376            let snapshot = json!({
377                "appraise": {
378                    "valence": verdict.valence,
379                    "tier": verdict.tier,
380                    "strength": verdict.strength,
381                    "flagged": verdict.flagged_points.iter().map(|f| &f.chunk_id).collect::<Vec<_>>(),
382                },
383                "retrieved": contributor_ids,
384                "selected": contributor_ids,
385            });
386            let log = EpisodicLogRow {
387                id: gen_uuid(),
388                trace_id: trace_id.to_string(),
389                lib_id,
390                ts: now.to_string(),
391                query: Some(situation_text.chars().take(500).collect()),
392                recall_snapshot: Some(snapshot.to_string()),
393                event_source: source.to_string(),
394                task_state: "recalled".to_string(),
395                usage_state: "unknown".to_string(),
396                context_key: Some(context_key.to_string()),
397                distill_state: "open".to_string(),
398                ..Default::default()
399            };
400            self.storage.upsert_episodic_log(&log)?;
401            self.storage.commit()
402        })();
403        if result.is_err() {
404            let _ = self.storage.rollback();
405        }
406        result
407    }
408}