innate_core/kb/
appraise.rs

1//! appraise — the critic contract.
2//!
3//! `recall()` is the **actor** side: "which knowledge should I load to act?". `appraise()` is
4//! the **critic** side: "do I have any footing on this candidate answer?". Both ride the *same*
5//! fused score (`w_content·sim_content + w_trigger·sim_trigger + w_confidence·conf +
6//! w_context·context_score`, with the pending/anti penalties); appraise does not introduce a
7//! second scoring path. It only *re-reads* that score as strength + valence and surfaces what to
8//! be careful about.
9//!
10//! Hard value-domain constraint (PRD §2.2 / §5, the lethal-trifecta defence): a [`Verdict`]
11//! carries **no answer text** — no `answer`, `fix`, `corrected_*`. `flagged_points` say "watch
12//! out for X", never "the answer is Y". The synchronous path is pure Rust math — **no LLM**.
13
14use serde::Serialize;
15use serde_json::{json, Value};
16
17use crate::errors::Result;
18use crate::storage::EpisodicLogRow;
19use crate::utils::{gen_uuid, utc_now_iso, SanitizeAction};
20
21use super::{anti_trigger_hit, validate_source, KnowledgeBase, Situation, PENDING_RECALL_PENALTY};
22
23// ---------------------------------------------------------------------------
24// Public types — note the absence of any answer-bearing field (enforced by T0.2).
25// ---------------------------------------------------------------------------
26
27/// Polarity of an intuition. Derived, never stored as a column (PRD §3.4).
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
29#[serde(rename_all = "lowercase")]
30pub enum Valence {
31    /// Trigger-hit and positive calibration — "you have footing here".
32    Affirm,
33    /// Anti-trigger hit, failure-origin, or negative context history — "be careful here".
34    Caution,
35    /// Both affirm and caution signals fired.
36    Mixed,
37    /// Nothing resonated meaningfully — stay quiet.
38    Neutral,
39}
40
41/// Strength band, from the fused score against `meta.appraise.tier_*`.
42#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
43#[serde(rename_all = "lowercase")]
44pub enum Tier {
45    Weak,
46    Medium,
47    Strong,
48}
49
50/// A single thing to be careful about. Comes from a caution-class chunk's `trigger_desc` —
51/// "this kind of situation tends to bite", never a prescribed answer.
52#[derive(Debug, Clone, Serialize)]
53pub struct FlaggedPoint {
54    pub chunk_id: String,
55    /// What to watch for. Sourced from the chunk's existing `trigger_desc`. No answer text.
56    pub summary: String,
57    /// Resonance component (sim_content + sim_trigger, weighted).
58    pub resonance: f64,
59    /// Calibration component (confidence + context_score, weighted).
60    pub calibration: f64,
61    /// Single-chunk fused strength ∈ [0,1].
62    pub strength: f64,
63}
64
65/// One contributing chunk, for explainability.
66#[derive(Debug, Clone, Serialize)]
67pub struct Contributor {
68    pub chunk_id: String,
69    pub valence: Valence,
70    pub strength: f64,
71}
72
73/// The critic's judgement. **No answer-bearing field may ever be added here.**
74#[derive(Debug, Clone, Serialize)]
75pub struct Verdict {
76    pub valence: Valence,
77    /// Aggregate strength ∈ [0,1]; the max fused over contributors.
78    pub strength: f64,
79    pub tier: Tier,
80    pub flagged_points: Vec<FlaggedPoint>,
81    pub contributors: Vec<Contributor>,
82    /// Threads appraise → record so an override can flow back via `record(feedback='down')`.
83    pub trace_id: String,
84}
85
86/// Parameters for [`KnowledgeBase::appraise`].
87#[derive(Debug, Clone, Default)]
88pub struct AppraiseParams<'a> {
89    pub situation: Situation<'a>,
90    /// The candidate answer under judgement. Folded into the resonance embedding to sharpen the
91    /// match (still pure math) when `meta.appraise.candidate_in_embed` is true; always sanitized
92    /// first. Never echoed back in the Verdict.
93    pub candidate: Option<&'a str>,
94    /// Resonance prune floor; default `meta.appraise.min_strength`.
95    pub min_strength: Option<f64>,
96    /// Candidate cap; default `meta.appraise.top`.
97    pub top: Option<usize>,
98    /// Write a recall/episodic trace so a later `record` can flow back. Default true.
99    pub trace: bool,
100    /// Event source written to traces (mcp | sdk | cli | hook | daemon | augmented).
101    pub source: &'a str,
102}
103
104/// Per-candidate scored result with the resonance/calibration decomposition exposed
105/// for explainability. The aggregate uses `fused` — the same number recall ranks on.
106struct ScoredCandidate {
107    chunk_id: String,
108    trigger_desc: String,
109    fused: f64,
110    resonance: f64,
111    calibration: f64,
112    valence: Valence,
113}
114
115impl KnowledgeBase {
116    pub fn appraise(&self, params: AppraiseParams<'_>) -> Result<Verdict> {
117        let AppraiseParams {
118            situation,
119            candidate,
120            min_strength,
121            top,
122            trace,
123            source,
124        } = params;
125        let source = if source.is_empty() { "sdk" } else { source };
126        validate_source(source)?;
127        let min_strength = min_strength.unwrap_or(self.appraise_min_strength);
128        let top = top.unwrap_or(self.appraise_top);
129
130        let trace_id = gen_uuid();
131        let now = utc_now_iso();
132
133        // 1. Sanitize the resonance inputs before they touch the embedder (PRD §5). A Discard
134        //    verdict on either neutralizes that input rather than embedding hostile text.
135        let raw_embed = situation.embed_text();
136        let (embed_clean, embed_action) = self.sanitize_content(&raw_embed);
137        let mut embed_text = if matches!(embed_action, SanitizeAction::Discard) {
138            String::new()
139        } else {
140            embed_clean
141        };
142        // Lowercased text used for anti-trigger matching (situation + candidate).
143        let mut anti_match = embed_text.to_lowercase();
144        if self.appraise_candidate_in_embed {
145            if let Some(cand) = candidate.map(str::trim).filter(|c| !c.is_empty()) {
146                let (cand_clean, cand_action) = self.sanitize_content(cand);
147                if !matches!(cand_action, SanitizeAction::Discard) {
148                    embed_text.push_str("\n[candidate] ");
149                    embed_text.push_str(&cand_clean);
150                    anti_match.push('\n');
151                    anti_match.push_str(&cand_clean.to_lowercase());
152                }
153            }
154        }
155
156        // 2. Resonance embedding + candidate gathering (reuses the recall ANN path).
157        let (q_content, q_trigger) = self
158            .embedding
159            .embed_both(&embed_text)
160            .map_err(|e| crate::errors::InnateError::EmbeddingUnavailable(e.to_string()))?;
161        let mut candidates = self.ann_candidates(&q_content, &q_trigger)?;
162        self.apply_soft_dep_bonus(&mut candidates)?;
163
164        // 3. Calibration path: one context_key for read + the pre-written episodic_log (Spec §5).
165        let context_key = situation.context_key(&self.situation_coarse_keys);
166        let cand_ids: Vec<String> = candidates
167            .values()
168            .filter_map(|info| {
169                info.chunk
170                    .get("id")
171                    .and_then(Value::as_str)
172                    .map(str::to_string)
173            })
174            .collect();
175        let cand_refs: Vec<&str> = cand_ids.iter().map(String::as_str).collect();
176        let ctx_scores = self
177            .storage
178            .context_scores_batch(&cand_refs, &context_key)?;
179
180        // 4. Score every candidate with the *same* fused math as recall, but keep the
181        //    resonance / calibration split for explainability, and derive a valence.
182        let mut scored: Vec<ScoredCandidate> = Vec::with_capacity(candidates.len());
183        for info in candidates.into_values() {
184            let chunk = &info.chunk;
185            let chunk_id = chunk.get("id").and_then(Value::as_str).unwrap_or("");
186            let conf = chunk
187                .get("confidence")
188                .and_then(Value::as_f64)
189                .unwrap_or(0.5);
190            let context_score = ctx_scores.get(chunk_id).copied().unwrap_or(0.0);
191
192            let resonance =
193                self.w_content * info.sim_content as f64 + self.w_trigger * info.sim_trigger as f64;
194            let calibration = self.w_confidence * conf + self.w_context * context_score;
195            let mut fused = resonance + calibration;
196            if chunk.get("state").and_then(Value::as_str) == Some("pending") {
197                fused *= PENDING_RECALL_PENALTY;
198            }
199            let anti = chunk
200                .get("anti_trigger_desc")
201                .and_then(Value::as_str)
202                .unwrap_or("");
203            let anti_hit = !anti.is_empty() && anti_trigger_hit(&anti_match, anti);
204            if anti_hit {
205                fused *= self.anti_trigger_penalty;
206            }
207
208            // Failure-origin proxy: the heuristic distiller writes "Avoid: …" content and an
209            // anti_trigger_desc for fail-outcome traces; either marks a caution chunk.
210            let content = chunk.get("content").and_then(Value::as_str).unwrap_or("");
211            let fail_origin = content.trim_start().starts_with("Avoid:") || !anti.is_empty();
212            let trigger_hit = info.sim_trigger as f64 >= self.appraise_trigger_hit_min;
213
214            let valence = if anti_hit || fail_origin || context_score < 0.0 {
215                Valence::Caution
216            } else if trigger_hit && calibration > 0.0 {
217                Valence::Affirm
218            } else {
219                Valence::Neutral
220            };
221
222            let trigger_desc = chunk
223                .get("trigger_desc")
224                .and_then(Value::as_str)
225                .filter(|s| !s.is_empty())
226                .map(str::to_string)
227                .unwrap_or_else(|| {
228                    content
229                        .lines()
230                        .next()
231                        .unwrap_or("")
232                        .chars()
233                        .take(120)
234                        .collect()
235                });
236
237            scored.push(ScoredCandidate {
238                chunk_id: chunk_id.to_string(),
239                trigger_desc,
240                fused: fused.clamp(0.0, 1.0),
241                resonance,
242                calibration,
243                valence,
244            });
245        }
246        scored.sort_by(|a, b| b.fused.partial_cmp(&a.fused).unwrap_or(std::cmp::Ordering::Equal));
247        // Resonance prune (Spec §3.1: min_strength is the resonance lower bound). Sub-threshold
248        // contributors are noise — they must not set strength/tier/valence, otherwise an
249        // unrelated situation reads as weak-caution and silence_rate becomes dishonest. The floor
250        // is the single gate for strength, tier, valence, contributors *and* flagged_points.
251        scored.retain(|s| s.fused >= min_strength);
252        scored.truncate(top);
253
254        // 5. Aggregate: strength = max fused over surviving contributors; valence by max-affirm
255        //    vs max-caution. flagged_points = the caution survivors.
256        let max_for = |v: Valence| -> f64 {
257            scored
258                .iter()
259                .filter(|s| s.valence == v)
260                .map(|s| s.fused)
261                .fold(0.0_f64, f64::max)
262        };
263        let s_affirm = max_for(Valence::Affirm);
264        let s_caution = max_for(Valence::Caution);
265        let strength = scored.iter().map(|s| s.fused).fold(0.0_f64, f64::max);
266
267        let valence = match (s_affirm > 0.0, s_caution > 0.0) {
268            (true, true) => Valence::Mixed,
269            (false, true) => Valence::Caution,
270            (true, false) => Valence::Affirm,
271            (false, false) => Valence::Neutral,
272        };
273        let tier = if strength >= self.appraise_tier_strong {
274            Tier::Strong
275        } else if strength >= self.appraise_tier_weak {
276            Tier::Medium
277        } else {
278            Tier::Weak
279        };
280
281        let flagged_points: Vec<FlaggedPoint> = scored
282            .iter()
283            .filter(|s| s.valence == Valence::Caution && s.fused >= min_strength)
284            .map(|s| FlaggedPoint {
285                chunk_id: s.chunk_id.clone(),
286                summary: s.trigger_desc.clone(),
287                resonance: s.resonance,
288                calibration: s.calibration,
289                strength: s.fused,
290            })
291            .collect();
292        let contributors: Vec<Contributor> = scored
293            .iter()
294            .map(|s| Contributor {
295                chunk_id: s.chunk_id.clone(),
296                valence: s.valence,
297                strength: s.fused,
298            })
299            .collect();
300
301        let verdict = Verdict {
302            valence,
303            strength,
304            tier,
305            flagged_points,
306            contributors,
307            trace_id: trace_id.clone(),
308        };
309
310        // 6. Trace — same shape/timing as recall so a later record(trace_id, …) UPDATEs the
311        //    same episodic_log row and flows the override back through confidence_evidence.
312        if trace {
313            self.write_appraise_trace(&trace_id, &context_key, &raw_embed, &scored, &verdict, source, &now)?;
314        }
315
316        Ok(verdict)
317    }
318
319    #[allow(clippy::too_many_arguments)]
320    fn write_appraise_trace(
321        &self,
322        trace_id: &str,
323        context_key: &str,
324        situation_text: &str,
325        scored: &[ScoredCandidate],
326        verdict: &Verdict,
327        source: &str,
328        now: &str,
329    ) -> Result<()> {
330        let lib_id = self.storage.lib_id()?;
331        self.storage.begin_immediate()?;
332        let result = (|| -> Result<()> {
333            for (rank, s) in scored.iter().enumerate() {
334                let sim = Some(s.fused);
335                self.storage.insert_usage_trace(
336                    trace_id,
337                    Some(&s.chunk_id),
338                    "retrieved",
339                    1.0,
340                    sim,
341                    Some("appraise"),
342                    None,
343                    Some((rank + 1) as i64),
344                    None,
345                    source,
346                    now,
347                )?;
348                // Mark contributors 'selected' too: the critic leaned on them, so they must be
349                // attributable for `record(feedback=…)` to flow an override back (Spec §5).
350                self.storage.insert_usage_trace(
351                    trace_id,
352                    Some(&s.chunk_id),
353                    "selected",
354                    1.0,
355                    sim,
356                    Some("appraise"),
357                    None,
358                    Some((rank + 1) as i64),
359                    None,
360                    source,
361                    now,
362                )?;
363            }
364            // The verdict is persisted in recall_snapshot (free-form TEXT, no schema change) so the
365            // honesty metrics in inspect() can bucket by tier/valence and join the later outcome.
366            let contributor_ids: Vec<&String> = scored.iter().map(|s| &s.chunk_id).collect();
367            let snapshot = json!({
368                "appraise": {
369                    "valence": verdict.valence,
370                    "tier": verdict.tier,
371                    "strength": verdict.strength,
372                    "flagged": verdict.flagged_points.iter().map(|f| &f.chunk_id).collect::<Vec<_>>(),
373                },
374                "retrieved": contributor_ids,
375                "selected": contributor_ids,
376            });
377            let log = EpisodicLogRow {
378                id: gen_uuid(),
379                trace_id: trace_id.to_string(),
380                lib_id,
381                ts: now.to_string(),
382                query: Some(situation_text.chars().take(500).collect()),
383                recall_snapshot: Some(snapshot.to_string()),
384                event_source: source.to_string(),
385                task_state: "recalled".to_string(),
386                usage_state: "unknown".to_string(),
387                context_key: Some(context_key.to_string()),
388                distill_state: "open".to_string(),
389                ..Default::default()
390            };
391            self.storage.upsert_episodic_log(&log)?;
392            self.storage.commit()
393        })();
394        if result.is_err() {
395            let _ = self.storage.rollback();
396        }
397        result
398    }
399}
innate_core/kb/appraise.rs

innate_core/kb/
appraise.rs