Skip to main content

innate_core/kb/
appraise.rs

//! appraise — the critic contract.
//!
//! `recall()` is the **actor** side: "which knowledge should I load to act?". `appraise()` is
//! the **critic** side: "do I have any footing on this candidate answer?". Both ride the *same*
//! fused score (`w_content·sim_content + w_trigger·sim_trigger + w_confidence·conf +
//! w_context·context_score + w_activation·activation`, with the pending/anti penalties); appraise
//! does not introduce a second scoring path. It only *re-reads* that score as strength + valence
//! and surfaces what to be careful about.
//!
//! Hard value-domain constraint (PRD §2.2 / §5, the lethal-trifecta defence): a [`Verdict`]
//! carries **no answer text** — no `answer`, `fix`, `corrected_*`. `flagged_points` say "watch
//! out for X", never "the answer is Y". The synchronous path is pure Rust math — **no LLM**.
14
15use serde::Serialize;
16use serde_json::{json, Value};
17
18use super::actr_activation;
19use crate::errors::Result;
20use crate::storage::EpisodicLogRow;
21use crate::utils::{gen_uuid, utc_now_iso, SanitizeAction};
22
23use super::{anti_trigger_hit, validate_source, KnowledgeBase, Situation, PENDING_RECALL_PENALTY};
24
25// ---------------------------------------------------------------------------
26// Public types — note the absence of any answer-bearing field (enforced by T0.2).
27// ---------------------------------------------------------------------------
28
/// Fixed disclaimer returned to the agent: intuition is only a reference signal, not a precise
/// answer. It accompanies every verdict in the MCP / CLI appraise responses to remind the actor
/// to weigh it, not to follow it blindly, and not to let intuition override its own judgement of
/// the correct answer. This is the explicit, delivery-layer statement of the value-domain
/// guardrail (PRD §2.2/§5: "intuition never produces an answer").
pub const APPRAISE_ADVISORY: &str = "Reference signal only — this is intuition (footing/caution), \
not a precise or verified answer. Weigh it as one input; do not defer to it and never let it \
override your own analysis of the correct answer. flagged_points are things to watch for, never \
prescribed solutions. When abstained=true the critic has no footing — that is correct, not a failure.";
36
/// Polarity of an intuition. Derived, never stored as a column (PRD §3.4).
///
/// Serialized as a lowercase string (`affirm`, `caution`, `mixed`, `neutral`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Valence {
    /// Trigger-hit and positive calibration — "you have footing here".
    Affirm,
    /// Anti-trigger hit, failure-origin, or negative context history — "be careful here".
    Caution,
    /// Both affirm and caution signals fired. Only produced at the aggregate (verdict) level,
    /// when at least one affirm and one caution contributor survived pruning.
    Mixed,
    /// Nothing resonated meaningfully — stay quiet. Also the valence of every abstained verdict.
    Neutral,
}
50
/// Strength band, from the fused score against `meta.appraise.tier_*`.
///
/// Serialized as a lowercase string (`weak`, `medium`, `strong`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Tier {
    /// Aggregate strength below the weak threshold; also forced on every abstained verdict.
    Weak,
    /// Aggregate strength at or above the weak threshold but below the strong threshold.
    Medium,
    /// Aggregate strength at or above the strong threshold.
    Strong,
}
59
/// Scheme A — why the critic abstained (four gates). Abstaining is a first-class output, not a
/// failure: the critic's first capability is saying "I don't know". The gates are evaluated in
/// short-circuit order and only the first gate that fires is recorded.
///
/// Serialized in `snake_case` (e.g. `weak_resonance`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AbstainReason {
    /// Gate 1: after pruning, no candidate cleared the resonance floor — nothing resonated at all.
    WeakResonance,
    /// Gate 2: the rich embedding is close but the signature is far — suspected false resonance
    /// (Scheme F).
    FalseResonance,
    /// Gate 3: the matched neighbours lack observed-outcome history — no evidence, so no
    /// pretending to know (Scheme C / gate 3).
    SparseEvidence,
    /// Gate 4: the fused scores of the top-k neighbours are too dispersed — the situation is
    /// genuinely ambiguous (Scheme G).
    Conflicted,
}
74
/// A single thing to be careful about. Comes from a caution-class chunk's `trigger_desc` —
/// "this kind of situation tends to bite", never a prescribed answer.
#[derive(Debug, Clone, Serialize)]
pub struct FlaggedPoint {
    /// Id of the caution chunk this point was derived from.
    pub chunk_id: String,
    /// What to watch for. Sourced from the chunk's existing `trigger_desc`; when that field is
    /// missing or empty, `appraise` falls back to the first line of the chunk content truncated
    /// to 120 characters. Intended to carry no answer text.
    pub summary: String,
    /// Resonance component (sim_content + sim_trigger, weighted).
    pub resonance: f64,
    /// Calibration component (confidence + context_score + ACT-R activation, weighted).
    pub calibration: f64,
    /// Single-chunk fused strength ∈ [0,1].
    pub strength: f64,
}
89
/// One contributing chunk, for explainability. Every candidate that survives the resonance
/// prune is listed, including on abstained verdicts.
#[derive(Debug, Clone, Serialize)]
pub struct Contributor {
    /// Id of the contributing chunk.
    pub chunk_id: String,
    /// Per-chunk valence (`Affirm`, `Caution` or `Neutral`).
    pub valence: Valence,
    /// Per-chunk fused strength ∈ [0,1].
    pub strength: f64,
}
97
/// The critic's judgement. **No answer-bearing field may ever be added here.**
#[derive(Debug, Clone, Serialize)]
pub struct Verdict {
    /// Aggregate polarity; always `Neutral` when `abstained` is true.
    pub valence: Valence,
    /// Aggregate strength ∈ [0,1]; the max fused over contributors.
    pub strength: f64,
    /// Strength band; always `Weak` when `abstained` is true.
    pub tier: Tier,
    /// Caution-class survivors to watch out for; empty when `abstained` is true.
    pub flagged_points: Vec<FlaggedPoint>,
    /// Every surviving candidate, kept even on abstention for explainability.
    pub contributors: Vec<Contributor>,
    /// Threads appraise → record so an override can flow back via `record(feedback='down')`.
    pub trace_id: String,
    /// Scheme A: abstaining is a first-class output. When `true`, valence is `Neutral`,
    /// `flagged_points` is empty and `confidence` is 0, but `strength` stays visible (the
    /// abstention rate / abstention precision are health signals).
    pub abstained: bool,
    /// Why the critic abstained; `None` when it took a stance. Omitted from the serialized form
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub abstain_reason: Option<AbstainReason>,
    /// Scheme E/G: confidence ∈ [0,1] after the calibration mapping and shaping by neighbour
    /// dispersion (`calibrated · (1 − dispersion)`). Distinct from `strength`: strength is the raw
    /// resonance strength, confidence is the intuition's honest estimate of its own weight.
    pub confidence: f64,
    /// Scheme G: dispersion (max − min) of the fused scores of the surviving top-k neighbours;
    /// 0 when fewer than two survive. Exposed so downstream readers can judge ambiguity.
    pub dispersion: f64,
}
121
/// Parameters for [`KnowledgeBase::appraise`].
#[derive(Debug, Clone, Default)]
pub struct AppraiseParams<'a> {
    /// The situation being appraised; provides the text to embed and the context key.
    pub situation: Situation<'a>,
    /// The candidate answer under judgement. Folded into the resonance embedding to sharpen the
    /// match (still pure math) when `meta.appraise.candidate_in_embed` is true; always sanitized
    /// first. Never echoed back in the Verdict.
    pub candidate: Option<&'a str>,
    /// Resonance prune floor; default `meta.appraise.min_strength`.
    pub min_strength: Option<f64>,
    /// Candidate cap; default `meta.appraise.top`.
    pub top: Option<usize>,
    /// Write a recall/episodic trace so a later `record` can flow back. Intended default: true.
    /// NOTE(review): the derived `Default` yields `false` for this field, so
    /// `AppraiseParams::default()` does NOT trace — callers must set `trace: true` explicitly
    /// unless a hand-written `Default` is introduced. Confirm which behaviour is intended.
    pub trace: bool,
    /// Event source written to traces (mcp | sdk | cli | hook | daemon | augmented). An empty
    /// string (the derived default) is treated as `"sdk"` by `appraise`.
    pub source: &'a str,
}
139
/// Per-candidate scored result with the resonance/calibration decomposition exposed
/// for explainability. The aggregate uses `fused` — the same number recall ranks on.
struct ScoredCandidate {
    /// Chunk id; empty string when the chunk JSON carries no string `id`.
    chunk_id: String,
    /// The chunk's `trigger_desc`, or the first line of its content (≤ 120 chars) as a fallback.
    trigger_desc: String,
    /// Fused score after the pending / anti-trigger penalties, clamped to [0,1].
    fused: f64,
    /// Weighted content + trigger similarity.
    resonance: f64,
    /// Weighted confidence + context score + activation.
    calibration: f64,
    /// Per-chunk valence (`Affirm`, `Caution` or `Neutral`).
    valence: Valence,
}
150
151impl KnowledgeBase {
152    pub fn appraise(&self, params: AppraiseParams<'_>) -> Result<Verdict> {
153        let AppraiseParams {
154            situation,
155            candidate,
156            min_strength,
157            top,
158            trace,
159            source,
160        } = params;
161        let source = if source.is_empty() { "sdk" } else { source };
162        validate_source(source)?;
163        let min_strength = min_strength.unwrap_or(self.appraise_min_strength);
164        let top = top.unwrap_or(self.appraise_top);
165
166        let trace_id = gen_uuid();
167        let now = utc_now_iso();
168
169        // 1. Sanitize the resonance inputs before they touch the embedder (PRD §5). A Discard
170        //    verdict on either neutralizes that input rather than embedding hostile text.
171        let raw_embed = situation.embed_text();
172        let (embed_clean, embed_action) = self.sanitize_content(&raw_embed);
173        let mut embed_text = if matches!(embed_action, SanitizeAction::Discard) {
174            String::new()
175        } else {
176            embed_clean
177        };
178        // Lowercased text used for anti-trigger matching (situation + candidate).
179        let mut anti_match = embed_text.to_lowercase();
180        if self.appraise_candidate_in_embed {
181            if let Some(cand) = candidate.map(str::trim).filter(|c| !c.is_empty()) {
182                let (cand_clean, cand_action) = self.sanitize_content(cand);
183                if !matches!(cand_action, SanitizeAction::Discard) {
184                    embed_text.push_str("\n[candidate] ");
185                    embed_text.push_str(&cand_clean);
186                    anti_match.push('\n');
187                    anti_match.push_str(&cand_clean.to_lowercase());
188                }
189            }
190        }
191
192        // 2. Resonance embedding + candidate gathering (reuses the recall ANN path).
193        let (q_content, q_trigger) = self
194            .embedding
195            .embed_both(&embed_text)
196            .map_err(|e| crate::errors::InnateError::EmbeddingUnavailable(e.to_string()))?;
197        let mut candidates = self.ann_candidates(&q_content, &q_trigger)?;
198        self.apply_soft_dep_bonus(&mut candidates)?;
199
200        // 3. Calibration path: one context_key for read + the pre-written episodic_log (Spec §5).
201        let context_key = situation.context_key(&self.situation_coarse_keys);
202        let cand_ids: Vec<String> = candidates
203            .values()
204            .filter_map(|info| {
205                info.chunk
206                    .get("id")
207                    .and_then(Value::as_str)
208                    .map(str::to_string)
209            })
210            .collect();
211        let cand_refs: Vec<&str> = cand_ids.iter().map(String::as_str).collect();
212        let ctx_scores = self.storage.context_scores_batch(
213            &cand_refs,
214            &context_key,
215            self.intuition_prior_m,
216            self.intuition_base_rate,
217        )?;
218        // 方案 F 门2:哪些邻居在 signature 通道(coarse 情境桶)也有校准历史。
219        let sig_present = self
220            .storage
221            .context_stat_present_batch(&cand_refs, &context_key)?;
222
223        // 4. Score every candidate with the *same* fused math as recall, but keep the
224        //    resonance / calibration split for explainability, and derive a valence.
225        let mut scored: Vec<ScoredCandidate> = Vec::with_capacity(candidates.len());
226        for info in candidates.into_values() {
227            let chunk = &info.chunk;
228            let chunk_id = chunk.get("id").and_then(Value::as_str).unwrap_or("");
229            let conf = chunk
230                .get("confidence")
231                .and_then(Value::as_f64)
232                .unwrap_or(0.5);
233            let context_score = ctx_scores.get(chunk_id).copied().unwrap_or(0.0);
234
235            let resonance =
236                self.w_content * info.sim_content as f64 + self.w_trigger * info.sim_trigger as f64;
237            // ACT-R activation (recency × frequency) — same usage-history signal recall fuses;
238            // grouped with calibration since it reflects accumulated use, not query resonance.
239            let used_count = chunk.get("used_count").and_then(Value::as_i64).unwrap_or(0);
240            let last_used_at = chunk.get("last_used_at").and_then(Value::as_str);
241            let activation = actr_activation(used_count, last_used_at, &now);
242            let calibration = self.w_confidence * conf
243                + self.w_context * context_score
244                + self.w_activation * activation;
245            let mut fused = resonance + calibration;
246            if chunk.get("state").and_then(Value::as_str) == Some("pending") {
247                fused *= PENDING_RECALL_PENALTY;
248            }
249            let anti = chunk
250                .get("anti_trigger_desc")
251                .and_then(Value::as_str)
252                .unwrap_or("");
253            let anti_hit = !anti.is_empty() && anti_trigger_hit(&anti_match, anti);
254            if anti_hit {
255                fused *= self.anti_trigger_penalty;
256            }
257
258            // Failure-origin proxy: the heuristic distiller writes "Avoid: …" content and an
259            // anti_trigger_desc for fail-outcome traces; either marks a caution chunk.
260            let content = chunk.get("content").and_then(Value::as_str).unwrap_or("");
261            let fail_origin = content.trim_start().starts_with("Avoid:") || !anti.is_empty();
262            let trigger_hit = info.sim_trigger as f64 >= self.appraise_trigger_hit_min;
263
264            let valence = if anti_hit || fail_origin || context_score < 0.0 {
265                Valence::Caution
266            } else if trigger_hit && calibration > 0.0 {
267                Valence::Affirm
268            } else {
269                Valence::Neutral
270            };
271
272            let trigger_desc = chunk
273                .get("trigger_desc")
274                .and_then(Value::as_str)
275                .filter(|s| !s.is_empty())
276                .map(str::to_string)
277                .unwrap_or_else(|| {
278                    content
279                        .lines()
280                        .next()
281                        .unwrap_or("")
282                        .chars()
283                        .take(120)
284                        .collect()
285                });
286
287            scored.push(ScoredCandidate {
288                chunk_id: chunk_id.to_string(),
289                trigger_desc,
290                fused: fused.clamp(0.0, 1.0),
291                resonance,
292                calibration,
293                valence,
294            });
295        }
296        scored.sort_by(|a, b| {
297            b.fused
298                .partial_cmp(&a.fused)
299                .unwrap_or(std::cmp::Ordering::Equal)
300        });
301        // Resonance prune (Spec §3.1: min_strength is the resonance lower bound). Sub-threshold
302        // contributors are noise — they must not set strength/tier/valence, otherwise an
303        // unrelated situation reads as weak-caution and silence_rate becomes dishonest. The floor
304        // is the single gate for strength, tier, valence, contributors *and* flagged_points.
305        scored.retain(|s| s.fused >= min_strength);
306        scored.truncate(top);
307
308        // strength = max fused over survivors; dispersion (方案 G) = fused 极差。
309        let strength = scored.iter().map(|s| s.fused).fold(0.0_f64, f64::max);
310        let dispersion = if scored.len() >= 2 {
311            let hi = scored.iter().map(|s| s.fused).fold(f64::MIN, f64::max);
312            let lo = scored.iter().map(|s| s.fused).fold(f64::MAX, f64::min);
313            (hi - lo).clamp(0.0, 1.0)
314        } else {
315            0.0
316        };
317
318        // ---- 方案 A:四道弃权门(短路顺序,记录第一道触发的门)----
319        let mut abstain: Option<AbstainReason> = None;
320        // 门1 弱共振:prune 后无候选越过共振地板 —— 根本没共振到。
321        if scored.is_empty() {
322            abstain = Some(AbstainReason::WeakResonance);
323        }
324        // 门2 假共振(方案 F):signature 通道一致度低于地板。默认 floor=0 → 关闭。
325        if abstain.is_none() && self.appraise_signature_floor > 0.0 {
326            let agree = scored
327                .iter()
328                .filter(|s| sig_present.contains(&s.chunk_id))
329                .count() as f64
330                / scored.len() as f64;
331            if agree < self.appraise_signature_floor {
332                abstain = Some(AbstainReason::FalseResonance);
333            }
334        }
335        // 门3 证据稀疏(方案 C/门3):有实际观测历史的邻居不足。默认 min_evidence=0 → 关闭。
336        if abstain.is_none() && self.appraise_min_evidence > 0 {
337            let mut observed = 0_i64;
338            for s in &scored {
339                if self.storage.observed_outcome_count(&s.chunk_id)? >= 1 {
340                    observed += 1;
341                }
342            }
343            if observed < self.appraise_min_evidence {
344                abstain = Some(AbstainReason::SparseEvidence);
345            }
346        }
347        // 门4 真实模糊(方案 G):邻居离散度超过上界。默认 ceiling=1.0 → 关闭。
348        if abstain.is_none() && dispersion > self.appraise_conflict_ceiling {
349            abstain = Some(AbstainReason::Conflicted);
350        }
351
352        // 5. Aggregate: strength = max fused over surviving contributors; valence by max-affirm
353        //    vs max-caution. flagged_points = the caution survivors.
354        let max_for = |v: Valence| -> f64 {
355            scored
356                .iter()
357                .filter(|s| s.valence == v)
358                .map(|s| s.fused)
359                .fold(0.0_f64, f64::max)
360        };
361        let s_affirm = max_for(Valence::Affirm);
362        let s_caution = max_for(Valence::Caution);
363
364        let directional_valence = match (s_affirm > 0.0, s_caution > 0.0) {
365            (true, true) => Valence::Mixed,
366            (false, true) => Valence::Caution,
367            (true, false) => Valence::Affirm,
368            (false, false) => Valence::Neutral,
369        };
370        let directional_tier = if strength >= self.appraise_tier_strong {
371            Tier::Strong
372        } else if strength >= self.appraise_tier_weak {
373            Tier::Medium
374        } else {
375            Tier::Weak
376        };
377
378        // 方案 E:校准映射(分桶查表;空 map = 恒等)。方案 G:再按离散度折损。
379        let calibrated = self.calibrate_confidence(strength);
380        let shaped_conf = (calibrated * (1.0 - dispersion)).clamp(0.0, 1.0);
381
382        // 弃权时 valence=Neutral、tier=Weak、flagged 为空、confidence=0;strength 仍可见。
383        let (valence, tier, confidence) = if abstain.is_some() {
384            (Valence::Neutral, Tier::Weak, 0.0)
385        } else {
386            (directional_valence, directional_tier, shaped_conf)
387        };
388
389        let flagged_points: Vec<FlaggedPoint> = if abstain.is_some() {
390            Vec::new()
391        } else {
392            scored
393                .iter()
394                .filter(|s| s.valence == Valence::Caution && s.fused >= min_strength)
395                .map(|s| FlaggedPoint {
396                    chunk_id: s.chunk_id.clone(),
397                    summary: s.trigger_desc.clone(),
398                    resonance: s.resonance,
399                    calibration: s.calibration,
400                    strength: s.fused,
401                })
402                .collect()
403        };
404        // contributors 始终保留(可解释性;弃权样本也留痕,符合零数据丢失)。
405        let contributors: Vec<Contributor> = scored
406            .iter()
407            .map(|s| Contributor {
408                chunk_id: s.chunk_id.clone(),
409                valence: s.valence,
410                strength: s.fused,
411            })
412            .collect();
413
414        let verdict = Verdict {
415            valence,
416            strength,
417            tier,
418            flagged_points,
419            contributors,
420            trace_id: trace_id.clone(),
421            abstained: abstain.is_some(),
422            abstain_reason: abstain,
423            confidence,
424            dispersion,
425        };
426
427        // 6. Trace — same shape/timing as recall so a later record(trace_id, …) UPDATEs the
428        //    same episodic_log row and flows the override back through confidence_evidence.
429        if trace {
430            self.write_appraise_trace(
431                &trace_id,
432                &context_key,
433                &raw_embed,
434                &scored,
435                &verdict,
436                source,
437                &now,
438            )?;
439        }
440
441        Ok(verdict)
442    }
443
444    #[allow(clippy::too_many_arguments)]
445    fn write_appraise_trace(
446        &self,
447        trace_id: &str,
448        context_key: &str,
449        situation_text: &str,
450        scored: &[ScoredCandidate],
451        verdict: &Verdict,
452        source: &str,
453        now: &str,
454    ) -> Result<()> {
455        let lib_id = self.storage.lib_id()?;
456        self.storage.begin_immediate()?;
457        let result = (|| -> Result<()> {
458            for (rank, s) in scored.iter().enumerate() {
459                let sim = Some(s.fused);
460                self.storage.insert_usage_trace(
461                    trace_id,
462                    Some(&s.chunk_id),
463                    "retrieved",
464                    1.0,
465                    sim,
466                    Some("appraise"),
467                    None,
468                    Some((rank + 1) as i64),
469                    None,
470                    source,
471                    now,
472                )?;
473                // Mark contributors 'selected' too: the critic leaned on them, so they must be
474                // attributable for `record(feedback=…)` to flow an override back (Spec §5).
475                self.storage.insert_usage_trace(
476                    trace_id,
477                    Some(&s.chunk_id),
478                    "selected",
479                    1.0,
480                    sim,
481                    Some("appraise"),
482                    None,
483                    Some((rank + 1) as i64),
484                    None,
485                    source,
486                    now,
487                )?;
488            }
489            // The verdict is persisted in recall_snapshot (free-form TEXT, no schema change) so the
490            // honesty metrics in inspect() can bucket by tier/valence and join the later outcome.
491            let contributor_ids: Vec<&String> = scored.iter().map(|s| &s.chunk_id).collect();
492            let snapshot = json!({
493                "appraise": {
494                    "valence": verdict.valence,
495                    "tier": verdict.tier,
496                    "strength": verdict.strength,
497                    "confidence": verdict.confidence,
498                    "dispersion": verdict.dispersion,
499                    "abstained": verdict.abstained,
500                    "abstain_reason": verdict.abstain_reason,
501                    "flagged": verdict.flagged_points.iter().map(|f| &f.chunk_id).collect::<Vec<_>>(),
502                },
503                "retrieved": contributor_ids,
504                "selected": contributor_ids,
505            });
506            let log = EpisodicLogRow {
507                id: gen_uuid(),
508                trace_id: trace_id.to_string(),
509                lib_id,
510                ts: now.to_string(),
511                query: Some(situation_text.chars().take(500).collect()),
512                recall_snapshot: Some(snapshot.to_string()),
513                event_source: source.to_string(),
514                task_state: "recalled".to_string(),
515                usage_state: "unknown".to_string(),
516                context_key: Some(context_key.to_string()),
517                distill_state: "open".to_string(),
518                ..Default::default()
519            };
520            self.storage.upsert_episodic_log(&log)?;
521            // 方案 B:写 verdict_log —— 直觉模块可证伪的唯一数据源。弃权也入表
522            // (abstain_reason 非空、valence/conf 为空),弃权率本身是健康度信号。
523            let abstain_reason = verdict.abstain_reason.as_ref().map(|r| {
524                serde_json::to_value(r)
525                    .ok()
526                    .and_then(|v| v.as_str().map(str::to_string))
527                    .unwrap_or_default()
528            });
529            let tier_str = serde_json::to_value(verdict.tier)
530                .ok()
531                .and_then(|v| v.as_str().map(str::to_string));
532            let valence_str = serde_json::to_value(verdict.valence)
533                .ok()
534                .and_then(|v| v.as_str().map(str::to_string));
535            self.storage.insert_verdict_log(
536                &gen_uuid(),
537                trace_id,
538                context_key,
539                if verdict.abstained {
540                    None
541                } else {
542                    valence_str.as_deref()
543                },
544                if verdict.abstained {
545                    None
546                } else {
547                    Some(verdict.confidence)
548                },
549                verdict.strength,
550                if verdict.abstained {
551                    None
552                } else {
553                    tier_str.as_deref()
554                },
555                abstain_reason.as_deref(),
556                now,
557            )?;
558            self.storage.commit()
559        })();
560        if result.is_err() {
561            let _ = self.storage.rollback();
562        }
563        result
564    }
565
566    /// 方案 E:把原始强度经学习到的校准映射(分桶查表)转成校准置信度。
567    /// 空 map(冷启动 / 数据不足)= 恒等,不引入偏差。命中桶则返回该桶的实际命中率。
568    fn calibrate_confidence(&self, raw: f64) -> f64 {
569        let map = match self.storage.load_calibration_map() {
570            Ok(m) if !m.is_empty() => m,
571            _ => return raw.clamp(0.0, 1.0),
572        };
573        for (lo, hi, rate) in &map {
574            if raw >= *lo && raw < *hi {
575                return rate.clamp(0.0, 1.0);
576            }
577        }
578        // 落在最后一桶上界(raw==1.0)→ 用最高桶。
579        map.last().map(|(_, _, r)| r.clamp(0.0, 1.0)).unwrap_or(raw)
580    }
581}