Skip to main content

innate_core/kb/
appraise.rs

//! appraise — the critic contract.
//!
//! `recall()` is the **actor** side: "which knowledge should I load to act?". `appraise()` is
//! the **critic** side: "do I have any footing on this candidate answer?". Both ride the *same*
//! fused score (`w_content·sim_content + w_trigger·sim_trigger + w_lexical·sim_lexical +
//! w_confidence·conf + w_context·context_score + w_activation·activation`, with the pending/anti
//! penalties); appraise does not introduce a second scoring path. It only *re-reads* that score
//! as strength + valence and surfaces what to be careful about.
//!
//! Hard value-domain constraint (PRD §2.2 / §5, the lethal-trifecta defence): a [`Verdict`]
//! carries **no answer text** — no `answer`, `fix`, `corrected_*`. `flagged_points` say "watch
//! out for X", never "the answer is Y". The synchronous path is pure Rust math — **no LLM**.
14
15use serde::Serialize;
16use serde_json::{json, Value};
17
18use super::actr_activation;
19use crate::errors::Result;
20use crate::storage::EpisodicLogRow;
21use crate::utils::{gen_uuid, utc_now_iso, SanitizeAction};
22
23use super::{anti_trigger_hit, validate_source, KnowledgeBase, Situation, PENDING_RECALL_PENALTY};
24
// ---------------------------------------------------------------------------
// Public types — note the absence of any answer-bearing field (enforced by T0.2).
// ---------------------------------------------------------------------------
28
/// Fixed advisory handed to the agent: intuition is a reference signal, not a precise answer.
///
/// Meant to accompany every verdict in the MCP / CLI appraise response, reminding the actor to
/// weigh the signal, not to follow it blindly, and never to let intuition override its own
/// judgement of the correct answer. This is the value-domain guard rail (PRD §2.2/§5,
/// "intuition never produces an answer") stated explicitly at the delivery layer.
///
/// The trailing `\` on each line is a string continuation: the newline and the next line's
/// leading whitespace are dropped, so the constant is a single line of text.
pub const APPRAISE_ADVISORY: &str = "Reference signal only — this is intuition (footing/caution), \
not a precise or verified answer. Weigh it as one input; do not defer to it and never let it \
override your own analysis of the correct answer. flagged_points are things to watch for, never \
prescribed solutions. When abstained=true the critic has no footing — that is correct, not a failure.";
36
37/// Polarity of an intuition. Derived, never stored as a column (PRD §3.4).
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
39#[serde(rename_all = "lowercase")]
40pub enum Valence {
41    /// Trigger-hit and positive calibration — "you have footing here".
42    Affirm,
43    /// Anti-trigger hit, failure-origin, or negative context history — "be careful here".
44    Caution,
45    /// Both affirm and caution signals fired.
46    Mixed,
47    /// Nothing resonated meaningfully — stay quiet.
48    Neutral,
49}
50
51/// Strength band, from the fused score against `meta.appraise.tier_*`.
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
53#[serde(rename_all = "lowercase")]
54pub enum Tier {
55    Weak,
56    Medium,
57    Strong,
58}
59
60/// 方案 A —— 弃权原因(四道门)。弃权是一等输出,不是失败:critic 的第一能力是
61/// 「说不知道」。短路顺序求值,记录第一道触发的门。
62#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
63#[serde(rename_all = "snake_case")]
64pub enum AbstainReason {
65    /// 门1:prune 后没有任何候选越过共振地板 —— 根本没共振到东西。
66    WeakResonance,
67    /// 门2:rich 嵌入近但 signature 远 —— 疑似假共振(方案 F)。
68    FalseResonance,
69    /// 门3:命中邻居缺乏实际观测结果历史 —— 没证据就不装懂(方案 C/门3)。
70    SparseEvidence,
71    /// 门4:top-k 邻居 fused 离散度过大 —— 本情境真实模糊(方案 G)。
72    Conflicted,
73}
74
75/// A single thing to be careful about. Comes from a caution-class chunk's `trigger_desc` —
76/// "this kind of situation tends to bite", never a prescribed answer.
77#[derive(Debug, Clone, Serialize)]
78pub struct FlaggedPoint {
79    pub chunk_id: String,
80    /// What to watch for. Sourced from the chunk's existing `trigger_desc`. No answer text.
81    pub summary: String,
82    /// Resonance component (sim_content + sim_trigger, weighted).
83    pub resonance: f64,
84    /// Calibration component (confidence + context_score, weighted).
85    pub calibration: f64,
86    /// Single-chunk fused strength ∈ [0,1].
87    pub strength: f64,
88}
89
90/// One contributing chunk, for explainability.
91#[derive(Debug, Clone, Serialize)]
92pub struct Contributor {
93    pub chunk_id: String,
94    pub valence: Valence,
95    pub strength: f64,
96}
97
98/// The critic's judgement. **No answer-bearing field may ever be added here.**
99#[derive(Debug, Clone, Serialize)]
100pub struct Verdict {
101    pub valence: Valence,
102    /// Aggregate strength ∈ [0,1]; the max fused over contributors.
103    pub strength: f64,
104    pub tier: Tier,
105    pub flagged_points: Vec<FlaggedPoint>,
106    pub contributors: Vec<Contributor>,
107    /// Threads appraise → record so an override can flow back via `record(feedback='down')`.
108    pub trace_id: String,
109    /// 方案 A:弃权是一等输出。`true` 时 valence=Neutral、flagged 为空、confidence=0,
110    /// 但 strength 仍可见(弃权率/弃权精度的健康信号)。
111    pub abstained: bool,
112    /// 弃权原因;表态则 `None`。
113    #[serde(skip_serializing_if = "Option::is_none")]
114    pub abstain_reason: Option<AbstainReason>,
115    /// 方案 E/G:经校准映射 + 邻居离散度塑形后的置信度 ∈ [0,1]。与 strength 区分:
116    /// strength 是原始共振强度,confidence 是「直觉对自己几斤几两的诚实估计」。
117    pub confidence: f64,
118    /// 方案 G:top-k 邻居 fused 离散度(max-min)。透明化,供下游判读模糊性。
119    pub dispersion: f64,
120}
121
122/// Parameters for [`KnowledgeBase::appraise`].
123#[derive(Debug, Clone, Default)]
124pub struct AppraiseParams<'a> {
125    pub situation: Situation<'a>,
126    /// The candidate answer under judgement. Folded into the resonance embedding to sharpen the
127    /// match (still pure math) when `meta.appraise.candidate_in_embed` is true; always sanitized
128    /// first. Never echoed back in the Verdict.
129    pub candidate: Option<&'a str>,
130    /// Resonance prune floor; default `meta.appraise.min_strength`.
131    pub min_strength: Option<f64>,
132    /// Candidate cap; default `meta.appraise.top`.
133    pub top: Option<usize>,
134    /// Write a recall/episodic trace so a later `record` can flow back. Default true.
135    pub trace: bool,
136    /// Event source written to traces (mcp | sdk | cli | hook | daemon | augmented).
137    pub source: &'a str,
138}
139
140/// Per-candidate scored result with the resonance/calibration decomposition exposed
141/// for explainability. The aggregate uses `fused` — the same number recall ranks on.
142struct ScoredCandidate {
143    chunk_id: String,
144    trigger_desc: String,
145    fused: f64,
146    resonance: f64,
147    calibration: f64,
148    valence: Valence,
149}
150
151impl KnowledgeBase {
152    pub fn appraise(&self, params: AppraiseParams<'_>) -> Result<Verdict> {
153        let AppraiseParams {
154            situation,
155            candidate,
156            min_strength,
157            top,
158            trace,
159            source,
160        } = params;
161        let source = if source.is_empty() { "sdk" } else { source };
162        validate_source(source)?;
163        let min_strength = min_strength.unwrap_or(self.appraise_min_strength);
164        let top = top.unwrap_or(self.appraise_top);
165
166        let trace_id = gen_uuid();
167        let now = utc_now_iso();
168
169        // 1. Sanitize the resonance inputs before they touch the embedder (PRD §5). A Discard
170        //    verdict on either neutralizes that input rather than embedding hostile text.
171        let raw_embed = situation.embed_text();
172        let (embed_clean, embed_action) = self.sanitize_content(&raw_embed);
173        let mut embed_text = if matches!(embed_action, SanitizeAction::Discard) {
174            String::new()
175        } else {
176            embed_clean
177        };
178        // Lowercased text used for anti-trigger matching (situation + candidate).
179        let mut anti_match = embed_text.to_lowercase();
180        if self.appraise_candidate_in_embed {
181            if let Some(cand) = candidate.map(str::trim).filter(|c| !c.is_empty()) {
182                let (cand_clean, cand_action) = self.sanitize_content(cand);
183                if !matches!(cand_action, SanitizeAction::Discard) {
184                    embed_text.push_str("\n[candidate] ");
185                    embed_text.push_str(&cand_clean);
186                    anti_match.push('\n');
187                    anti_match.push_str(&cand_clean.to_lowercase());
188                }
189            }
190        }
191
192        // 2. Resonance embedding + candidate gathering (reuses the recall ANN path).
193        let (q_content, q_trigger) = self
194            .embedding
195            .embed_both(&embed_text)
196            .map_err(|e| crate::errors::InnateError::EmbeddingUnavailable(e.to_string()))?;
197        let mut candidates = self.ann_candidates(&q_content, &q_trigger, &embed_text)?;
198        self.apply_soft_dep_bonus(&mut candidates)?;
199
200        // 3. Calibration path: one context_key for read + the pre-written episodic_log (Spec §5).
201        let context_key = situation.context_key(&self.situation_coarse_keys);
202        let cand_ids: Vec<String> = candidates
203            .values()
204            .filter_map(|info| {
205                info.chunk
206                    .get("id")
207                    .and_then(Value::as_str)
208                    .map(str::to_string)
209            })
210            .collect();
211        let cand_refs: Vec<&str> = cand_ids.iter().map(String::as_str).collect();
212        let ctx_scores = self.storage.context_scores_batch(
213            &cand_refs,
214            &context_key,
215            self.intuition_prior_m,
216            self.intuition_base_rate,
217        )?;
218        // 方案 F 门2:哪些邻居在 signature 通道(coarse 情境桶)也有校准历史。
219        let sig_present = self
220            .storage
221            .context_stat_present_batch(&cand_refs, &context_key)?;
222
223        // 4. Score every candidate with the *same* fused math as recall, but keep the
224        //    resonance / calibration split for explainability, and derive a valence.
225        let mut scored: Vec<ScoredCandidate> = Vec::with_capacity(candidates.len());
226        for info in candidates.into_values() {
227            let chunk = &info.chunk;
228            let chunk_id = chunk.get("id").and_then(Value::as_str).unwrap_or("");
229            let conf = chunk
230                .get("confidence")
231                .and_then(Value::as_f64)
232                .unwrap_or(0.5);
233            let context_score = ctx_scores.get(chunk_id).copied().unwrap_or(0.0);
234
235            let resonance = self.w_content * info.sim_content as f64
236                + self.w_trigger * info.sim_trigger as f64
237                + self.w_lexical * info.sim_lexical as f64;
238            // ACT-R activation (recency × frequency) — same usage-history signal recall fuses;
239            // grouped with calibration since it reflects accumulated use, not query resonance.
240            let used_count = chunk.get("used_count").and_then(Value::as_i64).unwrap_or(0);
241            let last_used_at = chunk.get("last_used_at").and_then(Value::as_str);
242            let activation = actr_activation(used_count, last_used_at, &now);
243            let calibration = self.w_confidence * conf
244                + self.w_context * context_score
245                + self.w_activation * activation;
246            let mut fused = resonance + calibration;
247            if chunk.get("state").and_then(Value::as_str) == Some("pending") {
248                fused *= PENDING_RECALL_PENALTY;
249            }
250            let anti = chunk
251                .get("anti_trigger_desc")
252                .and_then(Value::as_str)
253                .unwrap_or("");
254            let anti_hit = !anti.is_empty() && anti_trigger_hit(&anti_match, anti);
255            if anti_hit {
256                fused *= self.anti_trigger_penalty;
257            }
258
259            // Failure-origin proxy: the heuristic distiller writes "Avoid: …" content and an
260            // anti_trigger_desc for fail-outcome traces; either marks a caution chunk.
261            let content = chunk.get("content").and_then(Value::as_str).unwrap_or("");
262            let fail_origin = content.trim_start().starts_with("Avoid:") || !anti.is_empty();
263            let trigger_hit = info.sim_trigger as f64 >= self.appraise_trigger_hit_min;
264
265            let valence = if anti_hit || fail_origin || context_score < 0.0 {
266                Valence::Caution
267            } else if trigger_hit && calibration > 0.0 {
268                Valence::Affirm
269            } else {
270                Valence::Neutral
271            };
272
273            let trigger_desc = chunk
274                .get("trigger_desc")
275                .and_then(Value::as_str)
276                .filter(|s| !s.is_empty())
277                .map(str::to_string)
278                .unwrap_or_else(|| {
279                    content
280                        .lines()
281                        .next()
282                        .unwrap_or("")
283                        .chars()
284                        .take(120)
285                        .collect()
286                });
287
288            scored.push(ScoredCandidate {
289                chunk_id: chunk_id.to_string(),
290                trigger_desc,
291                fused: fused.clamp(0.0, 1.0),
292                resonance,
293                calibration,
294                valence,
295            });
296        }
297        scored.sort_by(|a, b| {
298            b.fused
299                .partial_cmp(&a.fused)
300                .unwrap_or(std::cmp::Ordering::Equal)
301        });
302        // Resonance prune (Spec §3.1: min_strength is the resonance lower bound). Sub-threshold
303        // contributors are noise — they must not set strength/tier/valence, otherwise an
304        // unrelated situation reads as weak-caution and silence_rate becomes dishonest. The floor
305        // is the single gate for strength, tier, valence, contributors *and* flagged_points.
306        scored.retain(|s| s.fused >= min_strength);
307        scored.truncate(top);
308
309        // strength = max fused over survivors; dispersion (方案 G) = fused 极差。
310        let strength = scored.iter().map(|s| s.fused).fold(0.0_f64, f64::max);
311        let dispersion = if scored.len() >= 2 {
312            let hi = scored.iter().map(|s| s.fused).fold(f64::MIN, f64::max);
313            let lo = scored.iter().map(|s| s.fused).fold(f64::MAX, f64::min);
314            (hi - lo).clamp(0.0, 1.0)
315        } else {
316            0.0
317        };
318
319        // ---- 方案 A:四道弃权门(短路顺序,记录第一道触发的门)----
320        let mut abstain: Option<AbstainReason> = None;
321        // 门1 弱共振:prune 后无候选越过共振地板 —— 根本没共振到。
322        if scored.is_empty() {
323            abstain = Some(AbstainReason::WeakResonance);
324        }
325        // 门2 假共振(方案 F):signature 通道一致度低于地板。默认 floor=0 → 关闭。
326        if abstain.is_none() && self.appraise_signature_floor > 0.0 {
327            let agree = scored
328                .iter()
329                .filter(|s| sig_present.contains(&s.chunk_id))
330                .count() as f64
331                / scored.len() as f64;
332            if agree < self.appraise_signature_floor {
333                abstain = Some(AbstainReason::FalseResonance);
334            }
335        }
336        // 门3 证据稀疏(方案 C/门3):有实际观测历史的邻居不足。默认 min_evidence=0 → 关闭。
337        if abstain.is_none() && self.appraise_min_evidence > 0 {
338            let mut observed = 0_i64;
339            for s in &scored {
340                if self.storage.observed_outcome_count(&s.chunk_id)? >= 1 {
341                    observed += 1;
342                }
343            }
344            if observed < self.appraise_min_evidence {
345                abstain = Some(AbstainReason::SparseEvidence);
346            }
347        }
348        // 门4 真实模糊(方案 G):邻居离散度超过上界。默认 ceiling=1.0 → 关闭。
349        if abstain.is_none() && dispersion > self.appraise_conflict_ceiling {
350            abstain = Some(AbstainReason::Conflicted);
351        }
352
353        // 5. Aggregate: strength = max fused over surviving contributors; valence by max-affirm
354        //    vs max-caution. flagged_points = the caution survivors.
355        let max_for = |v: Valence| -> f64 {
356            scored
357                .iter()
358                .filter(|s| s.valence == v)
359                .map(|s| s.fused)
360                .fold(0.0_f64, f64::max)
361        };
362        let s_affirm = max_for(Valence::Affirm);
363        let s_caution = max_for(Valence::Caution);
364
365        let directional_valence = match (s_affirm > 0.0, s_caution > 0.0) {
366            (true, true) => Valence::Mixed,
367            (false, true) => Valence::Caution,
368            (true, false) => Valence::Affirm,
369            (false, false) => Valence::Neutral,
370        };
371        let directional_tier = if strength >= self.appraise_tier_strong {
372            Tier::Strong
373        } else if strength >= self.appraise_tier_weak {
374            Tier::Medium
375        } else {
376            Tier::Weak
377        };
378
379        // 方案 E:校准映射(分桶查表;空 map = 恒等)。方案 G:再按离散度折损。
380        let calibrated = self.calibrate_confidence(strength);
381        let shaped_conf = (calibrated * (1.0 - dispersion)).clamp(0.0, 1.0);
382
383        // 弃权时 valence=Neutral、tier=Weak、flagged 为空、confidence=0;strength 仍可见。
384        let (valence, tier, confidence) = if abstain.is_some() {
385            (Valence::Neutral, Tier::Weak, 0.0)
386        } else {
387            (directional_valence, directional_tier, shaped_conf)
388        };
389
390        let flagged_points: Vec<FlaggedPoint> = if abstain.is_some() {
391            Vec::new()
392        } else {
393            scored
394                .iter()
395                .filter(|s| s.valence == Valence::Caution && s.fused >= min_strength)
396                .map(|s| FlaggedPoint {
397                    chunk_id: s.chunk_id.clone(),
398                    summary: s.trigger_desc.clone(),
399                    resonance: s.resonance,
400                    calibration: s.calibration,
401                    strength: s.fused,
402                })
403                .collect()
404        };
405        // contributors 始终保留(可解释性;弃权样本也留痕,符合零数据丢失)。
406        let contributors: Vec<Contributor> = scored
407            .iter()
408            .map(|s| Contributor {
409                chunk_id: s.chunk_id.clone(),
410                valence: s.valence,
411                strength: s.fused,
412            })
413            .collect();
414
415        let verdict = Verdict {
416            valence,
417            strength,
418            tier,
419            flagged_points,
420            contributors,
421            trace_id: trace_id.clone(),
422            abstained: abstain.is_some(),
423            abstain_reason: abstain,
424            confidence,
425            dispersion,
426        };
427
428        // 6. Trace — same shape/timing as recall so a later record(trace_id, …) UPDATEs the
429        //    same episodic_log row and flows the override back through confidence_evidence.
430        if trace {
431            self.write_appraise_trace(
432                &trace_id,
433                &context_key,
434                &raw_embed,
435                &scored,
436                &verdict,
437                source,
438                &now,
439            )?;
440        }
441
442        Ok(verdict)
443    }
444
445    #[allow(clippy::too_many_arguments)]
446    fn write_appraise_trace(
447        &self,
448        trace_id: &str,
449        context_key: &str,
450        situation_text: &str,
451        scored: &[ScoredCandidate],
452        verdict: &Verdict,
453        source: &str,
454        now: &str,
455    ) -> Result<()> {
456        let lib_id = self.storage.lib_id()?;
457        self.storage.begin_immediate()?;
458        let result = (|| -> Result<()> {
459            for (rank, s) in scored.iter().enumerate() {
460                let sim = Some(s.fused);
461                self.storage.insert_usage_trace(
462                    trace_id,
463                    Some(&s.chunk_id),
464                    "retrieved",
465                    1.0,
466                    sim,
467                    Some("appraise"),
468                    None,
469                    Some((rank + 1) as i64),
470                    None,
471                    source,
472                    now,
473                )?;
474                // Mark contributors 'selected' too: the critic leaned on them, so they must be
475                // attributable for `record(feedback=…)` to flow an override back (Spec §5).
476                self.storage.insert_usage_trace(
477                    trace_id,
478                    Some(&s.chunk_id),
479                    "selected",
480                    1.0,
481                    sim,
482                    Some("appraise"),
483                    None,
484                    Some((rank + 1) as i64),
485                    None,
486                    source,
487                    now,
488                )?;
489            }
490            // The verdict is persisted in recall_snapshot (free-form TEXT, no schema change) so the
491            // honesty metrics in inspect() can bucket by tier/valence and join the later outcome.
492            let contributor_ids: Vec<&String> = scored.iter().map(|s| &s.chunk_id).collect();
493            let snapshot = json!({
494                "appraise": {
495                    "valence": verdict.valence,
496                    "tier": verdict.tier,
497                    "strength": verdict.strength,
498                    "confidence": verdict.confidence,
499                    "dispersion": verdict.dispersion,
500                    "abstained": verdict.abstained,
501                    "abstain_reason": verdict.abstain_reason,
502                    "flagged": verdict.flagged_points.iter().map(|f| &f.chunk_id).collect::<Vec<_>>(),
503                },
504                "retrieved": contributor_ids,
505                "selected": contributor_ids,
506            });
507            let log = EpisodicLogRow {
508                id: gen_uuid(),
509                trace_id: trace_id.to_string(),
510                lib_id,
511                ts: now.to_string(),
512                query: Some(situation_text.chars().take(500).collect()),
513                recall_snapshot: Some(snapshot.to_string()),
514                event_source: source.to_string(),
515                task_state: "recalled".to_string(),
516                usage_state: "unknown".to_string(),
517                context_key: Some(context_key.to_string()),
518                distill_state: "open".to_string(),
519                ..Default::default()
520            };
521            self.storage.upsert_episodic_log(&log)?;
522            // 方案 B:写 verdict_log —— 直觉模块可证伪的唯一数据源。弃权也入表
523            // (abstain_reason 非空、valence/conf 为空),弃权率本身是健康度信号。
524            let abstain_reason = verdict.abstain_reason.as_ref().map(|r| {
525                serde_json::to_value(r)
526                    .ok()
527                    .and_then(|v| v.as_str().map(str::to_string))
528                    .unwrap_or_default()
529            });
530            let tier_str = serde_json::to_value(verdict.tier)
531                .ok()
532                .and_then(|v| v.as_str().map(str::to_string));
533            let valence_str = serde_json::to_value(verdict.valence)
534                .ok()
535                .and_then(|v| v.as_str().map(str::to_string));
536            self.storage.insert_verdict_log(
537                &gen_uuid(),
538                trace_id,
539                context_key,
540                if verdict.abstained {
541                    None
542                } else {
543                    valence_str.as_deref()
544                },
545                if verdict.abstained {
546                    None
547                } else {
548                    Some(verdict.confidence)
549                },
550                verdict.strength,
551                if verdict.abstained {
552                    None
553                } else {
554                    tier_str.as_deref()
555                },
556                abstain_reason.as_deref(),
557                now,
558            )?;
559            self.storage.commit()
560        })();
561        if result.is_err() {
562            let _ = self.storage.rollback();
563        }
564        result
565    }
566
567    /// 方案 E:把原始强度经学习到的校准映射(分桶查表)转成校准置信度。
568    /// 空 map(冷启动 / 数据不足)= 恒等,不引入偏差。命中桶则返回该桶的实际命中率。
569    fn calibrate_confidence(&self, raw: f64) -> f64 {
570        let map = match self.storage.load_calibration_map() {
571            Ok(m) if !m.is_empty() => m,
572            _ => return raw.clamp(0.0, 1.0),
573        };
574        for (lo, hi, rate) in &map {
575            if raw >= *lo && raw < *hi {
576                return rate.clamp(0.0, 1.0);
577            }
578        }
579        // 落在最后一桶上界(raw==1.0)→ 用最高桶。
580        map.last().map(|(_, _, r)| r.clamp(0.0, 1.0)).unwrap_or(raw)
581    }
582}