converge_core/
recall.rs

1// Copyright 2024-2026 Reflective Labs
2// SPDX-License-Identifier: MIT
3
4//! # Recall Types — Portable across all backends
5//!
6//! This module defines the **constitutional types** for semantic recall.
7//! These types encode the core axiom: "Recall ≠ Evidence".
8//!
9//! ## Axiom: Recall ≠ Evidence
10//!
11//! Recall provides **hints** to guide reasoning, not **citations** to justify claims.
12//! Validators MUST reject any output that cites recall content as evidence.
13//!
14//! ## What lives here (converge-core)
15//!
16//! - `RecallQuery`, `RecallCandidate`, `RecallPolicy`, `RecallBudgets`
17//! - `RecallProvenanceEnvelope`, `RecallTraceLink`
18//! - `CandidateSourceType`, `CandidateScore`, `StopReason`
19//! - `RecallUse`, `RecallConsumer` (training boundary types)
20//!
21//! ## What stays in converge-llm
22//!
23//! - `HashEmbedder`, `SemanticEmbedder` (implementations)
24//! - `RecallNormalizer` (tightly coupled to prompt injection)
25//! - PII redaction utilities
26//! - `MockRecallProvider`
27
28use crate::experience_store::{
29    EventQuery, ExperienceEvent, ExperienceRecord, ExperienceStore, ExperienceStoreResult,
30    UserExperienceEvent,
31};
32use crate::kernel_boundary::DecisionStep;
33use crate::types::TenantId;
34use converge_pack::UnitInterval;
35use serde::{Deserialize, Serialize};
36use sha2::{Digest, Sha256};
37
38// ============================================================================
39// Recall Use/Consumer Types (Recall ≠ Training boundary)
40// ============================================================================
41
42/// Purpose of a recall operation.
43///
44/// Distinguishes runtime augmentation (injecting hints into prompts) from
45/// training-time candidate selection (building datasets). This separation
46/// preserves "Recall ≠ Evidence" and "Recall ≠ Training" boundaries.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
48pub enum RecallUse {
49    /// Runtime prompt augmentation (hints only, not evidence)
50    RuntimeAugmentation,
51    /// Training data candidate selection (offline, auditable)
52    TrainingCandidateSelection,
53}
54
55impl Default for RecallUse {
56    fn default() -> Self {
57        Self::RuntimeAugmentation
58    }
59}
60
61/// Consumer of recall results.
62///
63/// Tracks which component is using the recall results for audit trails
64/// and to enforce that training consumers cannot masquerade as runtime.
65#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
66pub enum RecallConsumer {
67    /// Reasoning kernel (runtime prompts)
68    Kernel,
69    /// Analytics pipeline (eval, metrics)
70    Analytics,
71    /// Training pipeline (dataset building)
72    Trainer,
73}
74
75impl Default for RecallConsumer {
76    fn default() -> Self {
77        Self::Kernel
78    }
79}
80
81// ============================================================================
82// Recall Policy and Configuration
83// ============================================================================
84
85/// Policy controlling recall behavior.
86///
87/// This is the declarative configuration for recall operations.
88/// It controls what is allowed, not how it is implemented.
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub struct RecallPolicy {
91    /// Whether recall is enabled
92    pub enabled: bool,
93    /// Maximum number of candidates to return total
94    pub max_k_total: usize,
95    /// Maximum tokens to inject from recall context
96    pub max_tokens_injection: usize,
97    /// Minimum similarity score threshold
98    pub min_score_threshold: UnitInterval,
99    /// Budget constraints
100    pub budgets: RecallBudgets,
101    /// Allowed recall uses (runtime, training, etc.)
102    ///
103    /// Defaults to `[RuntimeAugmentation]` only - training use must be
104    /// explicitly enabled. This preserves "Recall ≠ Training" boundary.
105    #[serde(default = "default_allowed_uses")]
106    pub allowed_uses: Vec<RecallUse>,
107
108    /// How strongly recall results weight planning priors when consumed by
109    /// `PlanningPriorAgent`. `1.0` means full weight; `0.0` disables prior
110    /// adjustment without disabling recall itself. Capped to `[0.0, 1.0]` by
111    /// consumers.
112    #[serde(default = "default_prior_weight")]
113    pub prior_weight: UnitInterval,
114}
115
116fn default_prior_weight() -> UnitInterval {
117    UnitInterval::ONE
118}
119
120fn default_allowed_uses() -> Vec<RecallUse> {
121    vec![RecallUse::RuntimeAugmentation]
122}
123
124impl Default for RecallPolicy {
125    fn default() -> Self {
126        Self {
127            enabled: false,
128            max_k_total: 5,
129            max_tokens_injection: 500,
130            min_score_threshold: UnitInterval::clamped(0.5),
131            budgets: RecallBudgets::default(),
132            allowed_uses: default_allowed_uses(),
133            prior_weight: default_prior_weight(),
134        }
135    }
136}
137
138impl RecallPolicy {
139    /// Create an enabled recall policy with default settings.
140    #[must_use]
141    pub fn enabled() -> Self {
142        Self {
143            enabled: true,
144            ..Default::default()
145        }
146    }
147
148    /// Create a disabled recall policy.
149    #[must_use]
150    pub fn disabled() -> Self {
151        Self::default()
152    }
153
154    /// Check if a specific recall use is allowed by this policy.
155    ///
156    /// Returns `true` if the policy allows the given use, `false` otherwise.
157    /// This is the primary enforcement point for "Recall ≠ Training" boundary.
158    #[must_use]
159    pub fn is_use_allowed(&self, purpose: RecallUse) -> bool {
160        self.allowed_uses.contains(&purpose)
161    }
162
163    /// Compute a deterministic hash of this policy for provenance tracking.
164    ///
165    /// This enables replay verification: same policy hash → same behavior.
166    /// Note: Includes `allowed_uses` in the hash for full provenance.
167    #[must_use]
168    pub fn snapshot_hash(&self) -> String {
169        let mut hasher = StableHash::new("recall-policy-v1");
170        hasher.bool(self.enabled);
171        hasher.usize(self.max_k_total);
172        hasher.usize(self.max_tokens_injection);
173        hasher.u16(self.min_score_threshold.to_basis_points());
174        hasher.u64(self.budgets.max_latency_ms);
175        hasher.usize(self.budgets.max_embedding_calls);
176        hasher.usize(self.budgets.max_tokens_per_candidate);
177        for use_type in &self.allowed_uses {
178            hasher.u8(*use_type as u8);
179        }
180        hasher.u16(self.prior_weight.to_basis_points());
181        hasher.finish_hex()
182    }
183}
184
185/// Check if a recall use is allowed by the given policy.
186///
187/// Standalone function for use at kernel boundary enforcement.
188/// Returns `true` if the policy allows the given purpose.
189#[must_use]
190pub fn recall_use_allowed(policy: &RecallPolicy, purpose: RecallUse) -> bool {
191    policy.is_use_allowed(purpose)
192}
193
194/// Budget constraints for recall operations.
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct RecallBudgets {
197    /// Maximum latency in milliseconds for recall operations
198    pub max_latency_ms: u64,
199    /// Maximum number of embedding calls per chain
200    pub max_embedding_calls: usize,
201    /// Maximum tokens per candidate summary
202    pub max_tokens_per_candidate: usize,
203}
204
205impl Default for RecallBudgets {
206    fn default() -> Self {
207        Self {
208            max_latency_ms: 100,
209            max_embedding_calls: 3,
210            max_tokens_per_candidate: 100,
211        }
212    }
213}
214
215// ============================================================================
216// Recall Query and Candidate Types
217// ============================================================================
218
219/// A query for semantic recall.
220#[derive(Debug, Clone, Serialize, Deserialize)]
221pub struct RecallQuery {
222    /// The text to find similar items for
223    pub query_text: String,
224    /// Number of candidates to return
225    pub top_k: usize,
226    /// Optional step context for filtering
227    pub step_context: Option<DecisionStep>,
228    /// Optional tenant scope
229    pub tenant_scope: Option<String>,
230}
231
232impl RecallQuery {
233    /// Create a new recall query.
234    #[must_use]
235    pub fn new(query_text: impl Into<String>, top_k: usize) -> Self {
236        Self {
237            query_text: query_text.into(),
238            top_k,
239            step_context: None,
240            tenant_scope: None,
241        }
242    }
243
244    /// Add step context filter.
245    #[must_use]
246    pub fn with_step_context(mut self, step: DecisionStep) -> Self {
247        self.step_context = Some(step);
248        self
249    }
250
251    /// Add tenant scope filter.
252    #[must_use]
253    pub fn with_tenant_scope(mut self, tenant: impl Into<String>) -> Self {
254        self.tenant_scope = Some(tenant.into());
255        self
256    }
257
258    /// Compute a deterministic hash of this query for provenance tracking.
259    #[must_use]
260    pub fn query_hash(&self) -> String {
261        let mut hasher = StableHash::new("recall-query-v1");
262        hasher.str(&self.query_text);
263        hasher.usize(self.top_k);
264        if let Some(ref step) = self.step_context {
265            hasher.str(step.as_str());
266        } else {
267            hasher.none();
268        }
269        if let Some(ref tenant) = self.tenant_scope {
270            hasher.str(tenant);
271        } else {
272            hasher.none();
273        }
274        hasher.finish_hex()
275    }
276}
277
278/// A candidate returned by recall.
279#[derive(Debug, Clone, Serialize, Deserialize)]
280pub struct RecallCandidate {
281    /// Unique identifier for this candidate
282    pub id: String,
283    /// Summary text of the candidate
284    pub summary: String,
285    /// Raw similarity score from vector search
286    pub raw_score: UnitInterval,
287    /// Final normalized score
288    pub final_score: UnitInterval,
289    /// Relevance level
290    pub relevance: RelevanceLevel,
291    /// Source type (failure, success, runbook, etc.)
292    pub source_type: CandidateSourceType,
293    /// Provenance information
294    pub provenance: CandidateProvenance,
295    /// Per-candidate confidence in `[0.0, 1.0]`. Reflects how much weight a
296    /// downstream consumer (e.g. `PlanningPriorAgent`) should give this entry
297    /// when adjusting priors. Defaults to `0.5` for backends that do not yet
298    /// emit calibrated confidence.
299    #[serde(default = "default_candidate_confidence")]
300    pub confidence: UnitInterval,
301}
302
303fn default_candidate_confidence() -> UnitInterval {
304    UnitInterval::clamped(0.5)
305}
306
307/// Relevance level for a recall candidate.
308#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
309pub enum RelevanceLevel {
310    High,
311    Medium,
312    Low,
313}
314
315impl RelevanceLevel {
316    /// Create from a score (0.0-1.0).
317    #[must_use]
318    pub fn from_score(score: UnitInterval) -> Self {
319        let score = score.as_f64();
320        if score >= 0.8 {
321            Self::High
322        } else if score >= 0.5 {
323            Self::Medium
324        } else {
325            Self::Low
326        }
327    }
328
329    /// Get the string representation.
330    #[must_use]
331    pub fn as_str(&self) -> &'static str {
332        match self {
333            Self::High => "high",
334            Self::Medium => "medium",
335            Self::Low => "low",
336        }
337    }
338}
339
340/// Source type for a recall candidate.
341#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
342pub enum CandidateSourceType {
343    SimilarFailure,
344    SimilarSuccess,
345    Runbook,
346    AdapterConfig,
347    AntiPattern,
348}
349
350/// Provenance information for a recall candidate.
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct CandidateProvenance {
353    /// When this record was created
354    pub created_at: String,
355    /// Chain ID that produced this record
356    pub source_chain_id: Option<String>,
357    /// Step that produced this record
358    pub source_step: Option<DecisionStep>,
359    /// Corpus version when this was indexed
360    pub corpus_version: String,
361}
362
363// ============================================================================
364// Recall Provenance Types
365// ============================================================================
366
367/// Trace link for recall operations (enables reproducibility).
368#[derive(Debug, Clone, Serialize, Deserialize)]
369pub struct RecallTraceLink {
370    /// Hash of the query embedding vector
371    pub embedding_hash: String,
372    /// Corpus version used for search
373    pub corpus_version: String,
374    /// Embedder ID used
375    pub embedder_id: String,
376    /// Number of candidates searched
377    pub candidates_searched: usize,
378    /// Number of candidates returned
379    pub candidates_returned: usize,
380    /// Latency in milliseconds
381    pub latency_ms: u64,
382}
383
384/// A candidate ID with its score, for ordered provenance tracking.
385#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
386pub struct CandidateScore {
387    /// Candidate ID
388    pub id: String,
389    /// Final normalized score
390    pub score: UnitInterval,
391}
392
393/// Complete provenance envelope for recall operations.
394///
395/// This captures ALL information needed to:
396/// - Replay the exact same recall query
397/// - Audit why specific candidates were returned
398/// - Verify determinism across runs
399///
400/// All fields are required (non-optional) to make it impossible to be vague.
401#[derive(Debug, Clone, Serialize, Deserialize)]
402pub struct RecallProvenanceEnvelope {
403    // --- Query Provenance ---
404    /// Hash of the original query (before embedding)
405    pub query_hash: String,
406
407    /// Hash of the canonicalized embedding input text
408    /// (after PII redaction, whitespace normalization, Unicode NFKC)
409    pub embedding_input_hash: String,
410
411    /// Hash of the resulting embedding vector
412    pub embedding_hash: String,
413
414    // --- Embedder Provenance ---
415    /// Embedder identifier
416    pub embedder_id: String,
417
418    /// Hash of embedder settings (model, normalization, etc.)
419    pub embedder_settings_hash: String,
420
421    // --- Corpus Provenance ---
422    /// Full corpus fingerprint string
423    pub corpus_fingerprint: String,
424
425    // --- Policy Provenance ---
426    /// Hash of the RecallPolicy that was applied
427    pub policy_snapshot_hash: String,
428
429    // --- Use/Consumer Provenance (Recall ≠ Training boundary) ---
430    /// Purpose of this recall operation
431    ///
432    /// Defaults to `RuntimeAugmentation`. Training use must be explicit.
433    #[serde(default)]
434    pub purpose: RecallUse,
435
436    /// Consumers that will receive these results
437    ///
438    /// Empty by default; runtime typically sets `[Kernel]`.
439    /// Training pipelines would set `[Trainer]` or `[Analytics, Trainer]`.
440    #[serde(default)]
441    pub consumers: Vec<RecallConsumer>,
442
443    // --- Results Provenance ---
444    /// Ordered list of (candidate_id, final_score) pairs
445    /// Order matters for determinism verification
446    pub candidate_scores: Vec<CandidateScore>,
447
448    /// Number of candidates in corpus that were searched
449    pub candidates_searched: usize,
450
451    /// Number of candidates returned (after filtering)
452    pub candidates_returned: usize,
453
454    /// Why recall stopped (if applicable)
455    pub stop_reason: Option<StopReason>,
456
457    // --- Timing ---
458    /// Latency in milliseconds
459    pub latency_ms: u64,
460
461    /// Timestamp when recall was performed (ISO 8601)
462    pub timestamp: String,
463
464    // --- Future-proofing for signing ---
465    /// Optional signature for multi-tenant verification
466    /// Format: "unsigned" | "sha256:`<hash>`" | "sig://`<key-id>`/`<signature>`"
467    #[serde(default = "default_signature")]
468    pub signature: String,
469}
470
/// Serde fallback for `RecallProvenanceEnvelope::signature`.
fn default_signature() -> String {
    String::from("unsigned")
}
474
475impl RecallProvenanceEnvelope {
476    /// Compute a hash of the entire provenance envelope.
477    ///
478    /// This can be used for quick equality checks and audit trails.
479    #[must_use]
480    pub fn envelope_hash(&self) -> String {
481        let mut hasher = StableHash::new("recall-envelope-v1");
482        hasher.str(&self.query_hash);
483        hasher.str(&self.embedding_input_hash);
484        hasher.str(&self.embedding_hash);
485        hasher.str(&self.embedder_id);
486        hasher.str(&self.embedder_settings_hash);
487        hasher.str(&self.corpus_fingerprint);
488        hasher.str(&self.policy_snapshot_hash);
489        hasher.u8(self.purpose as u8);
490        for consumer in &self.consumers {
491            hasher.u8(*consumer as u8);
492        }
493        for cs in &self.candidate_scores {
494            hasher.str(&cs.id);
495            hasher.u16(cs.score.to_basis_points());
496        }
497        hasher.usize(self.candidates_searched);
498        hasher.usize(self.candidates_returned);
499        hasher.u64(self.latency_ms);
500        hasher.str(&self.timestamp);
501        hasher.finish_hex()
502    }
503
504    /// Check if this envelope matches another for replay verification.
505    ///
506    /// Two envelopes match if they have identical:
507    /// - query_hash
508    /// - embedding_input_hash
509    /// - embedder_id + embedder_settings_hash
510    /// - corpus_fingerprint
511    /// - policy_snapshot_hash
512    /// - purpose + consumers (Recall ≠ Training boundary)
513    /// - candidate_scores (order-sensitive)
514    #[must_use]
515    pub fn matches_for_replay(&self, other: &Self) -> bool {
516        self.query_hash == other.query_hash
517            && self.embedding_input_hash == other.embedding_input_hash
518            && self.embedder_id == other.embedder_id
519            && self.embedder_settings_hash == other.embedder_settings_hash
520            && self.corpus_fingerprint == other.corpus_fingerprint
521            && self.policy_snapshot_hash == other.policy_snapshot_hash
522            && self.purpose == other.purpose
523            && self.consumers == other.consumers
524            && self.candidate_scores == other.candidate_scores
525    }
526
527    /// Get a short summary for logging.
528    #[must_use]
529    pub fn summary(&self) -> String {
530        format!(
531            "Recall[query:{:.8}...][corpus:{:.8}...][{}/{} candidates][{}ms]",
532            self.query_hash,
533            self.corpus_fingerprint,
534            self.candidates_returned,
535            self.candidates_searched,
536            self.latency_ms
537        )
538    }
539}
540
541struct StableHash {
542    hasher: Sha256,
543}
544
545impl StableHash {
546    fn new(domain: &'static str) -> Self {
547        let mut stable = Self {
548            hasher: Sha256::new(),
549        };
550        stable.str(domain);
551        stable
552    }
553
554    fn bytes(&mut self, bytes: &[u8]) {
555        self.hasher.update((bytes.len() as u64).to_be_bytes());
556        self.hasher.update(bytes);
557    }
558
559    fn str(&mut self, value: &str) {
560        self.bytes(value.as_bytes());
561    }
562
563    fn bool(&mut self, value: bool) {
564        self.u8(u8::from(value));
565    }
566
567    fn none(&mut self) {
568        self.bytes(&[]);
569    }
570
571    fn u8(&mut self, value: u8) {
572        self.bytes(&[value]);
573    }
574
575    fn u16(&mut self, value: u16) {
576        self.bytes(&value.to_be_bytes());
577    }
578
579    fn u64(&mut self, value: u64) {
580        self.bytes(&value.to_be_bytes());
581    }
582
583    fn usize(&mut self, value: usize) {
584        self.u64(value as u64);
585    }
586
587    fn finish_hex(self) -> String {
588        hex::encode(self.hasher.finalize())
589    }
590}
591
592/// Reason why recall stopped returning results.
593#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
594pub enum StopReason {
595    /// Reached the requested top_k
596    ReachedTopK,
597    /// Reached max_k_total budget
598    BudgetExhausted,
599    /// All remaining candidates below threshold
600    BelowThreshold,
601    /// Reached max tokens for injection
602    TokenLimitReached,
603    /// Latency budget exceeded
604    LatencyExceeded,
605    /// Embedder is not deterministic and policy requires replayability
606    ///
607    /// When `RecallUse::TrainingCandidateSelection` or kernel requires
608    /// deterministic replay, but embedder is `Stochastic` or `Unknown`.
609    /// Results may still be returned but marked as audit-only.
610    EmbedderNotDeterministic,
611    /// Tenant scope required but not provided
612    ///
613    /// The corpus has `TenantPolicy::Required` but the query did not
614    /// include a tenant scope. No results returned.
615    TenantScopeMissing,
616}
617
618// ============================================================================
619// Recall Executor — turns ExperienceRecords into RecallCandidates
620// ============================================================================
621
622/// Pull recall candidates from an [`ExperienceStore`].
623///
624/// First implementation: scans the ledger for recall-relevant records (user
625/// overrides, user approvals, failed engine outcomes), maps each to a
626/// `RecallCandidate`, applies `policy.min_score_threshold` and `prior_weight`,
627/// then trims to the smaller of `query.top_k` and `policy.max_k_total`.
628///
629/// Semantic ranking by embedding similarity is intentionally deferred — the
630/// goal here is to wire planning to history end-to-end. Ranking by recency is
631/// a placeholder that will be replaced once a recall provider is in place.
632pub fn recall_from_store(
633    store: &dyn ExperienceStore,
634    query: &RecallQuery,
635    policy: &RecallPolicy,
636) -> ExperienceStoreResult<Vec<RecallCandidate>> {
637    if !policy.enabled {
638        return Ok(Vec::new());
639    }
640
641    let event_query = EventQuery {
642        tenant_id: query.tenant_scope.as_deref().map(TenantId::new),
643        ..Default::default()
644    };
645
646    let records = store.query_records(&event_query)?;
647    let limit = query.top_k.min(policy.max_k_total);
648
649    let candidates = records
650        .iter()
651        .rev()
652        .filter_map(record_to_candidate)
653        .filter(|c| c.confidence >= policy.min_score_threshold)
654        .take(limit)
655        .map(|mut c| {
656            c.confidence = c.confidence.scale_by(policy.prior_weight);
657            c
658        })
659        .collect();
660
661    Ok(candidates)
662}
663
664fn record_to_candidate(record: &ExperienceRecord) -> Option<RecallCandidate> {
665    match record {
666        ExperienceRecord::User(env) => match &env.event {
667            UserExperienceEvent::UserOverrideIssued { reason, .. } => Some(make_candidate(
668                env.event_id.as_str(),
669                env.occurred_at.as_str(),
670                format!("user override: {reason}"),
671                UnitInterval::clamped(0.9),
672                CandidateSourceType::AntiPattern,
673            )),
674            UserExperienceEvent::UserApprovalGranted { reason, .. } => Some(make_candidate(
675                env.event_id.as_str(),
676                env.occurred_at.as_str(),
677                format!("user approval: {}", reason.as_deref().unwrap_or("granted")),
678                UnitInterval::clamped(0.7),
679                CandidateSourceType::SimilarSuccess,
680            )),
681            UserExperienceEvent::UserApprovalRejected { reason, .. } => Some(make_candidate(
682                env.event_id.as_str(),
683                env.occurred_at.as_str(),
684                format!(
685                    "user rejection: {}",
686                    reason.as_deref().unwrap_or("declined")
687                ),
688                UnitInterval::clamped(0.7),
689                CandidateSourceType::AntiPattern,
690            )),
691            UserExperienceEvent::UserCorrection { target, reason, .. } => Some(make_candidate(
692                env.event_id.as_str(),
693                env.occurred_at.as_str(),
694                format!("correction ({}): {reason}", target.kind_label()),
695                UnitInterval::clamped(0.85),
696                CandidateSourceType::Runbook,
697            )),
698            UserExperienceEvent::UserBoundaryAdjusted {
699                boundary,
700                target,
701                reason,
702                ..
703            } => Some(make_candidate(
704                env.event_id.as_str(),
705                env.occurred_at.as_str(),
706                format!(
707                    "{} boundary adjusted on {}: {reason}",
708                    boundary_kind_label(*boundary),
709                    boundary_target_label(target)
710                ),
711                UnitInterval::clamped(0.8),
712                CandidateSourceType::Runbook,
713            )),
714        },
715        ExperienceRecord::Engine(env) => match &env.event {
716            ExperienceEvent::OutcomeRecorded {
717                passed: false,
718                stop_reason,
719                ..
720            } => Some(make_candidate(
721                env.event_id.as_str(),
722                env.occurred_at.as_str(),
723                format!(
724                    "outcome failed: {}",
725                    stop_reason
726                        .as_ref()
727                        .map_or_else(|| "unspecified".to_string(), ToString::to_string)
728                ),
729                UnitInterval::clamped(0.6),
730                CandidateSourceType::SimilarFailure,
731            )),
732            _ => None,
733        },
734    }
735}
736
737fn boundary_kind_label(kind: crate::BoundaryKind) -> &'static str {
738    match kind {
739        crate::BoundaryKind::Authority => "authority",
740        crate::BoundaryKind::Forbidden => "forbidden",
741        crate::BoundaryKind::Expiry => "expiry",
742        crate::BoundaryKind::Reversibility => "reversibility",
743    }
744}
745
746fn boundary_target_label(target: &crate::BoundaryTarget) -> String {
747    match target {
748        crate::BoundaryTarget::Pack { pack_id } => format!("pack:{}", pack_id.as_str()),
749        crate::BoundaryTarget::Intent { intent_id } => format!("intent:{}", intent_id.as_str()),
750        crate::BoundaryTarget::Global => "global".to_string(),
751    }
752}
753
754fn make_candidate(
755    id: &str,
756    occurred_at: &str,
757    summary: String,
758    confidence: UnitInterval,
759    source_type: CandidateSourceType,
760) -> RecallCandidate {
761    RecallCandidate {
762        id: id.to_string(),
763        summary,
764        raw_score: confidence,
765        final_score: confidence,
766        relevance: RelevanceLevel::from_score(confidence),
767        source_type,
768        provenance: CandidateProvenance {
769            created_at: occurred_at.to_string(),
770            source_chain_id: None,
771            source_step: None,
772            corpus_version: "experience-store-v0".to_string(),
773        },
774        confidence,
775    }
776}
777
778#[cfg(test)]
779mod tests {
780    use super::*;
781    use crate::{
782        BoundaryKind, BoundaryTarget, ContentHash, CorrectionTarget, ExperienceRecord, FactContent,
783        FactContentKind, UserExperienceEventEnvelope,
784    };
785
786    fn candidate_for_user_event(event: UserExperienceEvent) -> RecallCandidate {
787        let envelope = UserExperienceEventEnvelope::new("evt-user", event);
788        record_to_candidate(&ExperienceRecord::User(envelope)).expect("candidate")
789    }
790
791    #[test]
792    fn test_recall_policy_enabled() {
793        let policy = RecallPolicy::enabled();
794        assert!(policy.enabled);
795    }
796
797    #[test]
798    fn test_recall_policy_disabled() {
799        let policy = RecallPolicy::disabled();
800        assert!(!policy.enabled);
801    }
802
803    #[test]
804    fn test_relevance_from_score() {
805        assert_eq!(
806            RelevanceLevel::from_score(UnitInterval::clamped(0.9)),
807            RelevanceLevel::High
808        );
809        assert_eq!(
810            RelevanceLevel::from_score(UnitInterval::clamped(0.6)),
811            RelevanceLevel::Medium
812        );
813        assert_eq!(
814            RelevanceLevel::from_score(UnitInterval::clamped(0.3)),
815            RelevanceLevel::Low
816        );
817    }
818
819    #[test]
820    fn test_recall_query_builder() {
821        let query = RecallQuery::new("test", 5)
822            .with_step_context(DecisionStep::Reasoning)
823            .with_tenant_scope("tenant-1");
824
825        assert_eq!(query.query_text, "test");
826        assert_eq!(query.top_k, 5);
827        assert_eq!(query.step_context, Some(DecisionStep::Reasoning));
828        assert_eq!(query.tenant_scope, Some("tenant-1".to_string()));
829    }
830
831    #[test]
832    fn recall_maps_rejected_user_approval_to_antipattern() {
833        let candidate = candidate_for_user_event(UserExperienceEvent::UserApprovalRejected {
834            gate_request_id: "gate-1".into(),
835            actor: "operator-1".into(),
836            policy_snapshot_hash: None,
837            reason: Some("risk too high".into()),
838        });
839
840        assert_eq!(candidate.summary, "user rejection: risk too high");
841        assert_eq!(candidate.confidence, UnitInterval::clamped(0.7));
842        assert_eq!(candidate.source_type, CandidateSourceType::AntiPattern);
843    }
844
845    #[test]
846    fn recall_maps_user_correction_to_runbook() {
847        let candidate = candidate_for_user_event(UserExperienceEvent::UserCorrection {
848            target: CorrectionTarget::Fact {
849                fact_id: "fact-1".into(),
850            },
851            actor: "operator-1".into(),
852            policy_snapshot_hash: None,
853            original_content: ContentHash::zero(),
854            corrected_content: FactContent::new(FactContentKind::Claim, "corrected"),
855            reason: "source was stale".into(),
856        });
857
858        assert_eq!(candidate.summary, "correction (fact): source was stale");
859        assert_eq!(candidate.confidence, UnitInterval::clamped(0.85));
860        assert_eq!(candidate.source_type, CandidateSourceType::Runbook);
861    }
862
863    #[test]
864    fn recall_maps_boundary_adjustment_to_scoped_runbook() {
865        let candidate = candidate_for_user_event(UserExperienceEvent::UserBoundaryAdjusted {
866            boundary: BoundaryKind::Authority,
867            target: BoundaryTarget::Pack {
868                pack_id: "loan-pack".into(),
869            },
870            actor: "operator-1".into(),
871            policy_snapshot_hash: None,
872            previous_value: serde_json::json!({"limit": 100}),
873            new_value: serde_json::json!({"limit": 50}),
874            reason: "manual review needed".into(),
875        });
876
877        assert_eq!(
878            candidate.summary,
879            "authority boundary adjusted on pack:loan-pack: manual review needed"
880        );
881        assert_eq!(candidate.confidence, UnitInterval::clamped(0.8));
882        assert_eq!(candidate.source_type, CandidateSourceType::Runbook);
883    }
884
885    #[test]
886    fn test_recall_policy_defaults_to_runtime_only() {
887        let policy = RecallPolicy::default();
888        assert!(
889            policy
890                .allowed_uses
891                .contains(&RecallUse::RuntimeAugmentation),
892            "Default policy must allow RuntimeAugmentation"
893        );
894        assert!(
895            !policy
896                .allowed_uses
897                .contains(&RecallUse::TrainingCandidateSelection),
898            "Default policy must NOT allow TrainingCandidateSelection"
899        );
900    }
901
902    #[test]
903    fn test_recall_training_purpose_is_blocked_in_kernel() {
904        let policy = RecallPolicy {
905            allowed_uses: vec![RecallUse::RuntimeAugmentation],
906            ..Default::default()
907        };
908
909        assert!(
910            recall_use_allowed(&policy, RecallUse::RuntimeAugmentation),
911            "RuntimeAugmentation must be allowed"
912        );
913        assert!(
914            !recall_use_allowed(&policy, RecallUse::TrainingCandidateSelection),
915            "TrainingCandidateSelection must be blocked when not in allowed_uses"
916        );
917    }
918
919    #[test]
920    fn test_recall_training_can_be_explicitly_enabled() {
921        let policy = RecallPolicy {
922            allowed_uses: vec![
923                RecallUse::RuntimeAugmentation,
924                RecallUse::TrainingCandidateSelection,
925            ],
926            ..Default::default()
927        };
928
929        assert!(recall_use_allowed(&policy, RecallUse::RuntimeAugmentation));
930        assert!(recall_use_allowed(
931            &policy,
932            RecallUse::TrainingCandidateSelection
933        ));
934    }
935
936    #[test]
937    fn recall_policy_deserialization_rejects_out_of_range_threshold() {
938        let json = r#"{
939            "enabled": true,
940            "max_k_total": 5,
941            "max_tokens_injection": 500,
942            "min_score_threshold": 1.2,
943            "budgets": {
944                "max_latency_ms": 100,
945                "max_embedding_calls": 3,
946                "max_tokens_per_candidate": 100
947            },
948            "allowed_uses": ["RuntimeAugmentation"],
949            "prior_weight": 1.0
950        }"#;
951        let result = serde_json::from_str::<RecallPolicy>(json);
952        assert!(result.is_err());
953    }
954
955    #[test]
956    fn test_policy_hash_deterministic() {
957        let policy = RecallPolicy::default();
958        let hash1 = policy.snapshot_hash();
959        let hash2 = policy.snapshot_hash();
960        assert_eq!(hash1, hash2, "Same policy must produce same hash");
961    }
962
963    #[test]
964    fn test_policy_hash_changes_with_allowed_uses() {
965        let policy1 = RecallPolicy::default();
966        let policy2 = RecallPolicy {
967            allowed_uses: vec![
968                RecallUse::RuntimeAugmentation,
969                RecallUse::TrainingCandidateSelection,
970            ],
971            ..Default::default()
972        };
973
974        assert_ne!(
975            policy1.snapshot_hash(),
976            policy2.snapshot_hash(),
977            "Different allowed_uses must produce different hash"
978        );
979    }
980
981    #[test]
982    fn test_recall_query_hash_deterministic() {
983        let query = RecallQuery::new("test query", 5);
984        let hash1 = query.query_hash();
985        let hash2 = query.query_hash();
986        assert_eq!(hash1, hash2, "Same query must produce same hash");
987    }
988
989    #[test]
990    fn test_recall_provenance_matches_for_replay() {
991        let env = RecallProvenanceEnvelope {
992            query_hash: "q".to_string(),
993            embedding_input_hash: "e".to_string(),
994            embedding_hash: "h".to_string(),
995            embedder_id: "id".to_string(),
996            embedder_settings_hash: "s".to_string(),
997            corpus_fingerprint: "c".to_string(),
998            policy_snapshot_hash: "p".to_string(),
999            purpose: RecallUse::RuntimeAugmentation,
1000            consumers: vec![RecallConsumer::Kernel],
1001            candidate_scores: vec![],
1002            candidates_searched: 10,
1003            candidates_returned: 2,
1004            stop_reason: None,
1005            latency_ms: 10,
1006            timestamp: "t".to_string(),
1007            signature: "unsigned".to_string(),
1008        };
1009
1010        // Same envelope matches
1011        assert!(env.matches_for_replay(&env.clone()));
1012
1013        // Different purpose does not match
1014        let mut env2 = env.clone();
1015        env2.purpose = RecallUse::TrainingCandidateSelection;
1016        assert!(
1017            !env.matches_for_replay(&env2),
1018            "Different purpose must not match"
1019        );
1020
1021        // Different consumers does not match
1022        let mut env3 = env.clone();
1023        env3.consumers = vec![RecallConsumer::Trainer];
1024        assert!(
1025            !env.matches_for_replay(&env3),
1026            "Different consumers must not match"
1027        );
1028    }
1029}
converge_core/recall.rs

converge_core/
recall.rs