1use crate::experience_store::{
29 EventQuery, ExperienceEvent, ExperienceRecord, ExperienceStore, ExperienceStoreResult,
30 UserExperienceEvent,
31};
32use crate::kernel_boundary::DecisionStep;
33use crate::types::TenantId;
34use serde::{Deserialize, Serialize};
35
/// Purpose a recall request is made for.
///
/// `RecallPolicy::allowed_uses` lists the purposes a policy permits and
/// `RecallProvenanceEnvelope::purpose` records the purpose of a recall.
///
/// Variant order is significant: `RecallPolicy::snapshot_hash` and
/// `RecallProvenanceEnvelope::envelope_hash` feed the `as u8` discriminant
/// into their hashes, so reordering variants changes those hashes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RecallUse {
    /// The default purpose, and the only one in `default_allowed_uses()`.
    RuntimeAugmentation,
    /// Not permitted unless a policy lists it in `allowed_uses` explicitly.
    // NOTE(review): presumably "select recalled items as training candidates" — confirm.
    TrainingCandidateSelection,
}
52
53impl Default for RecallUse {
54 fn default() -> Self {
55 Self::RuntimeAugmentation
56 }
57}
58
/// Component that consumes the result of a recall.
///
/// Recorded in `RecallProvenanceEnvelope::consumers`. Variant order is
/// significant: `RecallProvenanceEnvelope::envelope_hash` hashes the `as u8`
/// discriminant of every consumer.
// NOTE(review): per-variant meanings are not established anywhere in this
// file; the names are the only documentation — confirm with the owners.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RecallConsumer {
    /// The default consumer (see the `Default` impl).
    Kernel,
    Analytics,
    Trainer,
}
72
73impl Default for RecallConsumer {
74 fn default() -> Self {
75 Self::Kernel
76 }
77}
78
/// Configuration that gates and bounds recall.
///
/// The `Default` policy is disabled; `RecallPolicy::enabled()` returns the
/// same defaults with recall switched on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallPolicy {
    /// Master switch: `recall_from_store` returns no candidates when `false`.
    pub enabled: bool,
    /// Upper bound on returned candidates; `recall_from_store` uses
    /// `min(query.top_k, max_k_total)`.
    pub max_k_total: usize,
    // NOTE(review): not read by any code in this file apart from
    // `snapshot_hash`; presumably a token cap on injected recall text — confirm.
    pub max_tokens_injection: usize,
    /// Candidates whose confidence is below this value are dropped by
    /// `recall_from_store` (checked before `prior_weight` is applied).
    pub min_score_threshold: f64,
    /// Resource budgets; see `RecallBudgets`.
    pub budgets: RecallBudgets,
    /// Purposes this policy permits. When absent from serialized input it
    /// defaults to `[RecallUse::RuntimeAugmentation]`.
    #[serde(default = "default_allowed_uses")]
    pub allowed_uses: Vec<RecallUse>,

    /// Multiplier applied to a candidate's confidence by `recall_from_store`
    /// (the product is clamped to `[0.0, 1.0]`). Defaults to `1.0` when
    /// absent from serialized input.
    #[serde(default = "default_prior_weight")]
    pub prior_weight: f64,
}
113
/// Serde/`Default` fallback for `RecallPolicy::prior_weight`: a neutral
/// multiplier that leaves candidate confidence unchanged.
fn default_prior_weight() -> f64 {
    const NEUTRAL_PRIOR_WEIGHT: f64 = 1.0;
    NEUTRAL_PRIOR_WEIGHT
}
117
118fn default_allowed_uses() -> Vec<RecallUse> {
119 vec![RecallUse::RuntimeAugmentation]
120}
121
122impl Default for RecallPolicy {
123 fn default() -> Self {
124 Self {
125 enabled: false,
126 max_k_total: 5,
127 max_tokens_injection: 500,
128 min_score_threshold: 0.5,
129 budgets: RecallBudgets::default(),
130 allowed_uses: default_allowed_uses(),
131 prior_weight: default_prior_weight(),
132 }
133 }
134}
135
136impl RecallPolicy {
137 #[must_use]
139 pub fn enabled() -> Self {
140 Self {
141 enabled: true,
142 ..Default::default()
143 }
144 }
145
146 #[must_use]
148 pub fn disabled() -> Self {
149 Self::default()
150 }
151
152 #[must_use]
157 pub fn is_use_allowed(&self, purpose: RecallUse) -> bool {
158 self.allowed_uses.contains(&purpose)
159 }
160
161 #[must_use]
166 pub fn snapshot_hash(&self) -> String {
167 use std::collections::hash_map::DefaultHasher;
168 use std::hash::{Hash, Hasher};
169
170 let mut hasher = DefaultHasher::new();
171 self.enabled.hash(&mut hasher);
172 self.max_k_total.hash(&mut hasher);
173 self.max_tokens_injection.hash(&mut hasher);
174 (self.min_score_threshold as u64).hash(&mut hasher);
175 self.budgets.max_latency_ms.hash(&mut hasher);
176 self.budgets.max_embedding_calls.hash(&mut hasher);
177 self.budgets.max_tokens_per_candidate.hash(&mut hasher);
178 for use_type in &self.allowed_uses {
179 (*use_type as u8).hash(&mut hasher);
180 }
181 (self.prior_weight as u64).hash(&mut hasher);
182 format!("{:016x}", hasher.finish())
183 }
184}
185
186#[must_use]
191pub fn recall_use_allowed(policy: &RecallPolicy, purpose: RecallUse) -> bool {
192 policy.is_use_allowed(purpose)
193}
194
/// Resource budgets attached to a `RecallPolicy`.
///
/// All three fields are part of `RecallPolicy::snapshot_hash`.
// NOTE(review): nothing in this file enforces these budgets
// (`recall_from_store` does not read them); enforcement presumably lives
// elsewhere — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallBudgets {
    /// Latency budget in milliseconds (default 100).
    pub max_latency_ms: u64,
    /// Maximum number of embedding calls (default 3).
    pub max_embedding_calls: usize,
    /// Maximum tokens per candidate (default 100).
    pub max_tokens_per_candidate: usize,
}
205
206impl Default for RecallBudgets {
207 fn default() -> Self {
208 Self {
209 max_latency_ms: 100,
210 max_embedding_calls: 3,
211 max_tokens_per_candidate: 100,
212 }
213 }
214}
215
/// A request for recall candidates.
///
/// Build with `RecallQuery::new` and the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallQuery {
    /// Free-text query. Hashed by `query_hash`.
    // NOTE(review): `recall_from_store` does not currently use this text to
    // rank or filter candidates — confirm whether that is intended.
    pub query_text: String,
    /// Requested number of candidates; `recall_from_store` caps it at
    /// `RecallPolicy::max_k_total`.
    pub top_k: usize,
    /// Optional decision step the query is made in.
    pub step_context: Option<DecisionStep>,
    /// Optional tenant identifier; `recall_from_store` converts it to a
    /// `TenantId` filter on the store query.
    pub tenant_scope: Option<String>,
}
232
233impl RecallQuery {
234 #[must_use]
236 pub fn new(query_text: impl Into<String>, top_k: usize) -> Self {
237 Self {
238 query_text: query_text.into(),
239 top_k,
240 step_context: None,
241 tenant_scope: None,
242 }
243 }
244
245 #[must_use]
247 pub fn with_step_context(mut self, step: DecisionStep) -> Self {
248 self.step_context = Some(step);
249 self
250 }
251
252 #[must_use]
254 pub fn with_tenant_scope(mut self, tenant: impl Into<String>) -> Self {
255 self.tenant_scope = Some(tenant.into());
256 self
257 }
258
259 #[must_use]
261 pub fn query_hash(&self) -> String {
262 use std::collections::hash_map::DefaultHasher;
263 use std::hash::{Hash, Hasher};
264
265 let mut hasher = DefaultHasher::new();
266 self.query_text.hash(&mut hasher);
267 self.top_k.hash(&mut hasher);
268 if let Some(ref step) = self.step_context {
269 step.as_str().hash(&mut hasher);
270 }
271 if let Some(ref tenant) = self.tenant_scope {
272 tenant.hash(&mut hasher);
273 }
274 format!("{:016x}", hasher.finish())
275 }
276}
277
/// A single recalled item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallCandidate {
    /// Identifier of the candidate; `make_candidate` fills it with the source
    /// event's id.
    pub id: String,
    /// Human-readable description of the candidate.
    pub summary: String,
    /// Score before adjustments.
    // NOTE(review): `make_candidate` sets `raw_score`, `final_score` and
    // `confidence` to the same value, and `recall_from_store` later rescales
    // only `confidence` — confirm the intended relationship between the three.
    pub raw_score: f64,
    pub final_score: f64,
    /// Relevance bucket; `make_candidate` derives it with
    /// `RelevanceLevel::from_score`.
    pub relevance: RelevanceLevel,
    /// Kind of source the candidate was derived from.
    pub source_type: CandidateSourceType,
    /// Where and when the candidate originated.
    pub provenance: CandidateProvenance,
    /// Confidence value; defaults to `0.5` when absent from serialized input.
    #[serde(default = "default_candidate_confidence")]
    pub confidence: f64,
}
302
/// Serde fallback for `RecallCandidate::confidence` when the field is
/// missing from the input.
fn default_candidate_confidence() -> f64 {
    const FALLBACK_CONFIDENCE: f64 = 0.5;
    FALLBACK_CONFIDENCE
}
306
/// Coarse relevance bucket for a score; see `RelevanceLevel::from_score` for
/// the thresholds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RelevanceLevel {
    /// Score of at least `0.8`.
    High,
    /// Score of at least `0.5` but below `0.8`.
    Medium,
    /// Any other score.
    Low,
}
314
315impl RelevanceLevel {
316 #[must_use]
318 pub fn from_score(score: f64) -> Self {
319 if score >= 0.8 {
320 Self::High
321 } else if score >= 0.5 {
322 Self::Medium
323 } else {
324 Self::Low
325 }
326 }
327
328 #[must_use]
330 pub fn as_str(&self) -> &'static str {
331 match self {
332 Self::High => "high",
333 Self::Medium => "medium",
334 Self::Low => "low",
335 }
336 }
337}
338
/// Kind of source a `RecallCandidate` was derived from.
///
/// `record_to_candidate` produces `SimilarFailure`, `SimilarSuccess` and
/// `AntiPattern`; `Runbook` and `AdapterConfig` are not produced by any code
/// in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CandidateSourceType {
    /// Assigned to failed engine outcomes.
    SimilarFailure,
    /// Assigned to user approvals.
    SimilarSuccess,
    Runbook,
    AdapterConfig,
    /// Assigned to user overrides.
    AntiPattern,
}
348
/// Origin metadata for a `RecallCandidate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateProvenance {
    /// Creation time as a string; `make_candidate` copies the source event's
    /// `occurred_at` value here.
    // NOTE(review): the timestamp format is not established in this file — confirm.
    pub created_at: String,
    /// Optional id of the chain the candidate came from (`make_candidate`
    /// leaves it `None`).
    pub source_chain_id: Option<String>,
    /// Optional decision step the candidate came from (`make_candidate`
    /// leaves it `None`).
    pub source_step: Option<DecisionStep>,
    /// Corpus version label; `make_candidate` uses `"experience-store-v0"`.
    pub corpus_version: String,
}
361
/// Compact trace record of a recall operation.
// NOTE(review): this type is not constructed or read anywhere in this file;
// the field descriptions below are inferred from names and types only —
// confirm against the code that produces it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallTraceLink {
    /// Hash identifying the embedding used.
    pub embedding_hash: String,
    /// Version label of the corpus searched.
    pub corpus_version: String,
    /// Identifier of the embedder.
    pub embedder_id: String,
    /// Number of candidates examined.
    pub candidates_searched: usize,
    /// Number of candidates returned.
    pub candidates_returned: usize,
    /// Elapsed time in milliseconds.
    pub latency_ms: u64,
}
382
/// Candidate id paired with its score, as recorded in
/// `RecallProvenanceEnvelope::candidate_scores`.
///
/// Equality is derived, so `score` is compared with exact `f64` equality
/// (a `NaN` score never compares equal, not even to itself).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CandidateScore {
    /// Candidate identifier.
    pub id: String,
    /// Score recorded for the candidate.
    pub score: f64,
}
391
/// Provenance record describing one recall: its inputs (query, embedder,
/// corpus, policy), purpose, consumers, resulting candidate scores and
/// timing.
///
/// `envelope_hash` fingerprints the record and `matches_for_replay` compares
/// the replay-relevant subset of fields; see those methods for exactly which
/// fields each one covers.
// NOTE(review): how the individual hash/fingerprint strings are computed is
// not shown in this file; the field descriptions are limited to what the
// names and the methods below establish.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallProvenanceEnvelope {
    /// Hash of the query (presumably `RecallQuery::query_hash` — confirm).
    pub query_hash: String,

    /// Hash of the text fed to the embedder.
    pub embedding_input_hash: String,

    /// Hash of the resulting embedding. Not compared by `matches_for_replay`.
    pub embedding_hash: String,

    /// Identifier of the embedder.
    pub embedder_id: String,

    /// Hash of the embedder's settings.
    pub embedder_settings_hash: String,

    /// Fingerprint of the corpus that was searched.
    pub corpus_fingerprint: String,

    /// Hash of the policy in force (presumably
    /// `RecallPolicy::snapshot_hash` — confirm).
    pub policy_snapshot_hash: String,

    /// Purpose of the recall; `RecallUse::default()` when absent from
    /// serialized input.
    #[serde(default)]
    pub purpose: RecallUse,

    /// Consumers of the recall result; empty when absent from serialized
    /// input.
    #[serde(default)]
    pub consumers: Vec<RecallConsumer>,

    /// Id/score pair for each candidate.
    pub candidate_scores: Vec<CandidateScore>,

    /// Number of candidates examined.
    pub candidates_searched: usize,

    /// Number of candidates returned.
    pub candidates_returned: usize,

    /// Why the recall stopped, if recorded. Not part of `envelope_hash`.
    pub stop_reason: Option<StopReason>,

    /// Elapsed time in milliseconds.
    pub latency_ms: u64,

    /// Time of the recall, as a string.
    pub timestamp: String,

    /// Signature over the envelope; `"unsigned"` when absent from serialized
    /// input. Not part of `envelope_hash`.
    #[serde(default = "default_signature")]
    pub signature: String,
}
469
/// Serde fallback for `RecallProvenanceEnvelope::signature`: the placeholder
/// used when an envelope carries no signature.
fn default_signature() -> String {
    String::from("unsigned")
}
473
474impl RecallProvenanceEnvelope {
475 #[must_use]
479 pub fn envelope_hash(&self) -> String {
480 use std::collections::hash_map::DefaultHasher;
481 use std::hash::{Hash, Hasher};
482
483 let mut hasher = DefaultHasher::new();
484 self.query_hash.hash(&mut hasher);
485 self.embedding_input_hash.hash(&mut hasher);
486 self.embedding_hash.hash(&mut hasher);
487 self.embedder_id.hash(&mut hasher);
488 self.embedder_settings_hash.hash(&mut hasher);
489 self.corpus_fingerprint.hash(&mut hasher);
490 self.policy_snapshot_hash.hash(&mut hasher);
491 (self.purpose as u8).hash(&mut hasher);
492 for consumer in &self.consumers {
493 (*consumer as u8).hash(&mut hasher);
494 }
495 for cs in &self.candidate_scores {
496 cs.id.hash(&mut hasher);
497 (cs.score as u64).hash(&mut hasher);
498 }
499 self.candidates_searched.hash(&mut hasher);
500 self.candidates_returned.hash(&mut hasher);
501 self.latency_ms.hash(&mut hasher);
502 self.timestamp.hash(&mut hasher);
503 format!("{:016x}", hasher.finish())
504 }
505
506 #[must_use]
517 pub fn matches_for_replay(&self, other: &Self) -> bool {
518 self.query_hash == other.query_hash
519 && self.embedding_input_hash == other.embedding_input_hash
520 && self.embedder_id == other.embedder_id
521 && self.embedder_settings_hash == other.embedder_settings_hash
522 && self.corpus_fingerprint == other.corpus_fingerprint
523 && self.policy_snapshot_hash == other.policy_snapshot_hash
524 && self.purpose == other.purpose
525 && self.consumers == other.consumers
526 && self.candidate_scores == other.candidate_scores
527 }
528
529 #[must_use]
531 pub fn summary(&self) -> String {
532 format!(
533 "Recall[query:{:.8}...][corpus:{:.8}...][{}/{} candidates][{}ms]",
534 self.query_hash,
535 self.corpus_fingerprint,
536 self.candidates_returned,
537 self.candidates_searched,
538 self.latency_ms
539 )
540 }
541}
542
/// Reason a recall stopped, as recorded in
/// `RecallProvenanceEnvelope::stop_reason`.
// NOTE(review): no variant is constructed anywhere in this file, so the
// per-variant notes below are inferred from the names only — confirm against
// the code that produces them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum StopReason {
    // Presumably: the requested number of candidates was reached.
    ReachedTopK,
    // Presumably: a `RecallBudgets` limit was used up.
    BudgetExhausted,
    // Presumably: remaining candidates scored below the policy threshold.
    BelowThreshold,
    // Presumably: a token cap was hit.
    TokenLimitReached,
    // Presumably: the latency budget was exceeded.
    LatencyExceeded,
    // Presumably: the embedder cannot guarantee reproducible output.
    EmbedderNotDeterministic,
    // Presumably: the query carried no tenant scope where one was required.
    TenantScopeMissing,
}
568
569pub fn recall_from_store(
584 store: &dyn ExperienceStore,
585 query: &RecallQuery,
586 policy: &RecallPolicy,
587) -> ExperienceStoreResult<Vec<RecallCandidate>> {
588 if !policy.enabled {
589 return Ok(Vec::new());
590 }
591
592 let event_query = EventQuery {
593 tenant_id: query.tenant_scope.as_deref().map(TenantId::new),
594 ..Default::default()
595 };
596
597 let records = store.query_records(&event_query)?;
598 let limit = query.top_k.min(policy.max_k_total);
599
600 let candidates = records
601 .iter()
602 .rev()
603 .filter_map(record_to_candidate)
604 .filter(|c| c.confidence >= policy.min_score_threshold)
605 .take(limit)
606 .map(|mut c| {
607 c.confidence = (c.confidence * policy.prior_weight).clamp(0.0, 1.0);
608 c
609 })
610 .collect();
611
612 Ok(candidates)
613}
614
615fn record_to_candidate(record: &ExperienceRecord) -> Option<RecallCandidate> {
616 match record {
617 ExperienceRecord::User(env) => match &env.event {
618 UserExperienceEvent::UserOverrideIssued { reason, .. } => Some(make_candidate(
619 env.event_id.as_str(),
620 env.occurred_at.as_str(),
621 format!("user override: {reason}"),
622 0.9,
623 CandidateSourceType::AntiPattern,
624 )),
625 UserExperienceEvent::UserApprovalGranted { reason, .. } => Some(make_candidate(
626 env.event_id.as_str(),
627 env.occurred_at.as_str(),
628 format!("user approval: {}", reason.as_deref().unwrap_or("granted")),
629 0.7,
630 CandidateSourceType::SimilarSuccess,
631 )),
632 },
633 ExperienceRecord::Engine(env) => match &env.event {
634 ExperienceEvent::OutcomeRecorded {
635 passed: false,
636 stop_reason,
637 ..
638 } => Some(make_candidate(
639 env.event_id.as_str(),
640 env.occurred_at.as_str(),
641 format!(
642 "outcome failed: {}",
643 stop_reason
644 .as_ref()
645 .map_or_else(|| "unspecified".to_string(), ToString::to_string)
646 ),
647 0.6,
648 CandidateSourceType::SimilarFailure,
649 )),
650 _ => None,
651 },
652 }
653}
654
655fn make_candidate(
656 id: &str,
657 occurred_at: &str,
658 summary: String,
659 confidence: f64,
660 source_type: CandidateSourceType,
661) -> RecallCandidate {
662 RecallCandidate {
663 id: id.to_string(),
664 summary,
665 raw_score: confidence,
666 final_score: confidence,
667 relevance: RelevanceLevel::from_score(confidence),
668 source_type,
669 provenance: CandidateProvenance {
670 created_at: occurred_at.to_string(),
671 source_chain_id: None,
672 source_step: None,
673 corpus_version: "experience-store-v0".to_string(),
674 },
675 confidence,
676 }
677}
678
// Unit tests for the recall policy, query and provenance types.
#[cfg(test)]
mod tests {
    use super::*;

    // `RecallPolicy::enabled()` switches recall on.
    #[test]
    fn test_recall_policy_enabled() {
        let policy = RecallPolicy::enabled();
        assert!(policy.enabled);
    }

    // `RecallPolicy::disabled()` leaves recall off.
    #[test]
    fn test_recall_policy_disabled() {
        let policy = RecallPolicy::disabled();
        assert!(!policy.enabled);
    }

    // One representative score per relevance bucket.
    #[test]
    fn test_relevance_from_score() {
        assert_eq!(RelevanceLevel::from_score(0.9), RelevanceLevel::High);
        assert_eq!(RelevanceLevel::from_score(0.6), RelevanceLevel::Medium);
        assert_eq!(RelevanceLevel::from_score(0.3), RelevanceLevel::Low);
    }

    // The builder methods set every field of the query.
    #[test]
    fn test_recall_query_builder() {
        let query = RecallQuery::new("test", 5)
            .with_step_context(DecisionStep::Reasoning)
            .with_tenant_scope("tenant-1");

        assert_eq!(query.query_text, "test");
        assert_eq!(query.top_k, 5);
        assert_eq!(query.step_context, Some(DecisionStep::Reasoning));
        assert_eq!(query.tenant_scope, Some("tenant-1".to_string()));
    }

    // The default policy permits runtime augmentation and nothing else.
    #[test]
    fn test_recall_policy_defaults_to_runtime_only() {
        let policy = RecallPolicy::default();
        assert!(
            policy
                .allowed_uses
                .contains(&RecallUse::RuntimeAugmentation),
            "Default policy must allow RuntimeAugmentation"
        );
        assert!(
            !policy
                .allowed_uses
                .contains(&RecallUse::TrainingCandidateSelection),
            "Default policy must NOT allow TrainingCandidateSelection"
        );
    }

    // A runtime-only policy rejects the training purpose.
    #[test]
    fn test_recall_training_purpose_is_blocked_in_kernel() {
        let policy = RecallPolicy {
            allowed_uses: vec![RecallUse::RuntimeAugmentation],
            ..Default::default()
        };

        assert!(
            recall_use_allowed(&policy, RecallUse::RuntimeAugmentation),
            "RuntimeAugmentation must be allowed"
        );
        assert!(
            !recall_use_allowed(&policy, RecallUse::TrainingCandidateSelection),
            "TrainingCandidateSelection must be blocked when not in allowed_uses"
        );
    }

    // Listing the training purpose explicitly permits it.
    #[test]
    fn test_recall_training_can_be_explicitly_enabled() {
        let policy = RecallPolicy {
            allowed_uses: vec![
                RecallUse::RuntimeAugmentation,
                RecallUse::TrainingCandidateSelection,
            ],
            ..Default::default()
        };

        assert!(recall_use_allowed(&policy, RecallUse::RuntimeAugmentation));
        assert!(recall_use_allowed(
            &policy,
            RecallUse::TrainingCandidateSelection
        ));
    }

    // Hashing the same policy twice yields the same value.
    #[test]
    fn test_policy_hash_deterministic() {
        let policy = RecallPolicy::default();
        let hash1 = policy.snapshot_hash();
        let hash2 = policy.snapshot_hash();
        assert_eq!(hash1, hash2, "Same policy must produce same hash");
    }

    // `allowed_uses` is part of the policy hash.
    #[test]
    fn test_policy_hash_changes_with_allowed_uses() {
        let policy1 = RecallPolicy::default();
        let policy2 = RecallPolicy {
            allowed_uses: vec![
                RecallUse::RuntimeAugmentation,
                RecallUse::TrainingCandidateSelection,
            ],
            ..Default::default()
        };

        assert_ne!(
            policy1.snapshot_hash(),
            policy2.snapshot_hash(),
            "Different allowed_uses must produce different hash"
        );
    }

    // Hashing the same query twice yields the same value.
    #[test]
    fn test_recall_query_hash_deterministic() {
        let query = RecallQuery::new("test query", 5);
        let hash1 = query.query_hash();
        let hash2 = query.query_hash();
        assert_eq!(hash1, hash2, "Same query must produce same hash");
    }

    // Replay matching is reflexive and sensitive to purpose and consumers.
    #[test]
    fn test_recall_provenance_matches_for_replay() {
        let env = RecallProvenanceEnvelope {
            query_hash: "q".to_string(),
            embedding_input_hash: "e".to_string(),
            embedding_hash: "h".to_string(),
            embedder_id: "id".to_string(),
            embedder_settings_hash: "s".to_string(),
            corpus_fingerprint: "c".to_string(),
            policy_snapshot_hash: "p".to_string(),
            purpose: RecallUse::RuntimeAugmentation,
            consumers: vec![RecallConsumer::Kernel],
            candidate_scores: vec![],
            candidates_searched: 10,
            candidates_returned: 2,
            stop_reason: None,
            latency_ms: 10,
            timestamp: "t".to_string(),
            signature: "unsigned".to_string(),
        };

        // An identical envelope matches.
        assert!(env.matches_for_replay(&env.clone()));

        let mut env2 = env.clone();
        // Changing only the purpose breaks the match.
        env2.purpose = RecallUse::TrainingCandidateSelection;
        assert!(
            !env.matches_for_replay(&env2),
            "Different purpose must not match"
        );

        let mut env3 = env.clone();
        // Changing only the consumers breaks the match.
        env3.consumers = vec![RecallConsumer::Trainer];
        assert!(
            !env.matches_for_replay(&env3),
            "Different consumers must not match"
        );
    }
}