1use crate::kernel_boundary::DecisionStep;
29use serde::{Deserialize, Serialize};
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
41pub enum RecallUse {
42 RuntimeAugmentation,
44 TrainingCandidateSelection,
46}
47
48impl Default for RecallUse {
49 fn default() -> Self {
50 Self::RuntimeAugmentation
51 }
52}
53
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
59pub enum RecallConsumer {
60 Kernel,
62 Analytics,
64 Trainer,
66}
67
68impl Default for RecallConsumer {
69 fn default() -> Self {
70 Self::Kernel
71 }
72}
73
74#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct RecallPolicy {
84 pub enabled: bool,
86 pub max_k_total: usize,
88 pub max_tokens_injection: usize,
90 pub min_score_threshold: f64,
92 pub budgets: RecallBudgets,
94 #[serde(default = "default_allowed_uses")]
99 pub allowed_uses: Vec<RecallUse>,
100}
101
102fn default_allowed_uses() -> Vec<RecallUse> {
103 vec![RecallUse::RuntimeAugmentation]
104}
105
106impl Default for RecallPolicy {
107 fn default() -> Self {
108 Self {
109 enabled: false,
110 max_k_total: 5,
111 max_tokens_injection: 500,
112 min_score_threshold: 0.5,
113 budgets: RecallBudgets::default(),
114 allowed_uses: default_allowed_uses(),
115 }
116 }
117}
118
119impl RecallPolicy {
120 #[must_use]
122 pub fn enabled() -> Self {
123 Self {
124 enabled: true,
125 ..Default::default()
126 }
127 }
128
129 #[must_use]
131 pub fn disabled() -> Self {
132 Self::default()
133 }
134
135 #[must_use]
140 pub fn is_use_allowed(&self, purpose: RecallUse) -> bool {
141 self.allowed_uses.contains(&purpose)
142 }
143
144 #[must_use]
149 pub fn snapshot_hash(&self) -> String {
150 use std::collections::hash_map::DefaultHasher;
151 use std::hash::{Hash, Hasher};
152
153 let mut hasher = DefaultHasher::new();
154 self.enabled.hash(&mut hasher);
155 self.max_k_total.hash(&mut hasher);
156 self.max_tokens_injection.hash(&mut hasher);
157 (self.min_score_threshold as u64).hash(&mut hasher);
158 self.budgets.max_latency_ms.hash(&mut hasher);
159 self.budgets.max_embedding_calls.hash(&mut hasher);
160 self.budgets.max_tokens_per_candidate.hash(&mut hasher);
161 for use_type in &self.allowed_uses {
162 (*use_type as u8).hash(&mut hasher);
163 }
164 format!("{:016x}", hasher.finish())
165 }
166}
167
168#[must_use]
173pub fn recall_use_allowed(policy: &RecallPolicy, purpose: RecallUse) -> bool {
174 policy.is_use_allowed(purpose)
175}
176
177#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct RecallBudgets {
180 pub max_latency_ms: u64,
182 pub max_embedding_calls: usize,
184 pub max_tokens_per_candidate: usize,
186}
187
188impl Default for RecallBudgets {
189 fn default() -> Self {
190 Self {
191 max_latency_ms: 100,
192 max_embedding_calls: 3,
193 max_tokens_per_candidate: 100,
194 }
195 }
196}
197
198#[derive(Debug, Clone, Serialize, Deserialize)]
204pub struct RecallQuery {
205 pub query_text: String,
207 pub top_k: usize,
209 pub step_context: Option<DecisionStep>,
211 pub tenant_scope: Option<String>,
213}
214
215impl RecallQuery {
216 #[must_use]
218 pub fn new(query_text: impl Into<String>, top_k: usize) -> Self {
219 Self {
220 query_text: query_text.into(),
221 top_k,
222 step_context: None,
223 tenant_scope: None,
224 }
225 }
226
227 #[must_use]
229 pub fn with_step_context(mut self, step: DecisionStep) -> Self {
230 self.step_context = Some(step);
231 self
232 }
233
234 #[must_use]
236 pub fn with_tenant_scope(mut self, tenant: impl Into<String>) -> Self {
237 self.tenant_scope = Some(tenant.into());
238 self
239 }
240
241 #[must_use]
243 pub fn query_hash(&self) -> String {
244 use std::collections::hash_map::DefaultHasher;
245 use std::hash::{Hash, Hasher};
246
247 let mut hasher = DefaultHasher::new();
248 self.query_text.hash(&mut hasher);
249 self.top_k.hash(&mut hasher);
250 if let Some(ref step) = self.step_context {
251 step.as_str().hash(&mut hasher);
252 }
253 if let Some(ref tenant) = self.tenant_scope {
254 tenant.hash(&mut hasher);
255 }
256 format!("{:016x}", hasher.finish())
257 }
258}
259
260#[derive(Debug, Clone, Serialize, Deserialize)]
262pub struct RecallCandidate {
263 pub id: String,
265 pub summary: String,
267 pub raw_score: f64,
269 pub final_score: f64,
271 pub relevance: RelevanceLevel,
273 pub source_type: CandidateSourceType,
275 pub provenance: CandidateProvenance,
277}
278
279#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
281pub enum RelevanceLevel {
282 High,
283 Medium,
284 Low,
285}
286
287impl RelevanceLevel {
288 #[must_use]
290 pub fn from_score(score: f64) -> Self {
291 if score >= 0.8 {
292 Self::High
293 } else if score >= 0.5 {
294 Self::Medium
295 } else {
296 Self::Low
297 }
298 }
299
300 #[must_use]
302 pub fn as_str(&self) -> &'static str {
303 match self {
304 Self::High => "high",
305 Self::Medium => "medium",
306 Self::Low => "low",
307 }
308 }
309}
310
311#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
313pub enum CandidateSourceType {
314 SimilarFailure,
315 SimilarSuccess,
316 Runbook,
317 AdapterConfig,
318 AntiPattern,
319}
320
321#[derive(Debug, Clone, Serialize, Deserialize)]
323pub struct CandidateProvenance {
324 pub created_at: String,
326 pub source_chain_id: Option<String>,
328 pub source_step: Option<DecisionStep>,
330 pub corpus_version: String,
332}
333
334#[derive(Debug, Clone, Serialize, Deserialize)]
340pub struct RecallTraceLink {
341 pub embedding_hash: String,
343 pub corpus_version: String,
345 pub embedder_id: String,
347 pub candidates_searched: usize,
349 pub candidates_returned: usize,
351 pub latency_ms: u64,
353}
354
355#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
357pub struct CandidateScore {
358 pub id: String,
360 pub score: f64,
362}
363
364#[derive(Debug, Clone, Serialize, Deserialize)]
373pub struct RecallProvenanceEnvelope {
374 pub query_hash: String,
377
378 pub embedding_input_hash: String,
381
382 pub embedding_hash: String,
384
385 pub embedder_id: String,
388
389 pub embedder_settings_hash: String,
391
392 pub corpus_fingerprint: String,
395
396 pub policy_snapshot_hash: String,
399
400 #[serde(default)]
405 pub purpose: RecallUse,
406
407 #[serde(default)]
412 pub consumers: Vec<RecallConsumer>,
413
414 pub candidate_scores: Vec<CandidateScore>,
418
419 pub candidates_searched: usize,
421
422 pub candidates_returned: usize,
424
425 pub stop_reason: Option<StopReason>,
427
428 pub latency_ms: u64,
431
432 pub timestamp: String,
434
435 #[serde(default = "default_signature")]
439 pub signature: String,
440}
441
/// Serde fallback for `RecallProvenanceEnvelope::signature`: the literal
/// `"unsigned"`.
fn default_signature() -> String {
    String::from("unsigned")
}
445
446impl RecallProvenanceEnvelope {
447 #[must_use]
451 pub fn envelope_hash(&self) -> String {
452 use std::collections::hash_map::DefaultHasher;
453 use std::hash::{Hash, Hasher};
454
455 let mut hasher = DefaultHasher::new();
456 self.query_hash.hash(&mut hasher);
457 self.embedding_input_hash.hash(&mut hasher);
458 self.embedding_hash.hash(&mut hasher);
459 self.embedder_id.hash(&mut hasher);
460 self.embedder_settings_hash.hash(&mut hasher);
461 self.corpus_fingerprint.hash(&mut hasher);
462 self.policy_snapshot_hash.hash(&mut hasher);
463 (self.purpose as u8).hash(&mut hasher);
464 for consumer in &self.consumers {
465 (*consumer as u8).hash(&mut hasher);
466 }
467 for cs in &self.candidate_scores {
468 cs.id.hash(&mut hasher);
469 (cs.score as u64).hash(&mut hasher);
470 }
471 self.candidates_searched.hash(&mut hasher);
472 self.candidates_returned.hash(&mut hasher);
473 self.latency_ms.hash(&mut hasher);
474 self.timestamp.hash(&mut hasher);
475 format!("{:016x}", hasher.finish())
476 }
477
478 #[must_use]
489 pub fn matches_for_replay(&self, other: &Self) -> bool {
490 self.query_hash == other.query_hash
491 && self.embedding_input_hash == other.embedding_input_hash
492 && self.embedder_id == other.embedder_id
493 && self.embedder_settings_hash == other.embedder_settings_hash
494 && self.corpus_fingerprint == other.corpus_fingerprint
495 && self.policy_snapshot_hash == other.policy_snapshot_hash
496 && self.purpose == other.purpose
497 && self.consumers == other.consumers
498 && self.candidate_scores == other.candidate_scores
499 }
500
501 #[must_use]
503 pub fn summary(&self) -> String {
504 format!(
505 "Recall[query:{:.8}...][corpus:{:.8}...][{}/{} candidates][{}ms]",
506 self.query_hash,
507 self.corpus_fingerprint,
508 self.candidates_returned,
509 self.candidates_searched,
510 self.latency_ms
511 )
512 }
513}
514
515#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
517pub enum StopReason {
518 ReachedTopK,
520 BudgetExhausted,
522 BelowThreshold,
524 TokenLimitReached,
526 LatencyExceeded,
528 EmbedderNotDeterministic,
534 TenantScopeMissing,
539}
540
#[cfg(test)]
mod tests {
    use super::*;

    /// Default policy with only `allowed_uses` overridden.
    fn policy_allowing(uses: &[RecallUse]) -> RecallPolicy {
        RecallPolicy {
            allowed_uses: uses.to_vec(),
            ..RecallPolicy::default()
        }
    }

    /// Baseline envelope that the replay-matching test mutates one field at
    /// a time.
    fn baseline_envelope() -> RecallProvenanceEnvelope {
        RecallProvenanceEnvelope {
            query_hash: String::from("q"),
            embedding_input_hash: String::from("e"),
            embedding_hash: String::from("h"),
            embedder_id: String::from("id"),
            embedder_settings_hash: String::from("s"),
            corpus_fingerprint: String::from("c"),
            policy_snapshot_hash: String::from("p"),
            purpose: RecallUse::RuntimeAugmentation,
            consumers: vec![RecallConsumer::Kernel],
            candidate_scores: Vec::new(),
            candidates_searched: 10,
            candidates_returned: 2,
            stop_reason: None,
            latency_ms: 10,
            timestamp: String::from("t"),
            signature: String::from("unsigned"),
        }
    }

    #[test]
    fn test_recall_policy_enabled() {
        assert!(RecallPolicy::enabled().enabled);
    }

    #[test]
    fn test_recall_policy_disabled() {
        assert!(!RecallPolicy::disabled().enabled);
    }

    #[test]
    fn test_relevance_from_score() {
        let expectations = [
            (0.9, RelevanceLevel::High),
            (0.6, RelevanceLevel::Medium),
            (0.3, RelevanceLevel::Low),
        ];
        for &(score, expected) in &expectations {
            assert_eq!(RelevanceLevel::from_score(score), expected);
        }
    }

    #[test]
    fn test_recall_query_builder() {
        let built = RecallQuery::new("test", 5)
            .with_step_context(DecisionStep::Reasoning)
            .with_tenant_scope("tenant-1");

        assert_eq!(built.query_text, "test");
        assert_eq!(built.top_k, 5);
        assert_eq!(built.step_context, Some(DecisionStep::Reasoning));
        assert_eq!(built.tenant_scope, Some("tenant-1".to_string()));
    }

    #[test]
    fn test_recall_policy_defaults_to_runtime_only() {
        let allowed = RecallPolicy::default().allowed_uses;
        let runtime_allowed = allowed.contains(&RecallUse::RuntimeAugmentation);
        let training_allowed = allowed.contains(&RecallUse::TrainingCandidateSelection);

        assert!(
            runtime_allowed,
            "Default policy must allow RuntimeAugmentation"
        );
        assert!(
            !training_allowed,
            "Default policy must NOT allow TrainingCandidateSelection"
        );
    }

    #[test]
    fn test_recall_training_purpose_is_blocked_in_kernel() {
        let runtime_only = policy_allowing(&[RecallUse::RuntimeAugmentation]);

        assert!(
            recall_use_allowed(&runtime_only, RecallUse::RuntimeAugmentation),
            "RuntimeAugmentation must be allowed"
        );
        assert!(
            !recall_use_allowed(&runtime_only, RecallUse::TrainingCandidateSelection),
            "TrainingCandidateSelection must be blocked when not in allowed_uses"
        );
    }

    #[test]
    fn test_recall_training_can_be_explicitly_enabled() {
        let both = policy_allowing(&[
            RecallUse::RuntimeAugmentation,
            RecallUse::TrainingCandidateSelection,
        ]);

        assert!(recall_use_allowed(&both, RecallUse::RuntimeAugmentation));
        assert!(recall_use_allowed(
            &both,
            RecallUse::TrainingCandidateSelection
        ));
    }

    #[test]
    fn test_policy_hash_deterministic() {
        let policy = RecallPolicy::default();
        let first = policy.snapshot_hash();
        let second = policy.snapshot_hash();
        assert_eq!(first, second, "Same policy must produce same hash");
    }

    #[test]
    fn test_policy_hash_changes_with_allowed_uses() {
        let runtime_only = RecallPolicy::default();
        let with_training = policy_allowing(&[
            RecallUse::RuntimeAugmentation,
            RecallUse::TrainingCandidateSelection,
        ]);

        assert_ne!(
            runtime_only.snapshot_hash(),
            with_training.snapshot_hash(),
            "Different allowed_uses must produce different hash"
        );
    }

    #[test]
    fn test_recall_query_hash_deterministic() {
        let query = RecallQuery::new("test query", 5);
        let first = query.query_hash();
        let second = query.query_hash();
        assert_eq!(first, second, "Same query must produce same hash");
    }

    #[test]
    fn test_recall_provenance_matches_for_replay() {
        let original = baseline_envelope();

        // An identical copy must match.
        assert!(original.matches_for_replay(&original.clone()));

        // Changing only the purpose must break the match.
        let different_purpose = RecallProvenanceEnvelope {
            purpose: RecallUse::TrainingCandidateSelection,
            ..original.clone()
        };
        assert!(
            !original.matches_for_replay(&different_purpose),
            "Different purpose must not match"
        );

        // Changing only the consumers must break the match.
        let different_consumers = RecallProvenanceEnvelope {
            consumers: vec![RecallConsumer::Trainer],
            ..original.clone()
        };
        assert!(
            !original.matches_for_replay(&different_consumers),
            "Different consumers must not match"
        );
    }
}