use crate::kernel_boundary::DecisionStep;
use serde::{Deserialize, Serialize};
/// Purpose for which a recall is performed.
///
/// A `RecallPolicy` lists the purposes it permits in `allowed_uses`.
/// Variants are cast with `as u8` when hashed elsewhere in this file
/// (`snapshot_hash`, `envelope_hash`), so their declaration order is
/// significant: reordering them changes those hashes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RecallUse {
/// The `Default` purpose, and the only entry in the default policy's allow-list.
RuntimeAugmentation,
/// Not in the default allow-list; a policy must list it in `allowed_uses` explicitly.
TrainingCandidateSelection,
}
impl Default for RecallUse {
fn default() -> Self {
Self::RuntimeAugmentation
}
}
/// Identifies a consumer of a recall result.
///
/// Recorded in `RecallProvenanceEnvelope::consumers`. Variants are cast with
/// `as u8` inside `envelope_hash`, so their declaration order is significant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RecallConsumer {
/// The `Default` consumer.
Kernel,
/// NOTE(review): presumably an analytics pipeline — not exercised in this file; confirm.
Analytics,
/// NOTE(review): presumably a training pipeline — confirm against callers.
Trainer,
}
impl Default for RecallConsumer {
fn default() -> Self {
Self::Kernel
}
}
/// Configuration describing whether recall is enabled, its limits, and the
/// purposes it may be used for.
///
/// `RecallPolicy::default()` yields a disabled policy; the `Default` impl holds
/// the concrete default values quoted on the fields below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallPolicy {
/// Whether recall is switched on; `false` in the default policy.
pub enabled: bool,
/// Default 5. Presumably the cap on candidates returned overall — TODO confirm with the consumer.
pub max_k_total: usize,
/// Default 500. Presumably a token cap on injected recall content — TODO confirm.
pub max_tokens_injection: usize,
/// Default 0.5. Presumably the minimum candidate score to keep — TODO confirm.
pub min_score_threshold: f64,
/// Resource budgets (latency, embedding calls, per-candidate tokens).
pub budgets: RecallBudgets,
/// Purposes this policy permits; queried by `is_use_allowed`. When the field is
/// missing from serialized input, serde fills it by calling `default_allowed_uses`.
#[serde(default = "default_allowed_uses")]
pub allowed_uses: Vec<RecallUse>,
}
/// Allow-list used by `RecallPolicy::default()` and by serde when
/// `allowed_uses` is absent from the input: runtime augmentation only.
fn default_allowed_uses() -> Vec<RecallUse> {
    Vec::from([RecallUse::RuntimeAugmentation])
}
impl Default for RecallPolicy {
fn default() -> Self {
Self {
enabled: false,
max_k_total: 5,
max_tokens_injection: 500,
min_score_threshold: 0.5,
budgets: RecallBudgets::default(),
allowed_uses: default_allowed_uses(),
}
}
}
impl RecallPolicy {
    /// Returns the default policy with `enabled` set to `true`.
    #[must_use]
    pub fn enabled() -> Self {
        Self {
            enabled: true,
            ..Default::default()
        }
    }

    /// Returns the default policy, which has `enabled` set to `false`.
    #[must_use]
    pub fn disabled() -> Self {
        Self::default()
    }

    /// Reports whether `purpose` appears in this policy's `allowed_uses`.
    #[must_use]
    pub fn is_use_allowed(&self, purpose: RecallUse) -> bool {
        self.allowed_uses.contains(&purpose)
    }

    /// Fingerprints every field of the policy as a 16-digit lowercase hex string.
    ///
    /// Fields are fed to the hasher in declaration order; `allowed_uses` is
    /// hashed element by element in its stored order, so two policies listing
    /// the same purposes in a different order produce different hashes.
    ///
    /// The threshold is hashed through its IEEE-754 bit pattern. Casting it to
    /// an integer would truncate every value in `[0, 1)` to `0` and make
    /// policies that differ only in threshold indistinguishable.
    ///
    /// NOTE(review): `DefaultHasher`'s algorithm is unspecified and may change
    /// between Rust releases, so these hashes are only comparable when produced
    /// by binaries built with the same standard library.
    #[must_use]
    pub fn snapshot_hash(&self) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        self.enabled.hash(&mut hasher);
        self.max_k_total.hash(&mut hasher);
        self.max_tokens_injection.hash(&mut hasher);
        // `f64` is not `Hash`; the raw bits keep the fractional part significant.
        self.min_score_threshold.to_bits().hash(&mut hasher);
        self.budgets.max_latency_ms.hash(&mut hasher);
        self.budgets.max_embedding_calls.hash(&mut hasher);
        self.budgets.max_tokens_per_candidate.hash(&mut hasher);
        for use_type in &self.allowed_uses {
            (*use_type as u8).hash(&mut hasher);
        }
        format!("{:016x}", hasher.finish())
    }
}
#[must_use]
pub fn recall_use_allowed(policy: &RecallPolicy, purpose: RecallUse) -> bool {
policy.is_use_allowed(purpose)
}
/// Resource budgets attached to a `RecallPolicy`.
///
/// All three fields take part in `RecallPolicy::snapshot_hash`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallBudgets {
/// Default 100. Latency budget; milliseconds judging by the `_ms` suffix.
pub max_latency_ms: u64,
/// Default 3. Presumably the cap on embedding calls per recall — TODO confirm.
pub max_embedding_calls: usize,
/// Default 100. Presumably a per-candidate token cap — TODO confirm.
pub max_tokens_per_candidate: usize,
}
impl Default for RecallBudgets {
fn default() -> Self {
Self {
max_latency_ms: 100,
max_embedding_calls: 3,
max_tokens_per_candidate: 100,
}
}
}
/// A recall request: the query text, how many results are wanted, and two
/// optional qualifiers.
///
/// Built with `RecallQuery::new` and the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallQuery {
/// The text to recall against.
pub query_text: String,
/// Number of results requested.
pub top_k: usize,
/// Optional decision step the query is issued from; `None` unless set via `with_step_context`.
pub step_context: Option<DecisionStep>,
/// Optional tenant identifier; `None` unless set via `with_tenant_scope`.
pub tenant_scope: Option<String>,
}
impl RecallQuery {
#[must_use]
pub fn new(query_text: impl Into<String>, top_k: usize) -> Self {
Self {
query_text: query_text.into(),
top_k,
step_context: None,
tenant_scope: None,
}
}
#[must_use]
pub fn with_step_context(mut self, step: DecisionStep) -> Self {
self.step_context = Some(step);
self
}
#[must_use]
pub fn with_tenant_scope(mut self, tenant: impl Into<String>) -> Self {
self.tenant_scope = Some(tenant.into());
self
}
#[must_use]
pub fn query_hash(&self) -> String {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
self.query_text.hash(&mut hasher);
self.top_k.hash(&mut hasher);
if let Some(ref step) = self.step_context {
step.as_str().hash(&mut hasher);
}
if let Some(ref tenant) = self.tenant_scope {
tenant.hash(&mut hasher);
}
format!("{:016x}", hasher.finish())
}
}
/// A single candidate produced by a recall.
///
/// NOTE(review): this file only declares the shape; how the scores are computed
/// and how `relevance` is assigned is not shown here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallCandidate {
/// Candidate identifier.
pub id: String,
/// Summary text for the candidate.
pub summary: String,
/// Score before adjustment — presumably the raw similarity; TODO confirm.
pub raw_score: f64,
/// Score after adjustment — the adjustment itself is not defined in this file.
pub final_score: f64,
/// Relevance bucket; presumably derived via `RelevanceLevel::from_score` — confirm at the construction site.
pub relevance: RelevanceLevel,
/// Category of the source the candidate came from.
pub source_type: CandidateSourceType,
/// Where and when the candidate originated.
pub provenance: CandidateProvenance,
}
/// Coarse relevance bucket for a score.
///
/// `RelevanceLevel::from_score` maps a score to a bucket using the thresholds
/// quoted on each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RelevanceLevel {
/// Score `>= 0.8`.
High,
/// Score `>= 0.5` and below `0.8`.
Medium,
/// Everything else, including NaN.
Low,
}
impl RelevanceLevel {
#[must_use]
pub fn from_score(score: f64) -> Self {
if score >= 0.8 {
Self::High
} else if score >= 0.5 {
Self::Medium
} else {
Self::Low
}
}
#[must_use]
pub fn as_str(&self) -> &'static str {
match self {
Self::High => "high",
Self::Medium => "medium",
Self::Low => "low",
}
}
}
/// Category of the source a `RecallCandidate` was drawn from.
///
/// NOTE(review): the variants are only declared here; their precise meaning is
/// inferred from the names and should be confirmed where candidates are built.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CandidateSourceType {
/// Presumably a previously recorded failure resembling the query.
SimilarFailure,
/// Presumably a previously recorded success resembling the query.
SimilarSuccess,
/// Presumably an operational runbook entry.
Runbook,
/// Presumably an adapter configuration record.
AdapterConfig,
/// Presumably a documented anti-pattern.
AntiPattern,
}
/// Origin metadata carried by a `RecallCandidate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateProvenance {
/// Creation time as a string — the format is not fixed in this file; TODO confirm.
pub created_at: String,
/// Identifier of the originating chain, when known.
pub source_chain_id: Option<String>,
/// Decision step the candidate originated from, when known.
pub source_step: Option<DecisionStep>,
/// Version of the corpus the candidate belongs to.
pub corpus_version: String,
}
/// Compact trace record for a recall: which embedding, corpus and embedder were
/// involved, plus counts and latency.
///
/// NOTE(review): nothing in this file constructs or reads this type; field
/// meanings below follow the names and should be confirmed against callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallTraceLink {
/// Hash of the embedding used.
pub embedding_hash: String,
/// Version of the corpus searched.
pub corpus_version: String,
/// Identifier of the embedder.
pub embedder_id: String,
/// Number of candidates searched.
pub candidates_searched: usize,
/// Number of candidates returned.
pub candidates_returned: usize,
/// Latency; milliseconds judging by the `_ms` suffix.
pub latency_ms: u64,
}
/// A candidate id paired with its score, as stored in
/// `RecallProvenanceEnvelope::candidate_scores`.
///
/// `PartialEq` is derived because `matches_for_replay` compares whole score
/// lists; the comparison on `score` is exact `f64` equality.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CandidateScore {
/// Candidate identifier.
pub id: String,
/// Score recorded for the candidate.
pub score: f64,
}
/// Provenance record for one recall: hashes of the inputs, the purpose and
/// consumers, the scored candidates, and outcome metadata.
///
/// `envelope_hash` fingerprints the record and `matches_for_replay` compares the
/// replay-relevant subset of fields between two records.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallProvenanceEnvelope {
/// Hash of the query — presumably `RecallQuery::query_hash`; confirm at the construction site.
pub query_hash: String,
/// Hash of the input given to the embedder.
pub embedding_input_hash: String,
/// Hash of the resulting embedding. Not compared by `matches_for_replay`.
pub embedding_hash: String,
/// Identifier of the embedder.
pub embedder_id: String,
/// Hash of the embedder's settings.
pub embedder_settings_hash: String,
/// Fingerprint of the corpus searched.
pub corpus_fingerprint: String,
/// Hash of the policy in force — presumably `RecallPolicy::snapshot_hash`; confirm.
pub policy_snapshot_hash: String,
/// Purpose of the recall; falls back to `RecallUse::default()` when absent from input.
#[serde(default)]
pub purpose: RecallUse,
/// Consumers of the result; falls back to an empty list when absent from input.
#[serde(default)]
pub consumers: Vec<RecallConsumer>,
/// Id/score pairs for the candidates.
pub candidate_scores: Vec<CandidateScore>,
/// Number of candidates searched.
pub candidates_searched: usize,
/// Number of candidates returned.
pub candidates_returned: usize,
/// Why the recall stopped, if recorded. Not part of `envelope_hash`.
pub stop_reason: Option<StopReason>,
/// Latency; milliseconds judging by the `_ms` suffix.
pub latency_ms: u64,
/// Timestamp as a string — the format is not fixed in this file; TODO confirm.
pub timestamp: String,
/// Signature over the envelope; `"unsigned"` when absent from input. Not part of `envelope_hash`.
#[serde(default = "default_signature")]
pub signature: String,
}
/// Placeholder used by serde when an envelope is deserialized without a
/// `signature` field.
fn default_signature() -> String {
    String::from("unsigned")
}
impl RecallProvenanceEnvelope {
    /// Fingerprints the envelope as a 16-digit lowercase hex string.
    ///
    /// Covers, in order: the seven input hashes/ids, `purpose`, each consumer,
    /// each candidate's id and score, both candidate counts, `latency_ms` and
    /// `timestamp`. `stop_reason` and `signature` are not included.
    ///
    /// Scores are hashed through their IEEE-754 bit pattern. Casting them to an
    /// integer would truncate every score in `[0, 1)` to `0`, so envelopes
    /// differing only in fractional scores would share a hash.
    ///
    /// NOTE(review): `stop_reason` is left out exactly as before; confirm that
    /// the omission is intentional. `DefaultHasher`'s algorithm is unspecified
    /// and may change between Rust releases, so these hashes are only
    /// comparable when produced with the same standard library.
    #[must_use]
    pub fn envelope_hash(&self) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        self.query_hash.hash(&mut hasher);
        self.embedding_input_hash.hash(&mut hasher);
        self.embedding_hash.hash(&mut hasher);
        self.embedder_id.hash(&mut hasher);
        self.embedder_settings_hash.hash(&mut hasher);
        self.corpus_fingerprint.hash(&mut hasher);
        self.policy_snapshot_hash.hash(&mut hasher);
        (self.purpose as u8).hash(&mut hasher);
        for consumer in &self.consumers {
            (*consumer as u8).hash(&mut hasher);
        }
        for cs in &self.candidate_scores {
            cs.id.hash(&mut hasher);
            // `f64` is not `Hash`; the raw bits keep the fractional part significant.
            cs.score.to_bits().hash(&mut hasher);
        }
        self.candidates_searched.hash(&mut hasher);
        self.candidates_returned.hash(&mut hasher);
        self.latency_ms.hash(&mut hasher);
        self.timestamp.hash(&mut hasher);
        format!("{:016x}", hasher.finish())
    }

    /// Returns `true` when `other` records the same inputs and results as
    /// `self` for replay purposes.
    ///
    /// Compared: query hash, embedding input hash, embedder id and settings
    /// hash, corpus fingerprint, policy snapshot hash, purpose, consumers
    /// (order-sensitive) and candidate scores (exact `f64` equality).
    /// Not compared: `embedding_hash`, the candidate counts, `stop_reason`,
    /// `latency_ms`, `timestamp` and `signature`.
    #[must_use]
    pub fn matches_for_replay(&self, other: &Self) -> bool {
        self.query_hash == other.query_hash
            && self.embedding_input_hash == other.embedding_input_hash
            && self.embedder_id == other.embedder_id
            && self.embedder_settings_hash == other.embedder_settings_hash
            && self.corpus_fingerprint == other.corpus_fingerprint
            && self.policy_snapshot_hash == other.policy_snapshot_hash
            && self.purpose == other.purpose
            && self.consumers == other.consumers
            && self.candidate_scores == other.candidate_scores
    }

    /// One-line human-readable summary. The `{:.8}` precision shortens the
    /// query hash and corpus fingerprint to at most their first 8 characters.
    #[must_use]
    pub fn summary(&self) -> String {
        format!(
            "Recall[query:{:.8}...][corpus:{:.8}...][{}/{} candidates][{}ms]",
            self.query_hash,
            self.corpus_fingerprint,
            self.candidates_returned,
            self.candidates_searched,
            self.latency_ms
        )
    }
}
/// Reason a recall stopped, as recorded in
/// `RecallProvenanceEnvelope::stop_reason`.
///
/// NOTE(review): the variants are only declared in this file; the descriptions
/// below follow the names and should be confirmed where they are produced.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum StopReason {
/// Presumably: the requested number of results was reached.
ReachedTopK,
/// Presumably: a budget from `RecallBudgets` ran out.
BudgetExhausted,
/// Presumably: remaining candidates scored below the policy threshold.
BelowThreshold,
/// Presumably: a token limit was hit.
TokenLimitReached,
/// Presumably: the latency budget was exceeded.
LatencyExceeded,
/// Presumably: the embedder could not guarantee deterministic output.
EmbedderNotDeterministic,
/// Presumably: a tenant scope was required but not supplied.
TenantScopeMissing,
}
// Unit tests for the recall policy, query and provenance types in this file.
#[cfg(test)]
mod tests {
use super::*;
// `RecallPolicy::enabled()` must produce a policy with `enabled == true`.
#[test]
fn test_recall_policy_enabled() {
let policy = RecallPolicy::enabled();
assert!(policy.enabled);
}
// `RecallPolicy::disabled()` must produce a policy with `enabled == false`.
#[test]
fn test_recall_policy_disabled() {
let policy = RecallPolicy::disabled();
assert!(!policy.enabled);
}
// One representative score per bucket; exact boundaries (0.8, 0.5) are not probed here.
#[test]
fn test_relevance_from_score() {
assert_eq!(RelevanceLevel::from_score(0.9), RelevanceLevel::High);
assert_eq!(RelevanceLevel::from_score(0.6), RelevanceLevel::Medium);
assert_eq!(RelevanceLevel::from_score(0.3), RelevanceLevel::Low);
}
// The builder methods must set every field they are named after.
#[test]
fn test_recall_query_builder() {
let query = RecallQuery::new("test", 5)
.with_step_context(DecisionStep::Reasoning)
.with_tenant_scope("tenant-1");
assert_eq!(query.query_text, "test");
assert_eq!(query.top_k, 5);
assert_eq!(query.step_context, Some(DecisionStep::Reasoning));
assert_eq!(query.tenant_scope, Some("tenant-1".to_string()));
}
// The default allow-list contains runtime augmentation and nothing training-related.
#[test]
fn test_recall_policy_defaults_to_runtime_only() {
let policy = RecallPolicy::default();
assert!(
policy
.allowed_uses
.contains(&RecallUse::RuntimeAugmentation),
"Default policy must allow RuntimeAugmentation"
);
assert!(
!policy
.allowed_uses
.contains(&RecallUse::TrainingCandidateSelection),
"Default policy must NOT allow TrainingCandidateSelection"
);
}
// A purpose missing from `allowed_uses` must be rejected by `recall_use_allowed`.
#[test]
fn test_recall_training_purpose_is_blocked_in_kernel() {
let policy = RecallPolicy {
allowed_uses: vec![RecallUse::RuntimeAugmentation],
..Default::default()
};
assert!(
recall_use_allowed(&policy, RecallUse::RuntimeAugmentation),
"RuntimeAugmentation must be allowed"
);
assert!(
!recall_use_allowed(&policy, RecallUse::TrainingCandidateSelection),
"TrainingCandidateSelection must be blocked when not in allowed_uses"
);
}
// Listing the training purpose explicitly makes it allowed alongside runtime use.
#[test]
fn test_recall_training_can_be_explicitly_enabled() {
let policy = RecallPolicy {
allowed_uses: vec![
RecallUse::RuntimeAugmentation,
RecallUse::TrainingCandidateSelection,
],
..Default::default()
};
assert!(recall_use_allowed(&policy, RecallUse::RuntimeAugmentation));
assert!(recall_use_allowed(
&policy,
RecallUse::TrainingCandidateSelection
));
}
// Hashing the same policy twice in one process must give the same string.
#[test]
fn test_policy_hash_deterministic() {
let policy = RecallPolicy::default();
let hash1 = policy.snapshot_hash();
let hash2 = policy.snapshot_hash();
assert_eq!(hash1, hash2, "Same policy must produce same hash");
}
// Only `allowed_uses` is varied here; no test varies the numeric policy fields.
#[test]
fn test_policy_hash_changes_with_allowed_uses() {
let policy1 = RecallPolicy::default();
let policy2 = RecallPolicy {
allowed_uses: vec![
RecallUse::RuntimeAugmentation,
RecallUse::TrainingCandidateSelection,
],
..Default::default()
};
assert_ne!(
policy1.snapshot_hash(),
policy2.snapshot_hash(),
"Different allowed_uses must produce different hash"
);
}
// Hashing the same query twice in one process must give the same string.
#[test]
fn test_recall_query_hash_deterministic() {
let query = RecallQuery::new("test query", 5);
let hash1 = query.query_hash();
let hash2 = query.query_hash();
assert_eq!(hash1, hash2, "Same query must produce same hash");
}
// An envelope matches its own clone; changing `purpose` or `consumers` breaks the match.
#[test]
fn test_recall_provenance_matches_for_replay() {
let env = RecallProvenanceEnvelope {
query_hash: "q".to_string(),
embedding_input_hash: "e".to_string(),
embedding_hash: "h".to_string(),
embedder_id: "id".to_string(),
embedder_settings_hash: "s".to_string(),
corpus_fingerprint: "c".to_string(),
policy_snapshot_hash: "p".to_string(),
purpose: RecallUse::RuntimeAugmentation,
consumers: vec![RecallConsumer::Kernel],
candidate_scores: vec![],
candidates_searched: 10,
candidates_returned: 2,
stop_reason: None,
latency_ms: 10,
timestamp: "t".to_string(),
signature: "unsigned".to_string(),
};
assert!(env.matches_for_replay(&env.clone()));
// Same envelope, different purpose.
let mut env2 = env.clone();
env2.purpose = RecallUse::TrainingCandidateSelection;
assert!(
!env.matches_for_replay(&env2),
"Different purpose must not match"
);
// Same envelope, different consumer list.
let mut env3 = env.clone();
env3.consumers = vec![RecallConsumer::Trainer];
assert!(
!env.matches_for_replay(&env3),
"Different consumers must not match"
);
}
}