use crate::experience_store::{
EventQuery, ExperienceEvent, ExperienceRecord, ExperienceStore, ExperienceStoreResult,
UserExperienceEvent,
};
use crate::kernel_boundary::DecisionStep;
use crate::types::TenantId;
use converge_pack::UnitInterval;
use serde::{Deserialize, Serialize};
/// Purpose for which a recall is performed.
///
/// A [`RecallPolicy`] lists the purposes it permits in `allowed_uses`.
///
/// Variant order is significant: the discriminant is cast to `u8` and folded
/// into `RecallPolicy::snapshot_hash` and
/// `RecallProvenanceEnvelope::envelope_hash`, so reordering variants changes
/// those digests.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RecallUse {
/// The `Default` purpose, and the only one permitted by a default policy.
RuntimeAugmentation,
/// Not permitted unless explicitly listed in `RecallPolicy::allowed_uses`.
TrainingCandidateSelection,
}
impl Default for RecallUse {
fn default() -> Self {
Self::RuntimeAugmentation
}
}
/// Component that consumes the result of a recall.
///
/// Recorded in `RecallProvenanceEnvelope::consumers`. Variant order is
/// significant: the discriminant is cast to `u8` and folded into
/// `RecallProvenanceEnvelope::envelope_hash`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RecallConsumer {
/// The `Default` consumer.
Kernel,
Analytics,
Trainer,
}
impl Default for RecallConsumer {
fn default() -> Self {
Self::Kernel
}
}
/// Policy that gates and bounds recall.
///
/// Every field is folded into `snapshot_hash`, so any change to the policy
/// yields a different snapshot digest.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallPolicy {
/// Master switch; `recall_from_store` returns no candidates when `false`.
pub enabled: bool,
/// Upper bound on returned candidates; `recall_from_store` uses the smaller
/// of this and the query's `top_k`.
pub max_k_total: usize,
/// Token budget for injected recall content.
/// NOTE(review): not enforced anywhere in this file — confirm where it is applied.
pub max_tokens_injection: usize,
/// Candidates whose confidence is below this value are dropped by
/// `recall_from_store`.
pub min_score_threshold: UnitInterval,
/// Resource budgets for a recall operation.
pub budgets: RecallBudgets,
/// Purposes this policy permits. When absent from serialized input it
/// defaults to `[RecallUse::RuntimeAugmentation]`.
#[serde(default = "default_allowed_uses")]
pub allowed_uses: Vec<RecallUse>,
/// Factor by which `recall_from_store` scales each surviving candidate's
/// confidence. When absent from serialized input it defaults to
/// `UnitInterval::ONE`.
#[serde(default = "default_prior_weight")]
pub prior_weight: UnitInterval,
}
/// Serde fallback for `RecallPolicy::prior_weight`; also used by
/// `RecallPolicy::default`. Returns `UnitInterval::ONE`.
fn default_prior_weight() -> UnitInterval {
UnitInterval::ONE
}
/// Serde fallback for `RecallPolicy::allowed_uses`; also used by
/// `RecallPolicy::default`. Only runtime augmentation is permitted unless a
/// policy opts in to more.
fn default_allowed_uses() -> Vec<RecallUse> {
    Vec::from([RecallUse::RuntimeAugmentation])
}
impl Default for RecallPolicy {
fn default() -> Self {
Self {
enabled: false,
max_k_total: 5,
max_tokens_injection: 500,
min_score_threshold: UnitInterval::clamped(0.5),
budgets: RecallBudgets::default(),
allowed_uses: default_allowed_uses(),
prior_weight: default_prior_weight(),
}
}
}
impl RecallPolicy {
    /// Returns the default policy with recall switched on.
    #[must_use]
    pub fn enabled() -> Self {
        Self {
            enabled: true,
            ..Self::default()
        }
    }

    /// Returns the default policy, which has recall switched off.
    #[must_use]
    pub fn disabled() -> Self {
        Default::default()
    }

    /// Reports whether `purpose` is listed in `allowed_uses`.
    #[must_use]
    pub fn is_use_allowed(&self, purpose: RecallUse) -> bool {
        self.allowed_uses.iter().any(|allowed| *allowed == purpose)
    }

    /// Digest of every policy field, rendered as 16 lowercase hex digits.
    ///
    /// Unit-interval fields are hashed via their basis-point representation.
    /// The digest comes from the standard library's `DefaultHasher`, whose
    /// algorithm is not guaranteed to stay the same across Rust releases.
    #[must_use]
    pub fn snapshot_hash(&self) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let Self {
            enabled,
            max_k_total,
            max_tokens_injection,
            min_score_threshold,
            budgets,
            allowed_uses,
            prior_weight,
        } = self;

        let mut state = DefaultHasher::new();

        // The feed order below defines the digest; do not reorder.
        enabled.hash(&mut state);
        max_k_total.hash(&mut state);
        max_tokens_injection.hash(&mut state);
        min_score_threshold.to_basis_points().hash(&mut state);

        budgets.max_latency_ms.hash(&mut state);
        budgets.max_embedding_calls.hash(&mut state);
        budgets.max_tokens_per_candidate.hash(&mut state);

        // Each permitted use contributes its discriminant as a single byte.
        allowed_uses
            .iter()
            .map(|allowed| *allowed as u8)
            .for_each(|tag| tag.hash(&mut state));

        prior_weight.to_basis_points().hash(&mut state);

        let digest = state.finish();
        format!("{:016x}", digest)
    }
}
/// Free-function form of the policy purpose check: `true` when `purpose`
/// appears in `policy.allowed_uses`.
#[must_use]
pub fn recall_use_allowed(policy: &RecallPolicy, purpose: RecallUse) -> bool {
    policy.allowed_uses.contains(&purpose)
}
/// Resource budgets attached to a `RecallPolicy`.
///
/// All three fields contribute to `RecallPolicy::snapshot_hash`.
/// NOTE(review): none of them is enforced in this file — confirm where the
/// limits are applied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallBudgets {
/// Latency budget; milliseconds per the field name. Defaults to 100.
pub max_latency_ms: u64,
/// Budget for embedding calls. Defaults to 3.
pub max_embedding_calls: usize,
/// Per-candidate token budget. Defaults to 100.
pub max_tokens_per_candidate: usize,
}
impl Default for RecallBudgets {
fn default() -> Self {
Self {
max_latency_ms: 100,
max_embedding_calls: 3,
max_tokens_per_candidate: 100,
}
}
}
/// A request for recalled experience.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallQuery {
/// Free-text query. Contributes to `query_hash`; `recall_from_store` does
/// not read it.
pub query_text: String,
/// Requested number of candidates; `recall_from_store` caps it at the
/// policy's `max_k_total`.
pub top_k: usize,
/// Optional decision step the query relates to. Contributes to `query_hash`.
pub step_context: Option<DecisionStep>,
/// Optional tenant identifier. When set, `recall_from_store` passes it to
/// the store query as a `TenantId`; when `None`, the store query carries no
/// tenant filter.
pub tenant_scope: Option<String>,
}
impl RecallQuery {
#[must_use]
pub fn new(query_text: impl Into<String>, top_k: usize) -> Self {
Self {
query_text: query_text.into(),
top_k,
step_context: None,
tenant_scope: None,
}
}
#[must_use]
pub fn with_step_context(mut self, step: DecisionStep) -> Self {
self.step_context = Some(step);
self
}
#[must_use]
pub fn with_tenant_scope(mut self, tenant: impl Into<String>) -> Self {
self.tenant_scope = Some(tenant.into());
self
}
#[must_use]
pub fn query_hash(&self) -> String {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
self.query_text.hash(&mut hasher);
self.top_k.hash(&mut hasher);
if let Some(ref step) = self.step_context {
step.as_str().hash(&mut hasher);
}
if let Some(ref tenant) = self.tenant_scope {
tenant.hash(&mut hasher);
}
format!("{:016x}", hasher.finish())
}
}
/// A single recalled item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallCandidate {
/// Identifier of the candidate; `make_candidate` fills it from the source
/// event's id.
pub id: String,
/// Human-readable one-line description of the recalled experience.
pub summary: String,
/// Score assigned when the candidate is created.
pub raw_score: UnitInterval,
/// Score after adjustments. In this file it is always set equal to
/// `raw_score` and never modified afterwards.
pub final_score: UnitInterval,
/// Relevance bucket derived from the creation-time score.
pub relevance: RelevanceLevel,
/// Category of experience the candidate was derived from.
pub source_type: CandidateSourceType,
/// Origin information for the candidate.
pub provenance: CandidateProvenance,
/// Confidence in the candidate. `recall_from_store` filters on it and then
/// scales it by the policy's `prior_weight`. Defaults to 0.5 when absent
/// from serialized input.
#[serde(default = "default_candidate_confidence")]
pub confidence: UnitInterval,
}
/// Serde fallback for `RecallCandidate::confidence`: 0.5.
fn default_candidate_confidence() -> UnitInterval {
UnitInterval::clamped(0.5)
}
/// Coarse relevance bucket for a recall candidate.
///
/// Derived from a score by `RelevanceLevel::from_score`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RelevanceLevel {
/// Score of at least 0.8.
High,
/// Score of at least 0.5 and below 0.8.
Medium,
/// Score below 0.5.
Low,
}
impl RelevanceLevel {
#[must_use]
pub fn from_score(score: UnitInterval) -> Self {
let score = score.as_f64();
if score >= 0.8 {
Self::High
} else if score >= 0.5 {
Self::Medium
} else {
Self::Low
}
}
#[must_use]
pub fn as_str(&self) -> &'static str {
match self {
Self::High => "high",
Self::Medium => "medium",
Self::Low => "low",
}
}
}
/// Category of experience a recall candidate was derived from.
///
/// The per-variant notes describe how `record_to_candidate` in this file
/// assigns each category.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CandidateSourceType {
/// Assigned to engine outcomes recorded with `passed: false`.
SimilarFailure,
/// Assigned to granted user approvals.
SimilarSuccess,
/// Assigned to user corrections and boundary adjustments.
Runbook,
/// Not produced by any mapping in this file.
AdapterConfig,
/// Assigned to user overrides and rejected user approvals.
AntiPattern,
}
/// Origin information attached to a `RecallCandidate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateProvenance {
/// Creation time as a string; `make_candidate` copies the source event's
/// `occurred_at` value verbatim.
pub created_at: String,
/// Identifier of the originating chain, if known. `make_candidate` leaves
/// it `None`.
pub source_chain_id: Option<String>,
/// Decision step the candidate originated from, if known. `make_candidate`
/// leaves it `None`.
pub source_step: Option<DecisionStep>,
/// Version label of the corpus the candidate came from; `make_candidate`
/// uses `"experience-store-v0"`.
pub corpus_version: String,
}
/// Compact trace record for a recall operation.
///
/// NOTE(review): nothing in this file constructs or reads this type; the
/// field notes below are taken from the field names only — confirm against
/// the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallTraceLink {
/// Hash of the embedding.
pub embedding_hash: String,
/// Version label of the corpus.
pub corpus_version: String,
/// Identifier of the embedder.
pub embedder_id: String,
/// Number of candidates searched.
pub candidates_searched: usize,
/// Number of candidates returned.
pub candidates_returned: usize,
/// Latency; milliseconds per the field name.
pub latency_ms: u64,
}
/// Candidate id paired with its score, as recorded in a
/// `RecallProvenanceEnvelope`.
///
/// Both fields feed `RecallProvenanceEnvelope::envelope_hash`, and the whole
/// list is compared by `RecallProvenanceEnvelope::matches_for_replay`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CandidateScore {
/// Candidate identifier.
pub id: String,
/// Score recorded for the candidate.
pub score: UnitInterval,
}
/// Provenance record describing one recall operation: what was asked, with
/// which embedder, corpus and policy, for whom, and what came back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecallProvenanceEnvelope {
/// Digest of the query. Presumably produced by `RecallQuery::query_hash` —
/// confirm against the producer.
pub query_hash: String,
/// Digest of the input handed to the embedder.
pub embedding_input_hash: String,
/// Digest of the resulting embedding. Included in `envelope_hash` but not
/// compared by `matches_for_replay`.
pub embedding_hash: String,
/// Identifier of the embedder.
pub embedder_id: String,
/// Digest of the embedder's settings.
pub embedder_settings_hash: String,
/// Fingerprint of the corpus that was searched.
pub corpus_fingerprint: String,
/// Digest of the policy in force. Presumably produced by
/// `RecallPolicy::snapshot_hash` — confirm against the producer.
pub policy_snapshot_hash: String,
/// Purpose of the recall. Defaults to `RecallUse::RuntimeAugmentation` when
/// absent from serialized input.
#[serde(default)]
pub purpose: RecallUse,
/// Consumers of the recall result. Defaults to an empty list when absent
/// from serialized input.
#[serde(default)]
pub consumers: Vec<RecallConsumer>,
/// Id and score of each candidate.
pub candidate_scores: Vec<CandidateScore>,
/// Number of candidates searched.
pub candidates_searched: usize,
/// Number of candidates returned.
pub candidates_returned: usize,
/// Why the recall stopped, if recorded. Not part of `envelope_hash`.
pub stop_reason: Option<StopReason>,
/// Latency; milliseconds per the field name.
pub latency_ms: u64,
/// Time of the recall, as a string.
pub timestamp: String,
/// Signature over the envelope; `"unsigned"` when absent from serialized
/// input. Not part of `envelope_hash`.
#[serde(default = "default_signature")]
pub signature: String,
}
/// Serde fallback for `RecallProvenanceEnvelope::signature`: the literal
/// marker `"unsigned"`.
fn default_signature() -> String {
    String::from("unsigned")
}
impl RecallProvenanceEnvelope {
    /// Digest of the envelope, rendered as 16 lowercase hex digits.
    ///
    /// Covers every field except `stop_reason` and `signature`. The digest
    /// comes from the standard library's `DefaultHasher`, whose algorithm is
    /// not guaranteed to stay the same across Rust releases.
    #[must_use]
    pub fn envelope_hash(&self) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut state = DefaultHasher::new();

        // The feed order below defines the digest; do not reorder.
        for text in [
            &self.query_hash,
            &self.embedding_input_hash,
            &self.embedding_hash,
            &self.embedder_id,
            &self.embedder_settings_hash,
            &self.corpus_fingerprint,
            &self.policy_snapshot_hash,
        ] {
            text.hash(&mut state);
        }

        // Purpose and consumers contribute their discriminants as single bytes.
        (self.purpose as u8).hash(&mut state);
        self.consumers
            .iter()
            .for_each(|consumer| (*consumer as u8).hash(&mut state));

        self.candidate_scores.iter().for_each(|entry| {
            entry.id.hash(&mut state);
            entry.score.to_basis_points().hash(&mut state);
        });

        self.candidates_searched.hash(&mut state);
        self.candidates_returned.hash(&mut state);
        self.latency_ms.hash(&mut state);
        self.timestamp.hash(&mut state);

        let digest = state.finish();
        format!("{:016x}", digest)
    }

    /// Reports whether `other` describes the same recall for replay purposes.
    ///
    /// Compares the query, embedding input, embedder identity and settings,
    /// corpus, policy, purpose, consumers and candidate scores. The embedding
    /// hash, counts, stop reason, latency, timestamp and signature are not
    /// compared.
    #[must_use]
    pub fn matches_for_replay(&self, other: &Self) -> bool {
        let same_input = self.query_hash == other.query_hash
            && self.embedding_input_hash == other.embedding_input_hash;
        let same_embedder = self.embedder_id == other.embedder_id
            && self.embedder_settings_hash == other.embedder_settings_hash;
        let same_corpus_and_policy = self.corpus_fingerprint == other.corpus_fingerprint
            && self.policy_snapshot_hash == other.policy_snapshot_hash;
        let same_audience = self.purpose == other.purpose && self.consumers == other.consumers;
        let same_results = self.candidate_scores == other.candidate_scores;

        same_input && same_embedder && same_corpus_and_policy && same_audience && same_results
    }

    /// One-line description for logs: the first eight characters of the query
    /// and corpus digests, returned/searched counts, and latency.
    #[must_use]
    pub fn summary(&self) -> String {
        let Self {
            query_hash,
            corpus_fingerprint,
            candidates_returned,
            candidates_searched,
            latency_ms,
            ..
        } = self;
        format!(
            "Recall[query:{:.8}...][corpus:{:.8}...][{}/{} candidates][{}ms]",
            query_hash, corpus_fingerprint, candidates_returned, candidates_searched, latency_ms
        )
    }
}
/// Reason a recall operation stopped, recorded in
/// `RecallProvenanceEnvelope::stop_reason`.
///
/// NOTE(review): no code in this file produces these values; the variant
/// names are the only description available here — confirm the exact
/// conditions against the producer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum StopReason {
ReachedTopK,
BudgetExhausted,
BelowThreshold,
TokenLimitReached,
LatencyExceeded,
EmbedderNotDeterministic,
TenantScopeMissing,
}
pub fn recall_from_store(
store: &dyn ExperienceStore,
query: &RecallQuery,
policy: &RecallPolicy,
) -> ExperienceStoreResult<Vec<RecallCandidate>> {
if !policy.enabled {
return Ok(Vec::new());
}
let event_query = EventQuery {
tenant_id: query.tenant_scope.as_deref().map(TenantId::new),
..Default::default()
};
let records = store.query_records(&event_query)?;
let limit = query.top_k.min(policy.max_k_total);
let candidates = records
.iter()
.rev()
.filter_map(record_to_candidate)
.filter(|c| c.confidence >= policy.min_score_threshold)
.take(limit)
.map(|mut c| {
c.confidence = c.confidence.scale_by(policy.prior_weight);
c
})
.collect();
Ok(candidates)
}
/// Converts a stored experience record into a recall candidate, or `None`
/// when the record carries nothing worth recalling.
///
/// Every user event maps to a candidate with a fixed confidence and source
/// type. Of the engine events, only an outcome recorded with `passed: false`
/// maps to a candidate.
fn record_to_candidate(record: &ExperienceRecord) -> Option<RecallCandidate> {
    match record {
        ExperienceRecord::User(envelope) => {
            // Decide what to say about the event; the candidate itself is
            // assembled once below.
            let (summary, confidence, source_type) = match &envelope.event {
                UserExperienceEvent::UserOverrideIssued { reason, .. } => (
                    format!("user override: {reason}"),
                    UnitInterval::clamped(0.9),
                    CandidateSourceType::AntiPattern,
                ),
                UserExperienceEvent::UserApprovalGranted { reason, .. } => (
                    format!("user approval: {}", reason.as_deref().unwrap_or("granted")),
                    UnitInterval::clamped(0.7),
                    CandidateSourceType::SimilarSuccess,
                ),
                UserExperienceEvent::UserApprovalRejected { reason, .. } => (
                    format!(
                        "user rejection: {}",
                        reason.as_deref().unwrap_or("declined")
                    ),
                    UnitInterval::clamped(0.7),
                    CandidateSourceType::AntiPattern,
                ),
                UserExperienceEvent::UserCorrection { target, reason, .. } => (
                    format!("correction ({}): {reason}", target.kind_label()),
                    UnitInterval::clamped(0.85),
                    CandidateSourceType::Runbook,
                ),
                UserExperienceEvent::UserBoundaryAdjusted {
                    boundary,
                    target,
                    reason,
                    ..
                } => (
                    format!(
                        "{} boundary adjusted on {}: {reason}",
                        boundary_kind_label(*boundary),
                        boundary_target_label(target)
                    ),
                    UnitInterval::clamped(0.8),
                    CandidateSourceType::Runbook,
                ),
            };
            Some(make_candidate(
                envelope.event_id.as_str(),
                envelope.occurred_at.as_str(),
                summary,
                confidence,
                source_type,
            ))
        }
        ExperienceRecord::Engine(envelope) => match &envelope.event {
            ExperienceEvent::OutcomeRecorded {
                passed: false,
                stop_reason,
                ..
            } => {
                let cause = stop_reason
                    .as_ref()
                    .map_or_else(|| "unspecified".to_string(), ToString::to_string);
                Some(make_candidate(
                    envelope.event_id.as_str(),
                    envelope.occurred_at.as_str(),
                    format!("outcome failed: {}", cause),
                    UnitInterval::clamped(0.6),
                    CandidateSourceType::SimilarFailure,
                ))
            }
            // Every other engine event is ignored for recall.
            _ => None,
        },
    }
}
fn boundary_kind_label(kind: crate::BoundaryKind) -> &'static str {
match kind {
crate::BoundaryKind::Authority => "authority",
crate::BoundaryKind::Forbidden => "forbidden",
crate::BoundaryKind::Expiry => "expiry",
crate::BoundaryKind::Reversibility => "reversibility",
}
}
fn boundary_target_label(target: &crate::BoundaryTarget) -> String {
match target {
crate::BoundaryTarget::Pack { pack_id } => format!("pack:{}", pack_id.as_str()),
crate::BoundaryTarget::Intent { intent_id } => format!("intent:{}", intent_id.as_str()),
crate::BoundaryTarget::Global => "global".to_string(),
}
}
/// Assembles a `RecallCandidate` from the pieces extracted from an event.
///
/// `confidence` seeds `raw_score`, `final_score` and `confidence` alike, and
/// determines the relevance bucket. The provenance records `occurred_at` as
/// the creation time, carries no chain or step, and is stamped with the
/// corpus version `"experience-store-v0"`.
fn make_candidate(
    id: &str,
    occurred_at: &str,
    summary: String,
    confidence: UnitInterval,
    source_type: CandidateSourceType,
) -> RecallCandidate {
    let provenance = CandidateProvenance {
        created_at: occurred_at.to_owned(),
        source_chain_id: None,
        source_step: None,
        corpus_version: String::from("experience-store-v0"),
    };
    let relevance = RelevanceLevel::from_score(confidence);

    RecallCandidate {
        id: id.to_owned(),
        summary,
        raw_score: confidence,
        final_score: confidence,
        relevance,
        source_type,
        provenance,
        confidence,
    }
}
// Unit tests for recall policy gating, digest determinism, replay matching,
// and the mapping from user experience events to recall candidates.
#[cfg(test)]
mod tests {
use super::*;
use crate::{
BoundaryKind, BoundaryTarget, ContentHash, CorrectionTarget, ExperienceRecord, FactContent,
FactContentKind, UserExperienceEventEnvelope,
};
// Wraps `event` in a user envelope with id "evt-user" and converts it,
// panicking if the event does not map to a candidate.
fn candidate_for_user_event(event: UserExperienceEvent) -> RecallCandidate {
let envelope = UserExperienceEventEnvelope::new("evt-user", event);
record_to_candidate(&ExperienceRecord::User(envelope)).expect("candidate")
}
#[test]
fn test_recall_policy_enabled() {
let policy = RecallPolicy::enabled();
assert!(policy.enabled);
}
#[test]
fn test_recall_policy_disabled() {
let policy = RecallPolicy::disabled();
assert!(!policy.enabled);
}
// One representative score per bucket: 0.9 -> High, 0.6 -> Medium, 0.3 -> Low.
#[test]
fn test_relevance_from_score() {
assert_eq!(
RelevanceLevel::from_score(UnitInterval::clamped(0.9)),
RelevanceLevel::High
);
assert_eq!(
RelevanceLevel::from_score(UnitInterval::clamped(0.6)),
RelevanceLevel::Medium
);
assert_eq!(
RelevanceLevel::from_score(UnitInterval::clamped(0.3)),
RelevanceLevel::Low
);
}
#[test]
fn test_recall_query_builder() {
let query = RecallQuery::new("test", 5)
.with_step_context(DecisionStep::Reasoning)
.with_tenant_scope("tenant-1");
assert_eq!(query.query_text, "test");
assert_eq!(query.top_k, 5);
assert_eq!(query.step_context, Some(DecisionStep::Reasoning));
assert_eq!(query.tenant_scope, Some("tenant-1".to_string()));
}
// A rejected approval is recalled as an anti-pattern at confidence 0.7.
#[test]
fn recall_maps_rejected_user_approval_to_antipattern() {
let candidate = candidate_for_user_event(UserExperienceEvent::UserApprovalRejected {
gate_request_id: "gate-1".into(),
actor: "operator-1".into(),
policy_snapshot_hash: None,
reason: Some("risk too high".into()),
});
assert_eq!(candidate.summary, "user rejection: risk too high");
assert_eq!(candidate.confidence, UnitInterval::clamped(0.7));
assert_eq!(candidate.source_type, CandidateSourceType::AntiPattern);
}
// A user correction is recalled as a runbook entry at confidence 0.85.
#[test]
fn recall_maps_user_correction_to_runbook() {
let candidate = candidate_for_user_event(UserExperienceEvent::UserCorrection {
target: CorrectionTarget::Fact {
fact_id: "fact-1".into(),
},
actor: "operator-1".into(),
policy_snapshot_hash: None,
original_content: ContentHash::zero(),
corrected_content: FactContent::new(FactContentKind::Claim, "corrected"),
reason: "source was stale".into(),
});
assert_eq!(candidate.summary, "correction (fact): source was stale");
assert_eq!(candidate.confidence, UnitInterval::clamped(0.85));
assert_eq!(candidate.source_type, CandidateSourceType::Runbook);
}
// A boundary adjustment is recalled as a runbook entry at confidence 0.8,
// with the boundary kind and target spelled out in the summary.
#[test]
fn recall_maps_boundary_adjustment_to_scoped_runbook() {
let candidate = candidate_for_user_event(UserExperienceEvent::UserBoundaryAdjusted {
boundary: BoundaryKind::Authority,
target: BoundaryTarget::Pack {
pack_id: "loan-pack".into(),
},
actor: "operator-1".into(),
policy_snapshot_hash: None,
previous_value: serde_json::json!({"limit": 100}),
new_value: serde_json::json!({"limit": 50}),
reason: "manual review needed".into(),
});
assert_eq!(
candidate.summary,
"authority boundary adjusted on pack:loan-pack: manual review needed"
);
assert_eq!(candidate.confidence, UnitInterval::clamped(0.8));
assert_eq!(candidate.source_type, CandidateSourceType::Runbook);
}
// The default policy must permit runtime augmentation and nothing else.
#[test]
fn test_recall_policy_defaults_to_runtime_only() {
let policy = RecallPolicy::default();
assert!(
policy
.allowed_uses
.contains(&RecallUse::RuntimeAugmentation),
"Default policy must allow RuntimeAugmentation"
);
assert!(
!policy
.allowed_uses
.contains(&RecallUse::TrainingCandidateSelection),
"Default policy must NOT allow TrainingCandidateSelection"
);
}
#[test]
fn test_recall_training_purpose_is_blocked_in_kernel() {
let policy = RecallPolicy {
allowed_uses: vec![RecallUse::RuntimeAugmentation],
..Default::default()
};
assert!(
recall_use_allowed(&policy, RecallUse::RuntimeAugmentation),
"RuntimeAugmentation must be allowed"
);
assert!(
!recall_use_allowed(&policy, RecallUse::TrainingCandidateSelection),
"TrainingCandidateSelection must be blocked when not in allowed_uses"
);
}
#[test]
fn test_recall_training_can_be_explicitly_enabled() {
let policy = RecallPolicy {
allowed_uses: vec![
RecallUse::RuntimeAugmentation,
RecallUse::TrainingCandidateSelection,
],
..Default::default()
};
assert!(recall_use_allowed(&policy, RecallUse::RuntimeAugmentation));
assert!(recall_use_allowed(
&policy,
RecallUse::TrainingCandidateSelection
));
}
// A threshold of 1.2 lies outside the unit interval, so deserializing the
// policy is expected to fail rather than clamp the value.
#[test]
fn recall_policy_deserialization_rejects_out_of_range_threshold() {
let json = r#"{
"enabled": true,
"max_k_total": 5,
"max_tokens_injection": 500,
"min_score_threshold": 1.2,
"budgets": {
"max_latency_ms": 100,
"max_embedding_calls": 3,
"max_tokens_per_candidate": 100
},
"allowed_uses": ["RuntimeAugmentation"],
"prior_weight": 1.0
}"#;
let result = serde_json::from_str::<RecallPolicy>(json);
assert!(result.is_err());
}
#[test]
fn test_policy_hash_deterministic() {
let policy = RecallPolicy::default();
let hash1 = policy.snapshot_hash();
let hash2 = policy.snapshot_hash();
assert_eq!(hash1, hash2, "Same policy must produce same hash");
}
#[test]
fn test_policy_hash_changes_with_allowed_uses() {
let policy1 = RecallPolicy::default();
let policy2 = RecallPolicy {
allowed_uses: vec![
RecallUse::RuntimeAugmentation,
RecallUse::TrainingCandidateSelection,
],
..Default::default()
};
assert_ne!(
policy1.snapshot_hash(),
policy2.snapshot_hash(),
"Different allowed_uses must produce different hash"
);
}
#[test]
fn test_recall_query_hash_deterministic() {
let query = RecallQuery::new("test query", 5);
let hash1 = query.query_hash();
let hash2 = query.query_hash();
assert_eq!(hash1, hash2, "Same query must produce same hash");
}
// An envelope matches its own clone; changing only the purpose, or only the
// consumers, must break the match.
#[test]
fn test_recall_provenance_matches_for_replay() {
let env = RecallProvenanceEnvelope {
query_hash: "q".to_string(),
embedding_input_hash: "e".to_string(),
embedding_hash: "h".to_string(),
embedder_id: "id".to_string(),
embedder_settings_hash: "s".to_string(),
corpus_fingerprint: "c".to_string(),
policy_snapshot_hash: "p".to_string(),
purpose: RecallUse::RuntimeAugmentation,
consumers: vec![RecallConsumer::Kernel],
candidate_scores: vec![],
candidates_searched: 10,
candidates_returned: 2,
stop_reason: None,
latency_ms: 10,
timestamp: "t".to_string(),
signature: "unsigned".to_string(),
};
assert!(env.matches_for_replay(&env.clone()));
let mut env2 = env.clone();
env2.purpose = RecallUse::TrainingCandidateSelection;
assert!(
!env.matches_for_replay(&env2),
"Different purpose must not match"
);
let mut env3 = env.clone();
env3.consumers = vec![RecallConsumer::Trainer];
assert!(
!env.matches_for_replay(&env3),
"Different consumers must not match"
);
}
}