1use std::sync::Arc;
10use std::time::Duration;
11
12use zeph_llm::any::AnyProvider;
13use zeph_llm::provider::LlmProvider as _;
14
15use crate::embedding_store::EmbeddingStore;
16use zeph_common::math::cosine_similarity;
17
18#[derive(Debug, Clone, serde::Serialize)]
20pub struct AdmissionFactors {
21 pub future_utility: f32,
23 pub factual_confidence: f32,
25 pub semantic_novelty: f32,
27 pub temporal_recency: f32,
29 pub content_type_prior: f32,
31 pub goal_utility: f32,
34}
35
36#[derive(Debug, Clone)]
38pub struct AdmissionDecision {
39 pub admitted: bool,
40 pub composite_score: f32,
41 pub factors: AdmissionFactors,
42}
43
/// Relative importance of each admission factor.
///
/// Raw values may be on any non-negative scale; call
/// [`AdmissionWeights::normalized`] to obtain weights that sum to `1.0`.
#[derive(Debug, Clone, Copy)]
pub struct AdmissionWeights {
    /// Weight of the future-utility factor.
    pub future_utility: f32,
    /// Weight of the factual-confidence factor.
    pub factual_confidence: f32,
    /// Weight of the semantic-novelty factor.
    pub semantic_novelty: f32,
    /// Weight of the temporal-recency factor.
    pub temporal_recency: f32,
    /// Weight of the content-type prior.
    pub content_type_prior: f32,
    /// Weight of the goal-utility factor.
    pub goal_utility: f32,
}

impl AdmissionWeights {
    /// Equal split across the five base factors, used when no usable weight
    /// was supplied. Goal utility stays disabled in that case.
    const FALLBACK: Self = Self {
        future_utility: 0.2,
        factual_confidence: 0.2,
        semantic_novelty: 0.2,
        temporal_recency: 0.2,
        content_type_prior: 0.2,
        goal_utility: 0.0,
    };

    /// Returns a copy whose weights are non-negative and sum to `1.0`.
    ///
    /// Negative, NaN and infinite weights are treated as `0.0`. When the
    /// remaining weights sum to (almost) zero, or the sum overflows, an equal
    /// split of `0.2` across the five base factors is returned with
    /// `goal_utility` set to `0.0`.
    #[must_use]
    pub fn normalized(&self) -> Self {
        // An infinite weight would otherwise become `inf / inf = NaN` after the
        // division below and poison every score computed from these weights.
        let sanitize = |w: f32| if w.is_finite() { w.max(0.0) } else { 0.0 };

        let fu = sanitize(self.future_utility);
        let fc = sanitize(self.factual_confidence);
        let sn = sanitize(self.semantic_novelty);
        let tr = sanitize(self.temporal_recency);
        let cp = sanitize(self.content_type_prior);
        let gu = sanitize(self.goal_utility);

        let sum = fu + fc + sn + tr + cp + gu;
        // Finite but huge weights can still overflow the sum to infinity, which
        // would divide every weight down to zero.
        if !sum.is_finite() || sum <= f32::EPSILON {
            return Self::FALLBACK;
        }

        Self {
            future_utility: fu / sum,
            factual_confidence: fc / sum,
            semantic_novelty: sn / sum,
            temporal_recency: tr / sum,
            content_type_prior: cp / sum,
            goal_utility: gu / sum,
        }
    }
}
90
91#[derive(Debug, Clone)]
93pub struct GoalGateConfig {
94 pub threshold: f32,
96 pub provider: Option<AnyProvider>,
98 pub weight: f32,
100}
101
102pub struct AdmissionControl {
128 threshold: f32,
129 fast_path_margin: f32,
130 weights: AdmissionWeights,
131 provider: Option<AnyProvider>,
134 goal_gate: Option<GoalGateConfig>,
136}
137
138impl AdmissionControl {
139 #[must_use]
146 pub fn new(threshold: f32, fast_path_margin: f32, weights: AdmissionWeights) -> Self {
147 Self {
148 threshold,
149 fast_path_margin,
150 weights: weights.normalized(),
151 provider: None,
152 goal_gate: None,
153 }
154 }
155
156 #[must_use]
160 pub fn with_provider(mut self, provider: AnyProvider) -> Self {
161 self.provider = Some(provider);
162 self
163 }
164
165 #[must_use]
167 pub fn with_goal_gate(mut self, config: GoalGateConfig) -> Self {
168 let gu = config.weight.clamp(0.0, 1.0);
170 let mut weights = self.weights;
171 weights.goal_utility = gu;
172 weights.future_utility = (weights.future_utility - gu).max(0.0);
174 self.weights = weights.normalized();
175 self.goal_gate = Some(config);
176 self
177 }
178
179 #[must_use]
181 pub fn threshold(&self) -> f32 {
182 self.threshold
183 }
184
185 #[cfg_attr(
195 feature = "profiling",
196 tracing::instrument(name = "memory.admission", skip_all)
197 )]
198 pub async fn evaluate(
199 &self,
200 content: &str,
201 role: &str,
202 fallback_provider: &AnyProvider,
203 qdrant: Option<&Arc<EmbeddingStore>>,
204 goal_text: Option<&str>,
205 ) -> AdmissionDecision {
206 let effective_provider = self.provider.as_ref().unwrap_or(fallback_provider);
207 let factual_confidence = compute_factual_confidence(content);
208 let temporal_recency = 1.0f32;
209 let content_type_prior = compute_content_type_prior(role);
210
211 let semantic_novelty = compute_semantic_novelty(content, effective_provider, qdrant).await;
213
214 let goal_utility = match &self.goal_gate {
216 Some(gate) => {
217 let effective_goal = goal_text.filter(|t| t.trim().len() >= 10);
218 if let Some(goal) = effective_goal {
219 compute_goal_utility(content, goal, gate, effective_provider, qdrant).await
220 } else {
221 0.0
222 }
223 }
224 None => 0.0,
225 };
226
227 let heuristic_score = self.weighted_score(
229 0.5,
230 factual_confidence,
231 semantic_novelty,
232 temporal_recency,
233 content_type_prior,
234 goal_utility,
235 );
236
237 let future_utility = if heuristic_score >= self.threshold + self.fast_path_margin {
239 0.5 } else {
241 compute_future_utility(content, role, effective_provider).await
242 };
243
244 let composite_score = self.weighted_score(
245 future_utility,
246 factual_confidence,
247 semantic_novelty,
248 temporal_recency,
249 content_type_prior,
250 goal_utility,
251 );
252
253 let admitted = composite_score >= self.threshold
254 || heuristic_score >= self.threshold + self.fast_path_margin;
255
256 AdmissionDecision {
257 admitted,
258 composite_score,
259 factors: AdmissionFactors {
260 future_utility,
261 factual_confidence,
262 semantic_novelty,
263 temporal_recency,
264 content_type_prior,
265 goal_utility,
266 },
267 }
268 }
269
270 fn weighted_score(
271 &self,
272 future_utility: f32,
273 factual_confidence: f32,
274 semantic_novelty: f32,
275 temporal_recency: f32,
276 content_type_prior: f32,
277 goal_utility: f32,
278 ) -> f32 {
279 future_utility * self.weights.future_utility
280 + factual_confidence * self.weights.factual_confidence
281 + semantic_novelty * self.weights.semantic_novelty
282 + temporal_recency * self.weights.temporal_recency
283 + content_type_prior * self.weights.content_type_prior
284 + goal_utility * self.weights.goal_utility
285 }
286}
287
/// Heuristic confidence score based on hedging language.
///
/// Starts at `1.0` and subtracts `0.1` for every distinct hedging marker found
/// in `content` (case-insensitive substring match). The result never drops
/// below `0.2`.
#[must_use]
pub fn compute_factual_confidence(content: &str) -> f32 {
    // No marker may contain another one: "i'm not sure" / "im not sure" are
    // already matched by "not sure", and listing them as well would penalise a
    // single hedge twice.
    const HEDGING_MARKERS: &[&str] = &[
        "maybe",
        "might",
        "perhaps",
        "i think",
        "i believe",
        "not sure",
        "could be",
        "possibly",
        "probably",
        "uncertain",
        "not certain",
        "not confident",
    ];
    const PENALTY_PER_MARKER: f32 = 0.1;
    const MIN_CONFIDENCE: f32 = 0.2;

    let lower = content.to_lowercase();
    let matches = HEDGING_MARKERS
        .iter()
        .filter(|&&marker| lower.contains(marker))
        .count();
    // `matches` is bounded by the marker list length, so the cast is exact.
    #[allow(clippy::cast_precision_loss)]
    let penalty = (matches as f32) * PENALTY_PER_MARKER;
    (1.0 - penalty).max(MIN_CONFIDENCE)
}
320
/// Prior admission score for content produced by the given `role`.
///
/// Role names are matched exactly (case-sensitive); any role that is not
/// listed receives the neutral prior of `0.5`.
#[must_use]
pub fn compute_content_type_prior(role: &str) -> f32 {
    /// Known roles and their priors.
    const ROLE_PRIORS: &[(&str, f32)] = &[
        ("user", 0.7),
        ("assistant", 0.6),
        ("tool", 0.8),
        ("tool_result", 0.8),
        ("system", 0.3),
    ];
    /// Prior for roles missing from `ROLE_PRIORS`.
    const UNKNOWN_ROLE_PRIOR: f32 = 0.5;

    ROLE_PRIORS
        .iter()
        .find(|(known, _)| *known == role)
        .map_or(UNKNOWN_ROLE_PRIOR, |&(_, prior)| prior)
}
335
336async fn compute_semantic_novelty(
340 content: &str,
341 provider: &AnyProvider,
342 qdrant: Option<&Arc<EmbeddingStore>>,
343) -> f32 {
344 let Some(store) = qdrant else {
345 return 1.0;
346 };
347 if !provider.supports_embeddings() {
348 return 1.0;
349 }
350 let vector = match provider.embed(content).await {
351 Ok(v) => v,
352 Err(e) => {
353 tracing::debug!(error = %e, "A-MAC: failed to embed for novelty, using 1.0");
354 return 1.0;
355 }
356 };
357 let Ok(vector_size) = u64::try_from(vector.len()) else {
358 return 1.0;
359 };
360 if let Err(e) = store.ensure_collection(vector_size).await {
361 tracing::debug!(error = %e, "A-MAC: collection not ready for novelty check");
362 return 1.0;
363 }
364 let results = match store.search(&vector, 3, None).await {
365 Ok(r) => r,
366 Err(e) => {
367 tracing::debug!(error = %e, "A-MAC: novelty search failed, using 1.0");
368 return 1.0;
369 }
370 };
371 let max_sim = results.iter().map(|r| r.score).fold(0.0f32, f32::max);
372 (1.0 - max_sim).max(0.0)
373}
374
375async fn compute_future_utility(content: &str, role: &str, provider: &AnyProvider) -> f32 {
379 use zeph_llm::provider::{Message, MessageMetadata, Role};
380
381 let system = "You are a memory relevance judge. Rate how likely this message will be \
382 referenced in future conversations on a scale of 0.0 to 1.0. \
383 Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
384
385 let user = format!(
386 "Role: {role}\nContent: {}\n\nFuture utility score (0.0-1.0):",
387 content.chars().take(500).collect::<String>()
388 );
389
390 let messages = vec![
391 Message {
392 role: Role::System,
393 content: system.to_owned(),
394 parts: vec![],
395 metadata: MessageMetadata::default(),
396 },
397 Message {
398 role: Role::User,
399 content: user,
400 parts: vec![],
401 metadata: MessageMetadata::default(),
402 },
403 ];
404
405 let result = match tokio::time::timeout(Duration::from_secs(8), provider.chat(&messages)).await
406 {
407 Ok(Ok(r)) => r,
408 Ok(Err(e)) => {
409 tracing::debug!(error = %e, "A-MAC: future_utility LLM call failed, using 0.5");
410 return 0.5;
411 }
412 Err(_) => {
413 tracing::debug!("A-MAC: future_utility LLM timed out, using 0.5");
414 return 0.5;
415 }
416 };
417
418 result.trim().parse::<f32>().unwrap_or(0.5).clamp(0.0, 1.0)
419}
420
421async fn compute_goal_utility(
432 content: &str,
433 goal_text: &str,
434 gate: &GoalGateConfig,
435 provider: &AnyProvider,
436 qdrant: Option<&Arc<EmbeddingStore>>,
437) -> f32 {
438 use zeph_llm::provider::LlmProvider as _;
439
440 if !provider.supports_embeddings() {
441 return 0.0;
442 }
443
444 let goal_emb = match provider.embed(goal_text).await {
445 Ok(v) => v,
446 Err(e) => {
447 tracing::debug!(error = %e, "goal_utility: failed to embed goal text, using 0.0");
448 return 0.0;
449 }
450 };
451 let content_emb = match provider.embed(content).await {
452 Ok(v) => v,
453 Err(e) => {
454 tracing::debug!(error = %e, "goal_utility: failed to embed content, using 0.0");
455 return 0.0;
456 }
457 };
458
459 let _ = qdrant; let similarity = cosine_similarity(&goal_emb, &content_emb);
463
464 let borderline_lo = gate.threshold - 0.1;
466 let borderline_hi = gate.threshold + 0.1;
467 let in_borderline = similarity >= borderline_lo && similarity <= borderline_hi;
468
469 let final_similarity = if in_borderline {
470 if let Some(ref goal_provider) = gate.provider {
471 refine_goal_utility_llm(content, goal_text, similarity, goal_provider).await
472 } else {
473 similarity
474 }
475 } else {
476 similarity
477 };
478
479 if final_similarity < gate.threshold {
481 0.0
482 } else {
483 final_similarity.max(0.1)
484 }
485}
486
487async fn refine_goal_utility_llm(
491 content: &str,
492 goal_text: &str,
493 embedding_sim: f32,
494 provider: &AnyProvider,
495) -> f32 {
496 use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
497
498 let system = "You are a memory relevance judge. Given a task goal and a candidate memory, \
499 rate how relevant the memory is to the goal on a scale of 0.0 to 1.0. \
500 Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
501
502 let user = format!(
503 "Goal: {}\nMemory: {}\n\nRelevance score (0.0-1.0):",
504 goal_text.chars().take(200).collect::<String>(),
505 content.chars().take(300).collect::<String>(),
506 );
507
508 let messages = vec![
509 Message {
510 role: Role::System,
511 content: system.to_owned(),
512 parts: vec![],
513 metadata: MessageMetadata::default(),
514 },
515 Message {
516 role: Role::User,
517 content: user,
518 parts: vec![],
519 metadata: MessageMetadata::default(),
520 },
521 ];
522
523 let result = match tokio::time::timeout(Duration::from_secs(6), provider.chat(&messages)).await
524 {
525 Ok(Ok(r)) => r,
526 Ok(Err(e)) => {
527 tracing::debug!(error = %e, "goal_utility LLM refinement failed, using embedding sim");
528 return embedding_sim;
529 }
530 Err(_) => {
531 tracing::debug!("goal_utility LLM refinement timed out, using embedding sim");
532 return embedding_sim;
533 }
534 };
535
536 result
537 .trim()
538 .parse::<f32>()
539 .unwrap_or(embedding_sim)
540 .clamp(0.0, 1.0)
541}
542
543pub fn log_admission_decision(
547 decision: &AdmissionDecision,
548 content_preview: &str,
549 role: &str,
550 threshold: f32,
551) {
552 if decision.admitted {
553 tracing::trace!(
554 role,
555 composite_score = decision.composite_score,
556 threshold,
557 content_preview,
558 "A-MAC: admitted"
559 );
560 } else {
561 tracing::debug!(
562 role,
563 composite_score = decision.composite_score,
564 threshold,
565 future_utility = decision.factors.future_utility,
566 factual_confidence = decision.factors.factual_confidence,
567 semantic_novelty = decision.factors.semantic_novelty,
568 content_type_prior = decision.factors.content_type_prior,
569 content_preview,
570 "A-MAC: rejected"
571 );
572 }
573}
574
/// Describes content that was refused admission.
#[derive(Debug)]
pub struct AdmissionRejected {
    /// Composite score the content received.
    pub composite_score: f32,
    /// Threshold the score failed to reach.
    pub threshold: f32,
}

impl std::fmt::Display for AdmissionRejected {
    /// Writes the score and threshold, each with three decimal places.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            composite_score,
            threshold,
        } = self;
        write!(
            f,
            "A-MAC admission rejected (score={:.3} < threshold={:.3})",
            composite_score, threshold
        )
    }
}
591
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds weights for the five base factors with goal utility disabled.
    fn weights(
        future_utility: f32,
        factual_confidence: f32,
        semantic_novelty: f32,
        temporal_recency: f32,
        content_type_prior: f32,
    ) -> AdmissionWeights {
        AdmissionWeights {
            future_utility,
            factual_confidence,
            semantic_novelty,
            temporal_recency,
            content_type_prior,
            goal_utility: 0.0,
        }
    }

    /// Sum of all six weights.
    fn total(w: &AdmissionWeights) -> f32 {
        w.future_utility
            + w.factual_confidence
            + w.semantic_novelty
            + w.temporal_recency
            + w.content_type_prior
            + w.goal_utility
    }

    #[test]
    fn factual_confidence_no_hedging() {
        assert!((compute_factual_confidence("The server uses TLS 1.3.") - 1.0).abs() < 0.01);
    }

    #[test]
    fn factual_confidence_with_one_marker() {
        let score = compute_factual_confidence("Maybe we should use TLS 1.3.");
        assert!((score - 0.9).abs() < 0.01);
    }

    #[test]
    fn factual_confidence_many_markers_floors_at_0_2() {
        // Seven distinct markers: heavily penalised but still above the floor.
        let seven = "maybe i think perhaps possibly might not sure i believe";
        let score = compute_factual_confidence(seven);
        assert!(score >= 0.2);
        assert!(score < 0.5);

        // Ten distinct markers would give 0.0 without the floor.
        let ten = "maybe i think perhaps possibly might not sure i believe \
                   could be probably uncertain";
        let floored = compute_factual_confidence(ten);
        assert!((floored - 0.2).abs() < 1e-6, "expected floor 0.2, got {floored}");
    }

    #[test]
    fn content_type_prior_values() {
        assert!((compute_content_type_prior("user") - 0.7).abs() < 0.01);
        assert!((compute_content_type_prior("assistant") - 0.6).abs() < 0.01);
        assert!((compute_content_type_prior("tool") - 0.8).abs() < 0.01);
        assert!((compute_content_type_prior("system") - 0.3).abs() < 0.01);
        assert!((compute_content_type_prior("unknown") - 0.5).abs() < 0.01);
    }

    #[test]
    fn content_type_prior_tool_result_alias() {
        assert!((compute_content_type_prior("tool_result") - 0.8).abs() < 0.01);
    }

    #[test]
    fn normalized_all_zero_weights_fall_back_to_equal_split() {
        let w = weights(0.0, 0.0, 0.0, 0.0, 0.0).normalized();
        assert!((w.future_utility - 0.2).abs() < 1e-6);
        assert!((w.factual_confidence - 0.2).abs() < 1e-6);
        assert!((w.semantic_novelty - 0.2).abs() < 1e-6);
        assert!((w.temporal_recency - 0.2).abs() < 1e-6);
        assert!((w.content_type_prior - 0.2).abs() < 1e-6);
        assert!(w.goal_utility.abs() < f32::EPSILON);
    }

    #[test]
    fn normalized_clamps_negative_weights() {
        let w = weights(2.0, 1.0, 1.0, 0.0, -3.0).normalized();
        assert!((w.future_utility - 0.5).abs() < 1e-6);
        assert!(w.content_type_prior.abs() < f32::EPSILON);
        assert!((total(&w) - 1.0).abs() < 1e-6);
    }

    #[test]
    fn admission_control_admits_high_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, weights(0.30, 0.15, 0.30, 0.10, 0.15));
        let score = ctrl.weighted_score(1.0, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(score >= 0.99);
        assert!(score >= ctrl.threshold);
    }

    #[test]
    fn admission_control_rejects_low_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, weights(0.30, 0.15, 0.30, 0.10, 0.15));
        let score = ctrl.weighted_score(0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
        assert!(score < ctrl.threshold);
    }

    #[test]
    fn fast_path_admits_when_heuristic_above_threshold_plus_margin() {
        let threshold = 0.40f32;
        let margin = 0.15f32;
        let ctrl = AdmissionControl::new(threshold, margin, weights(0.20, 0.20, 0.20, 0.20, 0.20));

        // Neutral future utility, every other factor at its maximum.
        let heuristic = ctrl.weighted_score(0.5, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(
            heuristic >= threshold + margin,
            "heuristic {heuristic} must exceed threshold+margin {}",
            threshold + margin
        );
    }

    #[test]
    fn slow_path_required_when_heuristic_below_threshold_plus_margin() {
        let threshold = 0.50f32;
        let margin = 0.20f32;
        let ctrl = AdmissionControl::new(threshold, margin, weights(0.40, 0.15, 0.15, 0.15, 0.15));

        let heuristic = ctrl.weighted_score(0.5, 0.3, 0.3, 0.3, 0.3, 0.0);
        assert!(
            heuristic < threshold + margin,
            "heuristic {heuristic} must be below threshold+margin {}",
            threshold + margin
        );
    }

    #[test]
    fn log_admission_decision_does_not_panic() {
        let admitted_decision = AdmissionDecision {
            admitted: true,
            composite_score: 0.75,
            factors: AdmissionFactors {
                future_utility: 0.8,
                factual_confidence: 0.9,
                semantic_novelty: 0.7,
                temporal_recency: 1.0,
                content_type_prior: 0.7,
                goal_utility: 0.0,
            },
        };
        log_admission_decision(&admitted_decision, "preview text", "user", 0.40);

        let rejected_decision = AdmissionDecision {
            admitted: false,
            composite_score: 0.20,
            factors: AdmissionFactors {
                future_utility: 0.1,
                factual_confidence: 0.2,
                semantic_novelty: 0.3,
                temporal_recency: 1.0,
                content_type_prior: 0.3,
                goal_utility: 0.0,
            },
        };
        log_admission_decision(&rejected_decision, "maybe short content", "assistant", 0.40);
    }

    #[test]
    fn admission_rejected_display() {
        let err = AdmissionRejected {
            composite_score: 0.25,
            threshold: 0.45,
        };
        let msg = format!("{err}");
        assert!(msg.contains("0.250"));
        assert!(msg.contains("0.450"));
    }

    #[test]
    fn threshold_accessor() {
        let ctrl = AdmissionControl::new(0.55, 0.10, weights(0.20, 0.20, 0.20, 0.20, 0.20));
        assert!((ctrl.threshold() - 0.55).abs() < 0.001);
    }

    #[test]
    fn cosine_similarity_identical_vectors() {
        let v = vec![1.0f32, 0.0, 0.0];
        assert!((cosine_similarity(&v, &v) - 1.0).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_orthogonal_vectors() {
        let a = vec![1.0f32, 0.0];
        let b = vec![0.0f32, 1.0];
        assert!(cosine_similarity(&a, &b).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_zero_vector_returns_zero() {
        let z = vec![0.0f32, 0.0, 0.0];
        let v = vec![1.0f32, 2.0, 3.0];
        assert!(cosine_similarity(&z, &v).abs() < f32::EPSILON);
    }

    #[test]
    fn cosine_similarity_length_mismatch_returns_zero() {
        let a = vec![1.0f32, 0.0];
        let b = vec![1.0f32, 0.0, 0.0];
        assert!(cosine_similarity(&a, &b).abs() < f32::EPSILON);
    }

    #[test]
    fn with_goal_gate_sets_goal_utility_weight() {
        let ctrl = AdmissionControl::new(0.40, 0.15, weights(0.30, 0.15, 0.30, 0.10, 0.15));
        let config = GoalGateConfig {
            weight: 0.20,
            threshold: 0.5,
            provider: None,
        };
        let ctrl = ctrl.with_goal_gate(config);
        assert!(
            ctrl.weights.goal_utility > 0.0,
            "goal_utility must be nonzero after with_goal_gate"
        );
        let sum = total(&ctrl.weights);
        assert!(
            (sum - 1.0).abs() < 0.01,
            "normalized weights must sum to 1.0, got {sum}"
        );
    }

    #[test]
    fn with_goal_gate_zero_weight_leaves_goal_utility_at_zero() {
        let ctrl = AdmissionControl::new(0.40, 0.15, weights(0.30, 0.15, 0.30, 0.10, 0.15));
        let config = GoalGateConfig {
            weight: 0.0,
            threshold: 0.5,
            provider: None,
        };
        let ctrl = ctrl.with_goal_gate(config);
        assert!(ctrl.weights.goal_utility.abs() < f32::EPSILON);
    }
}