1use std::sync::Arc;
10use std::time::Duration;
11
12use zeph_llm::any::AnyProvider;
13use zeph_llm::provider::LlmProvider as _;
14
15use crate::embedding_store::EmbeddingStore;
16
17#[derive(Debug, Clone, serde::Serialize)]
19pub struct AdmissionFactors {
20 pub future_utility: f32,
22 pub factual_confidence: f32,
24 pub semantic_novelty: f32,
26 pub temporal_recency: f32,
28 pub content_type_prior: f32,
30 pub goal_utility: f32,
33}
34
35#[derive(Debug, Clone)]
37pub struct AdmissionDecision {
38 pub admitted: bool,
39 pub composite_score: f32,
40 pub factors: AdmissionFactors,
41}
42
/// Relative weight of each admission factor in the composite score.
///
/// The values do not have to sum to `1.0`; call `normalized` to rescale them.
#[derive(Debug, Clone, Copy)]
pub struct AdmissionWeights {
    /// Weight of the future-utility factor.
    pub future_utility: f32,
    /// Weight of the factual-confidence factor.
    pub factual_confidence: f32,
    /// Weight of the semantic-novelty factor.
    pub semantic_novelty: f32,
    /// Weight of the temporal-recency factor.
    pub temporal_recency: f32,
    /// Weight of the content-type prior.
    pub content_type_prior: f32,
    /// Weight of the goal-utility factor.
    pub goal_utility: f32,
}
54
55impl AdmissionWeights {
56 #[must_use]
60 pub fn normalized(&self) -> Self {
61 let fu = self.future_utility.max(0.0);
62 let fc = self.factual_confidence.max(0.0);
63 let sn = self.semantic_novelty.max(0.0);
64 let tr = self.temporal_recency.max(0.0);
65 let cp = self.content_type_prior.max(0.0);
66 let gu = self.goal_utility.max(0.0);
67 let sum = fu + fc + sn + tr + cp + gu;
68 if sum <= f32::EPSILON {
69 return Self {
71 future_utility: 0.2,
72 factual_confidence: 0.2,
73 semantic_novelty: 0.2,
74 temporal_recency: 0.2,
75 content_type_prior: 0.2,
76 goal_utility: 0.0,
77 };
78 }
79 Self {
80 future_utility: fu / sum,
81 factual_confidence: fc / sum,
82 semantic_novelty: sn / sum,
83 temporal_recency: tr / sum,
84 content_type_prior: cp / sum,
85 goal_utility: gu / sum,
86 }
87 }
88}
89
90#[derive(Debug, Clone)]
92pub struct GoalGateConfig {
93 pub threshold: f32,
95 pub provider: Option<AnyProvider>,
97 pub weight: f32,
99}
100
101pub struct AdmissionControl {
103 threshold: f32,
104 fast_path_margin: f32,
105 weights: AdmissionWeights,
106 provider: Option<AnyProvider>,
109 goal_gate: Option<GoalGateConfig>,
111}
112
113impl AdmissionControl {
114 #[must_use]
115 pub fn new(threshold: f32, fast_path_margin: f32, weights: AdmissionWeights) -> Self {
116 Self {
117 threshold,
118 fast_path_margin,
119 weights: weights.normalized(),
120 provider: None,
121 goal_gate: None,
122 }
123 }
124
125 #[must_use]
129 pub fn with_provider(mut self, provider: AnyProvider) -> Self {
130 self.provider = Some(provider);
131 self
132 }
133
134 #[must_use]
136 pub fn with_goal_gate(mut self, config: GoalGateConfig) -> Self {
137 let gu = config.weight.clamp(0.0, 1.0);
139 let mut weights = self.weights;
140 weights.goal_utility = gu;
141 weights.future_utility = (weights.future_utility - gu).max(0.0);
143 self.weights = weights.normalized();
144 self.goal_gate = Some(config);
145 self
146 }
147
148 #[must_use]
150 pub fn threshold(&self) -> f32 {
151 self.threshold
152 }
153
154 pub async fn evaluate(
164 &self,
165 content: &str,
166 role: &str,
167 fallback_provider: &AnyProvider,
168 qdrant: Option<&Arc<EmbeddingStore>>,
169 goal_text: Option<&str>,
170 ) -> AdmissionDecision {
171 let effective_provider = self.provider.as_ref().unwrap_or(fallback_provider);
172 let factual_confidence = compute_factual_confidence(content);
173 let temporal_recency = 1.0f32;
174 let content_type_prior = compute_content_type_prior(role);
175
176 let semantic_novelty = compute_semantic_novelty(content, effective_provider, qdrant).await;
178
179 let goal_utility = match &self.goal_gate {
181 Some(gate) => {
182 let effective_goal = goal_text.filter(|t| t.trim().len() >= 10);
183 if let Some(goal) = effective_goal {
184 compute_goal_utility(content, goal, gate, effective_provider, qdrant).await
185 } else {
186 0.0
187 }
188 }
189 None => 0.0,
190 };
191
192 let heuristic_score = self.weighted_score(
194 0.5,
195 factual_confidence,
196 semantic_novelty,
197 temporal_recency,
198 content_type_prior,
199 goal_utility,
200 );
201
202 let future_utility = if heuristic_score >= self.threshold + self.fast_path_margin {
204 0.5 } else {
206 compute_future_utility(content, role, effective_provider).await
207 };
208
209 let composite_score = self.weighted_score(
210 future_utility,
211 factual_confidence,
212 semantic_novelty,
213 temporal_recency,
214 content_type_prior,
215 goal_utility,
216 );
217
218 let admitted = composite_score >= self.threshold
219 || heuristic_score >= self.threshold + self.fast_path_margin;
220
221 AdmissionDecision {
222 admitted,
223 composite_score,
224 factors: AdmissionFactors {
225 future_utility,
226 factual_confidence,
227 semantic_novelty,
228 temporal_recency,
229 content_type_prior,
230 goal_utility,
231 },
232 }
233 }
234
235 fn weighted_score(
236 &self,
237 future_utility: f32,
238 factual_confidence: f32,
239 semantic_novelty: f32,
240 temporal_recency: f32,
241 content_type_prior: f32,
242 goal_utility: f32,
243 ) -> f32 {
244 future_utility * self.weights.future_utility
245 + factual_confidence * self.weights.factual_confidence
246 + semantic_novelty * self.weights.semantic_novelty
247 + temporal_recency * self.weights.temporal_recency
248 + content_type_prior * self.weights.content_type_prior
249 + goal_utility * self.weights.goal_utility
250 }
251}
252
/// Estimates how confidently `content` is stated by looking for hedging phrases.
///
/// The text is lower-cased and checked for each known hedging marker (substring
/// match). Every distinct marker found lowers the score by `0.1`, starting from
/// `1.0`; the result never drops below `0.2`.
#[must_use]
pub fn compute_factual_confidence(content: &str) -> f32 {
    const HEDGING_MARKERS: &[&str] = &[
        "maybe",
        "might",
        "perhaps",
        "i think",
        "i believe",
        "not sure",
        "could be",
        "possibly",
        "probably",
        "uncertain",
        "not certain",
        "i'm not sure",
        "im not sure",
        "not confident",
    ];
    const PENALTY_PER_MARKER: f32 = 0.1;
    const FLOOR: f32 = 0.2;

    let haystack = content.to_lowercase();
    let mut hits = 0usize;
    for marker in HEDGING_MARKERS {
        if haystack.contains(*marker) {
            hits += 1;
        }
    }

    // At most 14 markers exist, so the conversion to f32 is exact.
    #[allow(clippy::cast_precision_loss)]
    let confidence = 1.0 - (hits as f32) * PENALTY_PER_MARKER;
    confidence.max(FLOOR)
}
285
/// Returns the prior admission score associated with a message role.
///
/// Known roles are `"user"`, `"assistant"`, `"tool"` / `"tool_result"` and
/// `"system"`; any other role gets the neutral prior `0.5`. Matching is exact
/// and case-sensitive.
#[must_use]
pub fn compute_content_type_prior(role: &str) -> f32 {
    const ROLE_PRIORS: &[(&str, f32)] = &[
        ("user", 0.7),
        ("assistant", 0.6),
        ("tool", 0.8),
        ("tool_result", 0.8),
        ("system", 0.3),
    ];
    const UNKNOWN_ROLE_PRIOR: f32 = 0.5;

    ROLE_PRIORS
        .iter()
        .find(|(known, _)| *known == role)
        .map_or(UNKNOWN_ROLE_PRIOR, |&(_, prior)| prior)
}
300
301async fn compute_semantic_novelty(
305 content: &str,
306 provider: &AnyProvider,
307 qdrant: Option<&Arc<EmbeddingStore>>,
308) -> f32 {
309 let Some(store) = qdrant else {
310 return 1.0;
311 };
312 if !provider.supports_embeddings() {
313 return 1.0;
314 }
315 let vector = match provider.embed(content).await {
316 Ok(v) => v,
317 Err(e) => {
318 tracing::debug!(error = %e, "A-MAC: failed to embed for novelty, using 1.0");
319 return 1.0;
320 }
321 };
322 let Ok(vector_size) = u64::try_from(vector.len()) else {
323 return 1.0;
324 };
325 if let Err(e) = store.ensure_collection(vector_size).await {
326 tracing::debug!(error = %e, "A-MAC: collection not ready for novelty check");
327 return 1.0;
328 }
329 let results = match store.search(&vector, 3, None).await {
330 Ok(r) => r,
331 Err(e) => {
332 tracing::debug!(error = %e, "A-MAC: novelty search failed, using 1.0");
333 return 1.0;
334 }
335 };
336 let max_sim = results.iter().map(|r| r.score).fold(0.0f32, f32::max);
337 (1.0 - max_sim).max(0.0)
338}
339
340async fn compute_future_utility(content: &str, role: &str, provider: &AnyProvider) -> f32 {
344 use zeph_llm::provider::{Message, MessageMetadata, Role};
345
346 let system = "You are a memory relevance judge. Rate how likely this message will be \
347 referenced in future conversations on a scale of 0.0 to 1.0. \
348 Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
349
350 let user = format!(
351 "Role: {role}\nContent: {}\n\nFuture utility score (0.0-1.0):",
352 content.chars().take(500).collect::<String>()
353 );
354
355 let messages = vec![
356 Message {
357 role: Role::System,
358 content: system.to_owned(),
359 parts: vec![],
360 metadata: MessageMetadata::default(),
361 },
362 Message {
363 role: Role::User,
364 content: user,
365 parts: vec![],
366 metadata: MessageMetadata::default(),
367 },
368 ];
369
370 let result = match tokio::time::timeout(Duration::from_secs(8), provider.chat(&messages)).await
371 {
372 Ok(Ok(r)) => r,
373 Ok(Err(e)) => {
374 tracing::debug!(error = %e, "A-MAC: future_utility LLM call failed, using 0.5");
375 return 0.5;
376 }
377 Err(_) => {
378 tracing::debug!("A-MAC: future_utility LLM timed out, using 0.5");
379 return 0.5;
380 }
381 };
382
383 result.trim().parse::<f32>().unwrap_or(0.5).clamp(0.0, 1.0)
384}
385
386async fn compute_goal_utility(
397 content: &str,
398 goal_text: &str,
399 gate: &GoalGateConfig,
400 provider: &AnyProvider,
401 qdrant: Option<&Arc<EmbeddingStore>>,
402) -> f32 {
403 use zeph_llm::provider::LlmProvider as _;
404
405 if !provider.supports_embeddings() {
406 return 0.0;
407 }
408
409 let goal_emb = match provider.embed(goal_text).await {
410 Ok(v) => v,
411 Err(e) => {
412 tracing::debug!(error = %e, "goal_utility: failed to embed goal text, using 0.0");
413 return 0.0;
414 }
415 };
416 let content_emb = match provider.embed(content).await {
417 Ok(v) => v,
418 Err(e) => {
419 tracing::debug!(error = %e, "goal_utility: failed to embed content, using 0.0");
420 return 0.0;
421 }
422 };
423
424 let _ = qdrant; let similarity = cosine_similarity(&goal_emb, &content_emb);
428
429 let borderline_lo = gate.threshold - 0.1;
431 let borderline_hi = gate.threshold + 0.1;
432 let in_borderline = similarity >= borderline_lo && similarity <= borderline_hi;
433
434 let final_similarity = if in_borderline {
435 if let Some(ref goal_provider) = gate.provider {
436 refine_goal_utility_llm(content, goal_text, similarity, goal_provider).await
437 } else {
438 similarity
439 }
440 } else {
441 similarity
442 };
443
444 if final_similarity < gate.threshold {
446 0.0
447 } else {
448 final_similarity.max(0.1)
449 }
450}
451
/// Cosine similarity of two vectors, clamped to `[0.0, 1.0]`.
///
/// Returns `0.0` when the vectors are empty, differ in length, have a near-zero
/// norm, or when the computation is not a number (NaN or infinite components, or
/// squares that overflow `f32`). Negative similarities are clamped to `0.0`.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a < f32::EPSILON || norm_b < f32::EPSILON {
        return 0.0;
    }
    let cosine = dot / (norm_a * norm_b);
    // `clamp` passes NaN through, and a NaN would defeat threshold comparisons downstream.
    if cosine.is_nan() {
        return 0.0;
    }
    cosine.clamp(0.0, 1.0)
}
465
466async fn refine_goal_utility_llm(
470 content: &str,
471 goal_text: &str,
472 embedding_sim: f32,
473 provider: &AnyProvider,
474) -> f32 {
475 use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
476
477 let system = "You are a memory relevance judge. Given a task goal and a candidate memory, \
478 rate how relevant the memory is to the goal on a scale of 0.0 to 1.0. \
479 Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
480
481 let user = format!(
482 "Goal: {}\nMemory: {}\n\nRelevance score (0.0-1.0):",
483 goal_text.chars().take(200).collect::<String>(),
484 content.chars().take(300).collect::<String>(),
485 );
486
487 let messages = vec![
488 Message {
489 role: Role::System,
490 content: system.to_owned(),
491 parts: vec![],
492 metadata: MessageMetadata::default(),
493 },
494 Message {
495 role: Role::User,
496 content: user,
497 parts: vec![],
498 metadata: MessageMetadata::default(),
499 },
500 ];
501
502 let result = match tokio::time::timeout(Duration::from_secs(6), provider.chat(&messages)).await
503 {
504 Ok(Ok(r)) => r,
505 Ok(Err(e)) => {
506 tracing::debug!(error = %e, "goal_utility LLM refinement failed, using embedding sim");
507 return embedding_sim;
508 }
509 Err(_) => {
510 tracing::debug!("goal_utility LLM refinement timed out, using embedding sim");
511 return embedding_sim;
512 }
513 };
514
515 result
516 .trim()
517 .parse::<f32>()
518 .unwrap_or(embedding_sim)
519 .clamp(0.0, 1.0)
520}
521
522pub fn log_admission_decision(
526 decision: &AdmissionDecision,
527 content_preview: &str,
528 role: &str,
529 threshold: f32,
530) {
531 if decision.admitted {
532 tracing::trace!(
533 role,
534 composite_score = decision.composite_score,
535 threshold,
536 content_preview,
537 "A-MAC: admitted"
538 );
539 } else {
540 tracing::debug!(
541 role,
542 composite_score = decision.composite_score,
543 threshold,
544 future_utility = decision.factors.future_utility,
545 factual_confidence = decision.factors.factual_confidence,
546 semantic_novelty = decision.factors.semantic_novelty,
547 content_type_prior = decision.factors.content_type_prior,
548 content_preview,
549 "A-MAC: rejected"
550 );
551 }
552}
553
/// Describes a message that was refused admission.
#[derive(Debug)]
pub struct AdmissionRejected {
    /// Composite score the message received.
    pub composite_score: f32,
    /// Threshold the score failed to reach.
    pub threshold: f32,
}
560
561impl std::fmt::Display for AdmissionRejected {
562 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
563 write!(
564 f,
565 "A-MAC admission rejected (score={:.3} < threshold={:.3})",
566 self.composite_score, self.threshold
567 )
568 }
569}
570
#[cfg(test)]
mod tests {
    use super::*;

    /// Weights for the five base factors; the goal-utility weight stays at zero.
    fn base_weights(
        future_utility: f32,
        factual_confidence: f32,
        semantic_novelty: f32,
        temporal_recency: f32,
        content_type_prior: f32,
    ) -> AdmissionWeights {
        AdmissionWeights {
            future_utility,
            factual_confidence,
            semantic_novelty,
            temporal_recency,
            content_type_prior,
            goal_utility: 0.0,
        }
    }

    /// The 0.30 / 0.15 / 0.30 / 0.10 / 0.15 split shared by several tests.
    fn typical_weights() -> AdmissionWeights {
        base_weights(0.30, 0.15, 0.30, 0.10, 0.15)
    }

    /// Equal weight of 0.20 on each of the five base factors.
    fn uniform_weights() -> AdmissionWeights {
        base_weights(0.20, 0.20, 0.20, 0.20, 0.20)
    }

    /// Goal gate without an LLM provider and with a similarity threshold of 0.5.
    fn gate_with_weight(weight: f32) -> GoalGateConfig {
        GoalGateConfig {
            weight,
            threshold: 0.5,
            provider: None,
        }
    }

    /// Builds a decision; `factors` lists the scores in struct field order.
    fn decision(admitted: bool, composite_score: f32, factors: [f32; 6]) -> AdmissionDecision {
        let [future_utility, factual_confidence, semantic_novelty, temporal_recency, content_type_prior, goal_utility] =
            factors;
        AdmissionDecision {
            admitted,
            composite_score,
            factors: AdmissionFactors {
                future_utility,
                factual_confidence,
                semantic_novelty,
                temporal_recency,
                content_type_prior,
                goal_utility,
            },
        }
    }

    #[test]
    fn factual_confidence_no_hedging() {
        let score = compute_factual_confidence("The server uses TLS 1.3.");
        assert!((score - 1.0).abs() < 0.01);
    }

    #[test]
    fn factual_confidence_with_one_marker() {
        let score = compute_factual_confidence("Maybe we should use TLS 1.3.");
        assert!((score - 0.9).abs() < 0.01);
    }

    #[test]
    fn factual_confidence_many_markers_floors_at_0_2() {
        let score =
            compute_factual_confidence("maybe i think perhaps possibly might not sure i believe");
        assert!(score >= 0.2);
        assert!(score < 0.5);
    }

    #[test]
    fn content_type_prior_values() {
        let expectations = [
            ("user", 0.7),
            ("assistant", 0.6),
            ("tool", 0.8),
            ("system", 0.3),
            ("unknown", 0.5),
        ];
        for (role, expected) in expectations {
            assert!((compute_content_type_prior(role) - expected).abs() < 0.01);
        }
    }

    #[test]
    fn admission_control_admits_high_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, typical_weights());
        // Every base factor at its maximum: the composite must be (almost) 1.0.
        let score = ctrl.weighted_score(1.0, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(score >= 0.99);
        let admitted = score >= ctrl.threshold;
        assert!(admitted);
    }

    #[test]
    fn admission_control_rejects_low_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, typical_weights());
        // Every factor at zero: the composite is zero and below the threshold.
        let score = ctrl.weighted_score(0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
        assert!(score < ctrl.threshold);
    }

    #[test]
    fn fast_path_admits_when_heuristic_above_threshold_plus_margin() {
        let threshold = 0.40f32;
        let margin = 0.15f32;
        let ctrl = AdmissionControl::new(threshold, margin, uniform_weights());

        // Neutral future utility (0.5) with every other base factor at its maximum.
        let heuristic = ctrl.weighted_score(0.5, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(
            heuristic >= threshold + margin,
            "heuristic {heuristic} must exceed threshold+margin {}",
            threshold + margin
        );
        let admitted = heuristic >= threshold + margin;
        assert!(admitted, "fast path must admit without LLM call");
    }

    #[test]
    fn slow_path_required_when_heuristic_below_threshold_plus_margin() {
        let threshold = 0.50f32;
        let margin = 0.20f32;
        let ctrl =
            AdmissionControl::new(threshold, margin, base_weights(0.40, 0.15, 0.15, 0.15, 0.15));

        // Weak factor scores keep the heuristic below the fast-path bar.
        let heuristic = ctrl.weighted_score(0.5, 0.3, 0.3, 0.3, 0.3, 0.0);
        assert!(
            heuristic < threshold + margin,
            "heuristic {heuristic} must be below threshold+margin {}",
            threshold + margin
        );
    }

    #[test]
    fn log_admission_decision_does_not_panic() {
        let admitted_decision = decision(true, 0.75, [0.8, 0.9, 0.7, 1.0, 0.7, 0.0]);
        log_admission_decision(&admitted_decision, "preview text", "user", 0.40);

        let rejected_decision = decision(false, 0.20, [0.1, 0.2, 0.3, 1.0, 0.3, 0.0]);
        log_admission_decision(&rejected_decision, "maybe short content", "assistant", 0.40);
    }

    #[test]
    fn admission_rejected_display() {
        let err = AdmissionRejected {
            composite_score: 0.25,
            threshold: 0.45,
        };
        let msg = format!("{err}");
        assert!(msg.contains("0.250"));
        assert!(msg.contains("0.450"));
    }

    #[test]
    fn threshold_accessor() {
        let ctrl = AdmissionControl::new(0.55, 0.10, uniform_weights());
        assert!((ctrl.threshold() - 0.55).abs() < 0.001);
    }

    #[test]
    fn content_type_prior_tool_result_alias() {
        assert!((compute_content_type_prior("tool_result") - 0.8).abs() < 0.01);
    }

    #[test]
    fn cosine_similarity_identical_vectors() {
        let v = vec![1.0f32, 0.0, 0.0];
        assert!((cosine_similarity(&v, &v) - 1.0).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_orthogonal_vectors() {
        let a = vec![1.0f32, 0.0];
        let b = vec![0.0f32, 1.0];
        assert!(cosine_similarity(&a, &b).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_zero_vector_returns_zero() {
        let z = vec![0.0f32, 0.0, 0.0];
        let v = vec![1.0f32, 2.0, 3.0];
        assert_eq!(cosine_similarity(&z, &v), 0.0);
    }

    #[test]
    fn cosine_similarity_length_mismatch_returns_zero() {
        let a = vec![1.0f32, 0.0];
        let b = vec![1.0f32, 0.0, 0.0];
        assert_eq!(cosine_similarity(&a, &b), 0.0);
    }

    #[test]
    fn with_goal_gate_sets_goal_utility_weight() {
        let ctrl = AdmissionControl::new(0.40, 0.15, typical_weights())
            .with_goal_gate(gate_with_weight(0.20));
        assert!(
            ctrl.weights.goal_utility > 0.0,
            "goal_utility must be nonzero after with_goal_gate"
        );

        let w = &ctrl.weights;
        let total = w.future_utility
            + w.factual_confidence
            + w.semantic_novelty
            + w.temporal_recency
            + w.content_type_prior
            + w.goal_utility;
        assert!(
            (total - 1.0).abs() < 0.01,
            "normalized weights must sum to 1.0, got {total}"
        );
    }

    #[test]
    fn with_goal_gate_zero_weight_leaves_goal_utility_at_zero() {
        let ctrl = AdmissionControl::new(0.40, 0.15, typical_weights())
            .with_goal_gate(gate_with_weight(0.0));
        assert_eq!(ctrl.weights.goal_utility, 0.0);
    }
}