1use std::sync::Arc;
10use std::time::Duration;
11
12use zeph_llm::any::AnyProvider;
13use zeph_llm::provider::LlmProvider as _;
14
15use crate::embedding_store::EmbeddingStore;
16use zeph_common::math::cosine_similarity;
17
18#[derive(Debug, Clone, serde::Serialize)]
20pub struct AdmissionFactors {
21 pub future_utility: f32,
23 pub factual_confidence: f32,
25 pub semantic_novelty: f32,
27 pub temporal_recency: f32,
29 pub content_type_prior: f32,
31 pub goal_utility: f32,
34}
35
36#[derive(Debug, Clone)]
38pub struct AdmissionDecision {
39 pub admitted: bool,
40 pub composite_score: f32,
41 pub factors: AdmissionFactors,
42}
43
/// Relative weights of the admission factors.
///
/// The values do not need to sum to `1.0`; use [`AdmissionWeights::normalized`]
/// to obtain a rescaled copy before computing a weighted score.
#[derive(Debug, Clone, Copy)]
pub struct AdmissionWeights {
    /// Weight of the future-utility factor.
    pub future_utility: f32,
    /// Weight of the factual-confidence factor.
    pub factual_confidence: f32,
    /// Weight of the semantic-novelty factor.
    pub semantic_novelty: f32,
    /// Weight of the temporal-recency factor.
    pub temporal_recency: f32,
    /// Weight of the content-type prior.
    pub content_type_prior: f32,
    /// Weight of the goal-utility factor.
    pub goal_utility: f32,
}

impl AdmissionWeights {
    /// Returns a copy of the weights rescaled so that they sum to `1.0`.
    ///
    /// Negative, NaN and infinite weights are treated as `0.0`. When no usable
    /// weight remains (sum at or below `f32::EPSILON`) or the sum overflows,
    /// the result falls back to `0.2` for each of the five base factors and
    /// `0.0` for `goal_utility`.
    #[must_use]
    pub fn normalized(&self) -> Self {
        // An infinite weight would otherwise yield `inf / inf = NaN` for itself
        // and `x / inf = 0` for every other factor, poisoning all later scores.
        fn sanitize(weight: f32) -> f32 {
            if weight.is_finite() {
                weight.max(0.0)
            } else {
                0.0
            }
        }

        let fu = sanitize(self.future_utility);
        let fc = sanitize(self.factual_confidence);
        let sn = sanitize(self.semantic_novelty);
        let tr = sanitize(self.temporal_recency);
        let cp = sanitize(self.content_type_prior);
        let gu = sanitize(self.goal_utility);
        let sum = fu + fc + sn + tr + cp + gu;

        // `!is_finite` covers very large finite weights whose sum overflows.
        if !sum.is_finite() || sum <= f32::EPSILON {
            return Self {
                future_utility: 0.2,
                factual_confidence: 0.2,
                semantic_novelty: 0.2,
                temporal_recency: 0.2,
                content_type_prior: 0.2,
                goal_utility: 0.0,
            };
        }

        Self {
            future_utility: fu / sum,
            factual_confidence: fc / sum,
            semantic_novelty: sn / sum,
            temporal_recency: tr / sum,
            content_type_prior: cp / sum,
            goal_utility: gu / sum,
        }
    }
}
90
91#[derive(Debug, Clone)]
93pub struct GoalGateConfig {
94 pub threshold: f32,
96 pub provider: Option<AnyProvider>,
98 pub weight: f32,
100}
101
102pub struct AdmissionControl {
104 threshold: f32,
105 fast_path_margin: f32,
106 weights: AdmissionWeights,
107 provider: Option<AnyProvider>,
110 goal_gate: Option<GoalGateConfig>,
112}
113
114impl AdmissionControl {
115 #[must_use]
116 pub fn new(threshold: f32, fast_path_margin: f32, weights: AdmissionWeights) -> Self {
117 Self {
118 threshold,
119 fast_path_margin,
120 weights: weights.normalized(),
121 provider: None,
122 goal_gate: None,
123 }
124 }
125
126 #[must_use]
130 pub fn with_provider(mut self, provider: AnyProvider) -> Self {
131 self.provider = Some(provider);
132 self
133 }
134
135 #[must_use]
137 pub fn with_goal_gate(mut self, config: GoalGateConfig) -> Self {
138 let gu = config.weight.clamp(0.0, 1.0);
140 let mut weights = self.weights;
141 weights.goal_utility = gu;
142 weights.future_utility = (weights.future_utility - gu).max(0.0);
144 self.weights = weights.normalized();
145 self.goal_gate = Some(config);
146 self
147 }
148
149 #[must_use]
151 pub fn threshold(&self) -> f32 {
152 self.threshold
153 }
154
155 pub async fn evaluate(
165 &self,
166 content: &str,
167 role: &str,
168 fallback_provider: &AnyProvider,
169 qdrant: Option<&Arc<EmbeddingStore>>,
170 goal_text: Option<&str>,
171 ) -> AdmissionDecision {
172 let effective_provider = self.provider.as_ref().unwrap_or(fallback_provider);
173 let factual_confidence = compute_factual_confidence(content);
174 let temporal_recency = 1.0f32;
175 let content_type_prior = compute_content_type_prior(role);
176
177 let semantic_novelty = compute_semantic_novelty(content, effective_provider, qdrant).await;
179
180 let goal_utility = match &self.goal_gate {
182 Some(gate) => {
183 let effective_goal = goal_text.filter(|t| t.trim().len() >= 10);
184 if let Some(goal) = effective_goal {
185 compute_goal_utility(content, goal, gate, effective_provider, qdrant).await
186 } else {
187 0.0
188 }
189 }
190 None => 0.0,
191 };
192
193 let heuristic_score = self.weighted_score(
195 0.5,
196 factual_confidence,
197 semantic_novelty,
198 temporal_recency,
199 content_type_prior,
200 goal_utility,
201 );
202
203 let future_utility = if heuristic_score >= self.threshold + self.fast_path_margin {
205 0.5 } else {
207 compute_future_utility(content, role, effective_provider).await
208 };
209
210 let composite_score = self.weighted_score(
211 future_utility,
212 factual_confidence,
213 semantic_novelty,
214 temporal_recency,
215 content_type_prior,
216 goal_utility,
217 );
218
219 let admitted = composite_score >= self.threshold
220 || heuristic_score >= self.threshold + self.fast_path_margin;
221
222 AdmissionDecision {
223 admitted,
224 composite_score,
225 factors: AdmissionFactors {
226 future_utility,
227 factual_confidence,
228 semantic_novelty,
229 temporal_recency,
230 content_type_prior,
231 goal_utility,
232 },
233 }
234 }
235
236 fn weighted_score(
237 &self,
238 future_utility: f32,
239 factual_confidence: f32,
240 semantic_novelty: f32,
241 temporal_recency: f32,
242 content_type_prior: f32,
243 goal_utility: f32,
244 ) -> f32 {
245 future_utility * self.weights.future_utility
246 + factual_confidence * self.weights.factual_confidence
247 + semantic_novelty * self.weights.semantic_novelty
248 + temporal_recency * self.weights.temporal_recency
249 + content_type_prior * self.weights.content_type_prior
250 + goal_utility * self.weights.goal_utility
251 }
252}
253
/// Estimates factual confidence from hedging language in `content`.
///
/// Starts at `1.0` and subtracts `0.1` for every distinct hedging marker found
/// in the lower-cased text, never going below `0.2`. Markers are matched as
/// plain substrings and each marker counts at most once, so overlapping
/// markers (for example `"not sure"` and `"i'm not sure"`) both count.
#[must_use]
pub fn compute_factual_confidence(content: &str) -> f32 {
    const HEDGING_MARKERS: &[&str] = &[
        "maybe",
        "might",
        "perhaps",
        "i think",
        "i believe",
        "not sure",
        "could be",
        "possibly",
        "probably",
        "uncertain",
        "not certain",
        "i'm not sure",
        "im not sure",
        "not confident",
    ];
    const PENALTY_PER_MARKER: f32 = 0.1;
    const MIN_CONFIDENCE: f32 = 0.2;

    let haystack = content.to_lowercase();
    let mut hits = 0usize;
    for marker in HEDGING_MARKERS {
        if haystack.contains(*marker) {
            hits += 1;
        }
    }

    // The marker list is tiny, so the usize -> f32 conversion is exact.
    #[allow(clippy::cast_precision_loss)]
    let penalty = (hits as f32) * PENALTY_PER_MARKER;
    (1.0 - penalty).max(MIN_CONFIDENCE)
}
286
/// Returns the prior admission value associated with a message role.
///
/// Known roles map to fixed values (`"tool"` and `"tool_result"` share one);
/// any other role gets the neutral `0.5`. Matching is exact and
/// case-sensitive.
#[must_use]
pub fn compute_content_type_prior(role: &str) -> f32 {
    const ROLE_PRIORS: &[(&str, f32)] = &[
        ("user", 0.7),
        ("assistant", 0.6),
        ("tool", 0.8),
        ("tool_result", 0.8),
        ("system", 0.3),
    ];
    const UNKNOWN_ROLE_PRIOR: f32 = 0.5;

    ROLE_PRIORS
        .iter()
        .find(|(name, _)| *name == role)
        .map_or(UNKNOWN_ROLE_PRIOR, |&(_, prior)| prior)
}
301
302async fn compute_semantic_novelty(
306 content: &str,
307 provider: &AnyProvider,
308 qdrant: Option<&Arc<EmbeddingStore>>,
309) -> f32 {
310 let Some(store) = qdrant else {
311 return 1.0;
312 };
313 if !provider.supports_embeddings() {
314 return 1.0;
315 }
316 let vector = match provider.embed(content).await {
317 Ok(v) => v,
318 Err(e) => {
319 tracing::debug!(error = %e, "A-MAC: failed to embed for novelty, using 1.0");
320 return 1.0;
321 }
322 };
323 let Ok(vector_size) = u64::try_from(vector.len()) else {
324 return 1.0;
325 };
326 if let Err(e) = store.ensure_collection(vector_size).await {
327 tracing::debug!(error = %e, "A-MAC: collection not ready for novelty check");
328 return 1.0;
329 }
330 let results = match store.search(&vector, 3, None).await {
331 Ok(r) => r,
332 Err(e) => {
333 tracing::debug!(error = %e, "A-MAC: novelty search failed, using 1.0");
334 return 1.0;
335 }
336 };
337 let max_sim = results.iter().map(|r| r.score).fold(0.0f32, f32::max);
338 (1.0 - max_sim).max(0.0)
339}
340
341async fn compute_future_utility(content: &str, role: &str, provider: &AnyProvider) -> f32 {
345 use zeph_llm::provider::{Message, MessageMetadata, Role};
346
347 let system = "You are a memory relevance judge. Rate how likely this message will be \
348 referenced in future conversations on a scale of 0.0 to 1.0. \
349 Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
350
351 let user = format!(
352 "Role: {role}\nContent: {}\n\nFuture utility score (0.0-1.0):",
353 content.chars().take(500).collect::<String>()
354 );
355
356 let messages = vec![
357 Message {
358 role: Role::System,
359 content: system.to_owned(),
360 parts: vec![],
361 metadata: MessageMetadata::default(),
362 },
363 Message {
364 role: Role::User,
365 content: user,
366 parts: vec![],
367 metadata: MessageMetadata::default(),
368 },
369 ];
370
371 let result = match tokio::time::timeout(Duration::from_secs(8), provider.chat(&messages)).await
372 {
373 Ok(Ok(r)) => r,
374 Ok(Err(e)) => {
375 tracing::debug!(error = %e, "A-MAC: future_utility LLM call failed, using 0.5");
376 return 0.5;
377 }
378 Err(_) => {
379 tracing::debug!("A-MAC: future_utility LLM timed out, using 0.5");
380 return 0.5;
381 }
382 };
383
384 result.trim().parse::<f32>().unwrap_or(0.5).clamp(0.0, 1.0)
385}
386
387async fn compute_goal_utility(
398 content: &str,
399 goal_text: &str,
400 gate: &GoalGateConfig,
401 provider: &AnyProvider,
402 qdrant: Option<&Arc<EmbeddingStore>>,
403) -> f32 {
404 use zeph_llm::provider::LlmProvider as _;
405
406 if !provider.supports_embeddings() {
407 return 0.0;
408 }
409
410 let goal_emb = match provider.embed(goal_text).await {
411 Ok(v) => v,
412 Err(e) => {
413 tracing::debug!(error = %e, "goal_utility: failed to embed goal text, using 0.0");
414 return 0.0;
415 }
416 };
417 let content_emb = match provider.embed(content).await {
418 Ok(v) => v,
419 Err(e) => {
420 tracing::debug!(error = %e, "goal_utility: failed to embed content, using 0.0");
421 return 0.0;
422 }
423 };
424
425 let _ = qdrant; let similarity = cosine_similarity(&goal_emb, &content_emb);
429
430 let borderline_lo = gate.threshold - 0.1;
432 let borderline_hi = gate.threshold + 0.1;
433 let in_borderline = similarity >= borderline_lo && similarity <= borderline_hi;
434
435 let final_similarity = if in_borderline {
436 if let Some(ref goal_provider) = gate.provider {
437 refine_goal_utility_llm(content, goal_text, similarity, goal_provider).await
438 } else {
439 similarity
440 }
441 } else {
442 similarity
443 };
444
445 if final_similarity < gate.threshold {
447 0.0
448 } else {
449 final_similarity.max(0.1)
450 }
451}
452
453async fn refine_goal_utility_llm(
457 content: &str,
458 goal_text: &str,
459 embedding_sim: f32,
460 provider: &AnyProvider,
461) -> f32 {
462 use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
463
464 let system = "You are a memory relevance judge. Given a task goal and a candidate memory, \
465 rate how relevant the memory is to the goal on a scale of 0.0 to 1.0. \
466 Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
467
468 let user = format!(
469 "Goal: {}\nMemory: {}\n\nRelevance score (0.0-1.0):",
470 goal_text.chars().take(200).collect::<String>(),
471 content.chars().take(300).collect::<String>(),
472 );
473
474 let messages = vec![
475 Message {
476 role: Role::System,
477 content: system.to_owned(),
478 parts: vec![],
479 metadata: MessageMetadata::default(),
480 },
481 Message {
482 role: Role::User,
483 content: user,
484 parts: vec![],
485 metadata: MessageMetadata::default(),
486 },
487 ];
488
489 let result = match tokio::time::timeout(Duration::from_secs(6), provider.chat(&messages)).await
490 {
491 Ok(Ok(r)) => r,
492 Ok(Err(e)) => {
493 tracing::debug!(error = %e, "goal_utility LLM refinement failed, using embedding sim");
494 return embedding_sim;
495 }
496 Err(_) => {
497 tracing::debug!("goal_utility LLM refinement timed out, using embedding sim");
498 return embedding_sim;
499 }
500 };
501
502 result
503 .trim()
504 .parse::<f32>()
505 .unwrap_or(embedding_sim)
506 .clamp(0.0, 1.0)
507}
508
509pub fn log_admission_decision(
513 decision: &AdmissionDecision,
514 content_preview: &str,
515 role: &str,
516 threshold: f32,
517) {
518 if decision.admitted {
519 tracing::trace!(
520 role,
521 composite_score = decision.composite_score,
522 threshold,
523 content_preview,
524 "A-MAC: admitted"
525 );
526 } else {
527 tracing::debug!(
528 role,
529 composite_score = decision.composite_score,
530 threshold,
531 future_utility = decision.factors.future_utility,
532 factual_confidence = decision.factors.factual_confidence,
533 semantic_novelty = decision.factors.semantic_novelty,
534 content_type_prior = decision.factors.content_type_prior,
535 content_preview,
536 "A-MAC: rejected"
537 );
538 }
539}
540
/// Describes content that failed admission, with the score it received and
/// the threshold it was compared against.
#[derive(Debug)]
pub struct AdmissionRejected {
    /// Composite score of the rejected content.
    pub composite_score: f32,
    /// Admission threshold in effect.
    pub threshold: f32,
}

impl std::fmt::Display for AdmissionRejected {
    /// Writes a one-line message with both values rounded to three decimals.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "A-MAC admission rejected (score={:.3} < threshold={:.3})",
            self.composite_score, self.threshold
        ))
    }
}
557
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds weights from positional values in field-declaration order:
    /// future utility, factual confidence, semantic novelty, temporal recency,
    /// content-type prior, goal utility.
    fn weights(fu: f32, fc: f32, sn: f32, tr: f32, cp: f32, gu: f32) -> AdmissionWeights {
        AdmissionWeights {
            future_utility: fu,
            factual_confidence: fc,
            semantic_novelty: sn,
            temporal_recency: tr,
            content_type_prior: cp,
            goal_utility: gu,
        }
    }

    /// Weight profile shared by most controller tests.
    fn standard_weights() -> AdmissionWeights {
        weights(0.30, 0.15, 0.30, 0.10, 0.15, 0.0)
    }

    /// Equal weight on the five base factors, none on goal utility.
    fn uniform_weights() -> AdmissionWeights {
        weights(0.20, 0.20, 0.20, 0.20, 0.20, 0.0)
    }

    /// Builds a decision whose factors are given in field-declaration order.
    fn decision(admitted: bool, composite_score: f32, f: [f32; 6]) -> AdmissionDecision {
        AdmissionDecision {
            admitted,
            composite_score,
            factors: AdmissionFactors {
                future_utility: f[0],
                factual_confidence: f[1],
                semantic_novelty: f[2],
                temporal_recency: f[3],
                content_type_prior: f[4],
                goal_utility: f[5],
            },
        }
    }

    /// `true` when `actual` is within `tolerance` (exclusive) of `expected`.
    fn close(actual: f32, expected: f32, tolerance: f32) -> bool {
        (actual - expected).abs() < tolerance
    }

    // --- compute_factual_confidence ---

    #[test]
    fn factual_confidence_no_hedging() {
        assert!(close(
            compute_factual_confidence("The server uses TLS 1.3."),
            1.0,
            0.01
        ));
    }

    #[test]
    fn factual_confidence_with_one_marker() {
        let score = compute_factual_confidence("Maybe we should use TLS 1.3.");
        assert!(close(score, 0.9, 0.01));
    }

    #[test]
    fn factual_confidence_many_markers_floors_at_0_2() {
        let content = "maybe i think perhaps possibly might not sure i believe";
        let score = compute_factual_confidence(content);
        assert!(score >= 0.2);
        assert!(score < 0.5);
    }

    // --- compute_content_type_prior ---

    #[test]
    fn content_type_prior_values() {
        let expectations = [
            ("user", 0.7),
            ("assistant", 0.6),
            ("tool", 0.8),
            ("system", 0.3),
            ("unknown", 0.5),
        ];
        for (role, expected) in expectations {
            assert!(close(compute_content_type_prior(role), expected, 0.01));
        }
    }

    // --- weighted score against the threshold ---

    #[test]
    fn admission_control_admits_high_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, standard_weights());
        let score = ctrl.weighted_score(1.0, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(score >= 0.99);
        let admitted = score >= ctrl.threshold;
        assert!(admitted);
    }

    #[test]
    fn admission_control_rejects_low_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, standard_weights());
        let score = ctrl.weighted_score(0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
        assert!(score < ctrl.threshold);
    }

    // --- fast path / slow path boundary ---

    #[test]
    fn fast_path_admits_when_heuristic_above_threshold_plus_margin() {
        let threshold = 0.40f32;
        let margin = 0.15f32;
        let ctrl = AdmissionControl::new(threshold, margin, uniform_weights());

        let heuristic = ctrl.weighted_score(0.5, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(
            heuristic >= threshold + margin,
            "heuristic {heuristic} must exceed threshold+margin {}",
            threshold + margin
        );
        let admitted = heuristic >= threshold + margin;
        assert!(admitted, "fast path must admit without LLM call");
    }

    #[test]
    fn slow_path_required_when_heuristic_below_threshold_plus_margin() {
        let threshold = 0.50f32;
        let margin = 0.20f32;
        let ctrl = AdmissionControl::new(
            threshold,
            margin,
            weights(0.40, 0.15, 0.15, 0.15, 0.15, 0.0),
        );

        let heuristic = ctrl.weighted_score(0.5, 0.3, 0.3, 0.3, 0.3, 0.0);
        assert!(
            heuristic < threshold + margin,
            "heuristic {heuristic} must be below threshold+margin {}",
            threshold + margin
        );
    }

    // --- logging and error display ---

    #[test]
    fn log_admission_decision_does_not_panic() {
        let admitted_decision = decision(true, 0.75, [0.8, 0.9, 0.7, 1.0, 0.7, 0.0]);
        log_admission_decision(&admitted_decision, "preview text", "user", 0.40);

        let rejected_decision = decision(false, 0.20, [0.1, 0.2, 0.3, 1.0, 0.3, 0.0]);
        log_admission_decision(&rejected_decision, "maybe short content", "assistant", 0.40);
    }

    #[test]
    fn admission_rejected_display() {
        let err = AdmissionRejected {
            composite_score: 0.25,
            threshold: 0.45,
        };
        let msg = format!("{err}");
        assert!(msg.contains("0.250"));
        assert!(msg.contains("0.450"));
    }

    #[test]
    fn threshold_accessor() {
        let ctrl = AdmissionControl::new(0.55, 0.10, uniform_weights());
        assert!(close(ctrl.threshold(), 0.55, 0.001));
    }

    #[test]
    fn content_type_prior_tool_result_alias() {
        assert!(close(compute_content_type_prior("tool_result"), 0.8, 0.01));
    }

    // --- cosine_similarity ---

    #[test]
    fn cosine_similarity_identical_vectors() {
        let v = vec![1.0f32, 0.0, 0.0];
        assert!(close(cosine_similarity(&v, &v), 1.0, 1e-6));
    }

    #[test]
    fn cosine_similarity_orthogonal_vectors() {
        let a = vec![1.0f32, 0.0];
        let b = vec![0.0f32, 1.0];
        assert!(cosine_similarity(&a, &b).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_zero_vector_returns_zero() {
        let z = vec![0.0f32, 0.0, 0.0];
        let v = vec![1.0f32, 2.0, 3.0];
        assert!(cosine_similarity(&z, &v).abs() < f32::EPSILON);
    }

    #[test]
    fn cosine_similarity_length_mismatch_returns_zero() {
        let a = vec![1.0f32, 0.0];
        let b = vec![1.0f32, 0.0, 0.0];
        assert!(cosine_similarity(&a, &b).abs() < f32::EPSILON);
    }

    // --- with_goal_gate ---

    #[test]
    fn with_goal_gate_sets_goal_utility_weight() {
        let ctrl = AdmissionControl::new(0.40, 0.15, standard_weights()).with_goal_gate(
            GoalGateConfig {
                weight: 0.20,
                threshold: 0.5,
                provider: None,
            },
        );
        assert!(
            ctrl.weights.goal_utility > 0.0,
            "goal_utility must be nonzero after with_goal_gate"
        );
        let w = &ctrl.weights;
        let total = w.future_utility
            + w.factual_confidence
            + w.semantic_novelty
            + w.temporal_recency
            + w.content_type_prior
            + w.goal_utility;
        assert!(
            (total - 1.0).abs() < 0.01,
            "normalized weights must sum to 1.0, got {total}"
        );
    }

    #[test]
    fn with_goal_gate_zero_weight_leaves_goal_utility_at_zero() {
        let ctrl = AdmissionControl::new(0.40, 0.15, standard_weights()).with_goal_gate(
            GoalGateConfig {
                weight: 0.0,
                threshold: 0.5,
                provider: None,
            },
        );
        assert!(ctrl.weights.goal_utility.abs() < f32::EPSILON);
    }
}