1use std::sync::Arc;
10use std::time::Duration;
11
12use zeph_llm::any::AnyProvider;
13use zeph_llm::provider::LlmProvider as _;
14
15use crate::embedding_store::EmbeddingStore;
16
/// Individual factor scores that feed one admission decision.
///
/// `AdmissionControl::evaluate` fills in one value per factor and combines them
/// into a weighted composite score.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AdmissionFactors {
 /// LLM-judged likelihood that the content is referenced again, or the neutral
 /// `0.5` placeholder when the LLM call is skipped, fails, or times out.
 pub future_utility: f32,
 /// Hedging-based confidence from `compute_factual_confidence` (never below `0.2`).
 pub factual_confidence: f32,
 /// `1.0` minus the highest similarity found among stored embeddings; `1.0`
 /// when no comparison could be made.
 pub semantic_novelty: f32,
 /// Recency factor; `evaluate` currently always sets this to `1.0`.
 pub temporal_recency: f32,
 /// Role-based prior from `compute_content_type_prior`.
 pub content_type_prior: f32,
}
31
/// Outcome of evaluating one message for admission.
#[derive(Debug, Clone)]
pub struct AdmissionDecision {
 /// `true` when the message passed admission control.
 pub admitted: bool,
 /// Weighted sum of all factor scores.
 pub composite_score: f32,
 /// The per-factor scores that produced `composite_score`.
 pub factors: AdmissionFactors,
}
39
/// Relative importance of each admission factor.
///
/// Raw values may be on any scale; [`AdmissionWeights::normalized`] rescales them
/// so that they sum to `1.0`.
#[derive(Debug, Clone, Copy)]
pub struct AdmissionWeights {
    /// Weight of the `future_utility` factor.
    pub future_utility: f32,
    /// Weight of the `factual_confidence` factor.
    pub factual_confidence: f32,
    /// Weight of the `semantic_novelty` factor.
    pub semantic_novelty: f32,
    /// Weight of the `temporal_recency` factor.
    pub temporal_recency: f32,
    /// Weight of the `content_type_prior` factor.
    pub content_type_prior: f32,
}

impl AdmissionWeights {
    /// Returns a copy whose weights are non-negative and sum to `1.0`.
    ///
    /// Negative and NaN weights are treated as `0.0`. When the remaining total is
    /// effectively zero, or is not finite (an infinite weight, or very large
    /// weights whose sum overflows), every factor gets the uniform weight `0.2`
    /// instead of producing NaN or all-zero weights.
    #[must_use]
    pub fn normalized(&self) -> Self {
        const UNIFORM: f32 = 0.2;

        // `f32::max` returns the non-NaN operand, so NaN collapses to 0.0 here
        // together with negative values.
        let [fu, fc, sn, tr, cp] = [
            self.future_utility,
            self.factual_confidence,
            self.semantic_novelty,
            self.temporal_recency,
            self.content_type_prior,
        ]
        .map(|weight| weight.max(0.0));

        let sum = fu + fc + sn + tr + cp;

        // Dividing by an infinite sum would yield NaN (inf / inf) or silently
        // zero out every weight, so treat it like the degenerate all-zero case.
        if !sum.is_finite() || sum <= f32::EPSILON {
            return Self {
                future_utility: UNIFORM,
                factual_confidence: UNIFORM,
                semantic_novelty: UNIFORM,
                temporal_recency: UNIFORM,
                content_type_prior: UNIFORM,
            };
        }

        Self {
            future_utility: fu / sum,
            factual_confidence: fc / sum,
            semantic_novelty: sn / sum,
            temporal_recency: tr / sum,
            content_type_prior: cp / sum,
        }
    }
}
81
82pub struct AdmissionControl {
84 threshold: f32,
85 fast_path_margin: f32,
86 weights: AdmissionWeights,
87 provider: Option<AnyProvider>,
90}
91
92impl AdmissionControl {
93 #[must_use]
94 pub fn new(threshold: f32, fast_path_margin: f32, weights: AdmissionWeights) -> Self {
95 Self {
96 threshold,
97 fast_path_margin,
98 weights: weights.normalized(),
99 provider: None,
100 }
101 }
102
103 #[must_use]
107 pub fn with_provider(mut self, provider: AnyProvider) -> Self {
108 self.provider = Some(provider);
109 self
110 }
111
112 #[must_use]
114 pub fn threshold(&self) -> f32 {
115 self.threshold
116 }
117
118 pub async fn evaluate(
125 &self,
126 content: &str,
127 role: &str,
128 fallback_provider: &AnyProvider,
129 qdrant: Option<&Arc<EmbeddingStore>>,
130 ) -> AdmissionDecision {
131 let effective_provider = self.provider.as_ref().unwrap_or(fallback_provider);
132 let factual_confidence = compute_factual_confidence(content);
133 let temporal_recency = 1.0f32;
134 let content_type_prior = compute_content_type_prior(role);
135
136 let semantic_novelty = compute_semantic_novelty(content, effective_provider, qdrant).await;
138
139 let heuristic_score = self.weighted_score(
141 0.5,
142 factual_confidence,
143 semantic_novelty,
144 temporal_recency,
145 content_type_prior,
146 );
147
148 let future_utility = if heuristic_score >= self.threshold + self.fast_path_margin {
150 0.5 } else {
152 compute_future_utility(content, role, effective_provider).await
153 };
154
155 let composite_score = self.weighted_score(
156 future_utility,
157 factual_confidence,
158 semantic_novelty,
159 temporal_recency,
160 content_type_prior,
161 );
162
163 let admitted = composite_score >= self.threshold
164 || heuristic_score >= self.threshold + self.fast_path_margin;
165
166 AdmissionDecision {
167 admitted,
168 composite_score,
169 factors: AdmissionFactors {
170 future_utility,
171 factual_confidence,
172 semantic_novelty,
173 temporal_recency,
174 content_type_prior,
175 },
176 }
177 }
178
179 fn weighted_score(
180 &self,
181 future_utility: f32,
182 factual_confidence: f32,
183 semantic_novelty: f32,
184 temporal_recency: f32,
185 content_type_prior: f32,
186 ) -> f32 {
187 future_utility * self.weights.future_utility
188 + factual_confidence * self.weights.factual_confidence
189 + semantic_novelty * self.weights.semantic_novelty
190 + temporal_recency * self.weights.temporal_recency
191 + content_type_prior * self.weights.content_type_prior
192 }
193}
194
/// Estimates how confidently `content` is stated by counting hedging phrases.
///
/// Matching is case-insensitive and only counts a marker when it stands as a
/// whole word or phrase, so "almighty" does not count as "might" and
/// "impossibly" does not count as "possibly". Each distinct marker found costs
/// `0.1`; the result never drops below `0.2`.
///
/// Markers are counted independently of each other, so a phrase such as
/// "i'm not sure" also counts the shorter marker "not sure".
#[must_use]
pub fn compute_factual_confidence(content: &str) -> f32 {
    const HEDGING_MARKERS: &[&str] = &[
        "maybe",
        "might",
        "perhaps",
        "i think",
        "i believe",
        "not sure",
        "could be",
        "possibly",
        "probably",
        "uncertain",
        "not certain",
        "i'm not sure",
        "im not sure",
        "not confident",
    ];
    const PENALTY_PER_MARKER: f32 = 0.1;
    const MIN_CONFIDENCE: f32 = 0.2;

    let lower = content.to_lowercase();
    let matches = HEDGING_MARKERS
        .iter()
        .filter(|&&marker| contains_phrase(&lower, marker))
        .count();
    // `matches` is bounded by the marker list length, so the cast is exact.
    #[allow(clippy::cast_precision_loss)]
    let penalty = (matches as f32) * PENALTY_PER_MARKER;
    (1.0 - penalty).max(MIN_CONFIDENCE)
}

/// Returns `true` when `phrase` occurs in `haystack` with no alphanumeric
/// character directly before or after it.
fn contains_phrase(haystack: &str, phrase: &str) -> bool {
    haystack.match_indices(phrase).any(|(start, found)| {
        let end = start + found.len();
        let bounded_before = haystack[..start]
            .chars()
            .next_back()
            .map_or(true, |c| !c.is_alphanumeric());
        let bounded_after = haystack[end..]
            .chars()
            .next()
            .map_or(true, |c| !c.is_alphanumeric());
        bounded_before && bounded_after
    })
}
227
/// Returns the prior admission score associated with a message role.
///
/// Known roles are `"user"` (0.7), `"assistant"` (0.6), `"tool"` and
/// `"tool_result"` (0.8), and `"system"` (0.3). Any other role yields 0.5.
/// Matching is exact and case-sensitive.
#[must_use]
pub fn compute_content_type_prior(role: &str) -> f32 {
    const FALLBACK_PRIOR: f32 = 0.5;
    const ROLE_PRIORS: &[(&str, f32)] = &[
        ("user", 0.7),
        ("assistant", 0.6),
        ("tool", 0.8),
        ("tool_result", 0.8),
        ("system", 0.3),
    ];

    ROLE_PRIORS
        .iter()
        .find(|entry| entry.0 == role)
        .map_or(FALLBACK_PRIOR, |entry| entry.1)
}
242
243async fn compute_semantic_novelty(
247 content: &str,
248 provider: &AnyProvider,
249 qdrant: Option<&Arc<EmbeddingStore>>,
250) -> f32 {
251 let Some(store) = qdrant else {
252 return 1.0;
253 };
254 if !provider.supports_embeddings() {
255 return 1.0;
256 }
257 let vector = match provider.embed(content).await {
258 Ok(v) => v,
259 Err(e) => {
260 tracing::debug!(error = %e, "A-MAC: failed to embed for novelty, using 1.0");
261 return 1.0;
262 }
263 };
264 let Ok(vector_size) = u64::try_from(vector.len()) else {
265 return 1.0;
266 };
267 if let Err(e) = store.ensure_collection(vector_size).await {
268 tracing::debug!(error = %e, "A-MAC: collection not ready for novelty check");
269 return 1.0;
270 }
271 let results = match store.search(&vector, 3, None).await {
272 Ok(r) => r,
273 Err(e) => {
274 tracing::debug!(error = %e, "A-MAC: novelty search failed, using 1.0");
275 return 1.0;
276 }
277 };
278 let max_sim = results.iter().map(|r| r.score).fold(0.0f32, f32::max);
279 (1.0 - max_sim).max(0.0)
280}
281
282async fn compute_future_utility(content: &str, role: &str, provider: &AnyProvider) -> f32 {
286 use zeph_llm::provider::{Message, MessageMetadata, Role};
287
288 let system = "You are a memory relevance judge. Rate how likely this message will be \
289 referenced in future conversations on a scale of 0.0 to 1.0. \
290 Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
291
292 let user = format!(
293 "Role: {role}\nContent: {}\n\nFuture utility score (0.0-1.0):",
294 content.chars().take(500).collect::<String>()
295 );
296
297 let messages = vec![
298 Message {
299 role: Role::System,
300 content: system.to_owned(),
301 parts: vec![],
302 metadata: MessageMetadata::default(),
303 },
304 Message {
305 role: Role::User,
306 content: user,
307 parts: vec![],
308 metadata: MessageMetadata::default(),
309 },
310 ];
311
312 let result = match tokio::time::timeout(Duration::from_secs(8), provider.chat(&messages)).await
313 {
314 Ok(Ok(r)) => r,
315 Ok(Err(e)) => {
316 tracing::debug!(error = %e, "A-MAC: future_utility LLM call failed, using 0.5");
317 return 0.5;
318 }
319 Err(_) => {
320 tracing::debug!("A-MAC: future_utility LLM timed out, using 0.5");
321 return 0.5;
322 }
323 };
324
325 result.trim().parse::<f32>().unwrap_or(0.5).clamp(0.0, 1.0)
326}
327
328pub fn log_admission_decision(
332 decision: &AdmissionDecision,
333 content_preview: &str,
334 role: &str,
335 threshold: f32,
336) {
337 if decision.admitted {
338 tracing::trace!(
339 role,
340 composite_score = decision.composite_score,
341 threshold,
342 content_preview,
343 "A-MAC: admitted"
344 );
345 } else {
346 tracing::debug!(
347 role,
348 composite_score = decision.composite_score,
349 threshold,
350 future_utility = decision.factors.future_utility,
351 factual_confidence = decision.factors.factual_confidence,
352 semantic_novelty = decision.factors.semantic_novelty,
353 content_type_prior = decision.factors.content_type_prior,
354 content_preview,
355 "A-MAC: rejected"
356 );
357 }
358}
359
/// Describes a message that admission control refused.
#[derive(Debug)]
pub struct AdmissionRejected {
    /// Composite score the message received.
    pub composite_score: f32,
    /// Threshold the score was compared against.
    pub threshold: f32,
}

impl std::fmt::Display for AdmissionRejected {
    /// Writes a one-line message with score and threshold to three decimals.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            composite_score,
            threshold,
        } = self;
        write!(
            f,
            "A-MAC admission rejected (score={:.3} < threshold={:.3})",
            composite_score, threshold
        )
    }
}
376
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds weights in declaration order: utility, confidence, novelty,
    /// recency, prior.
    fn weights(
        future_utility: f32,
        factual_confidence: f32,
        semantic_novelty: f32,
        temporal_recency: f32,
        content_type_prior: f32,
    ) -> AdmissionWeights {
        AdmissionWeights {
            future_utility,
            factual_confidence,
            semantic_novelty,
            temporal_recency,
            content_type_prior,
        }
    }

    /// Every factor weighted equally at 0.20.
    fn equal_weights() -> AdmissionWeights {
        weights(0.20, 0.20, 0.20, 0.20, 0.20)
    }

    /// Weight mix shared by the admit/reject tests.
    fn mixed_weights() -> AdmissionWeights {
        weights(0.30, 0.15, 0.30, 0.10, 0.15)
    }

    /// Builds a decision from the composite score and factors in declaration order.
    fn decision(admitted: bool, composite_score: f32, factors: [f32; 5]) -> AdmissionDecision {
        let [future_utility, factual_confidence, semantic_novelty, temporal_recency, content_type_prior] =
            factors;
        AdmissionDecision {
            admitted,
            composite_score,
            factors: AdmissionFactors {
                future_utility,
                factual_confidence,
                semantic_novelty,
                temporal_recency,
                content_type_prior,
            },
        }
    }

    /// Asserts that `actual` is strictly within `tolerance` of `expected`.
    fn assert_near(actual: f32, expected: f32, tolerance: f32) {
        assert!((actual - expected).abs() < tolerance);
    }

    #[test]
    fn factual_confidence_no_hedging() {
        assert_near(
            compute_factual_confidence("The server uses TLS 1.3."),
            1.0,
            0.01,
        );
    }

    #[test]
    fn factual_confidence_with_one_marker() {
        assert_near(
            compute_factual_confidence("Maybe we should use TLS 1.3."),
            0.9,
            0.01,
        );
    }

    #[test]
    fn factual_confidence_many_markers_floors_at_0_2() {
        let score =
            compute_factual_confidence("maybe i think perhaps possibly might not sure i believe");
        assert!(score >= 0.2);
        assert!(score < 0.5);
    }

    #[test]
    fn content_type_prior_values() {
        let expectations = [
            ("user", 0.7),
            ("assistant", 0.6),
            ("tool", 0.8),
            ("system", 0.3),
            ("unknown", 0.5),
        ];
        for (role, expected) in expectations {
            assert_near(compute_content_type_prior(role), expected, 0.01);
        }
    }

    #[test]
    fn admission_control_admits_high_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, mixed_weights());
        // All factors at their maximum must yield (almost exactly) 1.0.
        let score = ctrl.weighted_score(1.0, 1.0, 1.0, 1.0, 1.0);
        assert!(score >= 0.99);
        assert!(score >= ctrl.threshold);
    }

    #[test]
    fn admission_control_rejects_low_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, mixed_weights());
        let score = ctrl.weighted_score(0.0, 0.0, 0.0, 0.0, 0.0);
        assert!(score < ctrl.threshold);
    }

    #[test]
    fn fast_path_admits_when_heuristic_above_threshold_plus_margin() {
        let (threshold, margin) = (0.40f32, 0.15f32);
        let ctrl = AdmissionControl::new(threshold, margin, equal_weights());

        // Neutral utility plus perfect heuristic factors.
        let heuristic = ctrl.weighted_score(0.5, 1.0, 1.0, 1.0, 1.0);
        let fast_path_bar = threshold + margin;
        assert!(
            heuristic >= fast_path_bar,
            "heuristic {heuristic} must exceed threshold+margin {}",
            threshold + margin
        );
        let admitted = heuristic >= fast_path_bar;
        assert!(admitted, "fast path must admit without LLM call");
    }

    #[test]
    fn slow_path_required_when_heuristic_below_threshold_plus_margin() {
        let (threshold, margin) = (0.50f32, 0.20f32);
        let ctrl = AdmissionControl::new(
            threshold,
            margin,
            weights(0.40, 0.15, 0.15, 0.15, 0.15),
        );

        let heuristic = ctrl.weighted_score(0.5, 0.3, 0.3, 0.3, 0.3);
        assert!(
            heuristic < threshold + margin,
            "heuristic {heuristic} must be below threshold+margin {}",
            threshold + margin
        );
    }

    #[test]
    fn log_admission_decision_does_not_panic() {
        let admitted_decision = decision(true, 0.75, [0.8, 0.9, 0.7, 1.0, 0.7]);
        log_admission_decision(&admitted_decision, "preview text", "user", 0.40);

        let rejected_decision = decision(false, 0.20, [0.1, 0.2, 0.3, 1.0, 0.3]);
        log_admission_decision(&rejected_decision, "maybe short content", "assistant", 0.40);
    }

    #[test]
    fn admission_rejected_display() {
        let msg = AdmissionRejected {
            composite_score: 0.25,
            threshold: 0.45,
        }
        .to_string();
        assert!(msg.contains("0.250"));
        assert!(msg.contains("0.450"));
    }

    #[test]
    fn threshold_accessor() {
        let ctrl = AdmissionControl::new(0.55, 0.10, equal_weights());
        assert_near(ctrl.threshold(), 0.55, 0.001);
    }

    #[test]
    fn content_type_prior_tool_result_alias() {
        assert_near(compute_content_type_prior("tool_result"), 0.8, 0.01);
    }
}