Skip to main content

zeph_memory/
admission.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! A-MAC adaptive memory admission control (#2317).
5//!
6//! Write-time gate inserted before `SQLite` persistence in `remember()` and `remember_with_parts()`.
7//! Evaluates 5 factors and rejects messages below the configured threshold.
8
9use std::sync::Arc;
10use std::time::Duration;
11
12use zeph_llm::any::AnyProvider;
13use zeph_llm::provider::LlmProvider as _;
14
15use crate::embedding_store::EmbeddingStore;
16
17/// Per-factor scores behind a single admission decision.
///
/// `AdmissionControl::evaluate` fills this struct and combines the factors into the
/// composite score using the configured weights. Every factor is expected to lie in `[0, 1]`.
18#[derive(Debug, Clone, serde::Serialize)]
19pub struct AdmissionFactors {
20    /// LLM-estimated reuse probability. `[0, 1]`. Set to 0.5 on the fast path (LLM skipped)
    /// and whenever the LLM call fails, times out, or its reply does not parse as a number.
21    pub future_utility: f32,
22    /// Inverse hedging heuristic: high confidence → high score. Produced by
    /// `compute_factual_confidence`, which never returns less than `0.2`.
23    pub factual_confidence: f32,
24    /// `1.0 - max_similarity_to_top3_neighbors`. `[0, 1]`. Also 1.0 when no embedding store
    /// is supplied, the provider cannot embed, or the embedding/search step fails.
25    pub semantic_novelty: f32,
26    /// Always `1.0` at write time. `[0, 1]`. The original note says decay is applied at
    /// recall; that code is not part of this file.
27    pub temporal_recency: f32,
28    /// Prior based on message role (see `compute_content_type_prior`). `[0, 1]`.
29    pub content_type_prior: f32,
30}
31
32/// Result of an admission evaluation, as returned by `AdmissionControl::evaluate`.
33#[derive(Debug, Clone)]
34pub struct AdmissionDecision {
    /// `true` when the message may be persisted: either the composite score reached the
    /// threshold, or the heuristic-only score reached `threshold + fast_path_margin`.
35    pub admitted: bool,
    /// Weighted sum of all five factors (using the normalized weights).
36    pub composite_score: f32,
    /// The individual factor values that produced `composite_score`.
37    pub factors: AdmissionFactors,
38}
39
/// Relative weights of the five admission factors in the composite score.
///
/// Raw values may be on any scale; call [`AdmissionWeights::normalized`] to obtain
/// non-negative weights that sum to `1.0`.
#[derive(Debug, Clone, Copy)]
pub struct AdmissionWeights {
    /// Weight of the LLM-estimated future utility factor.
    pub future_utility: f32,
    /// Weight of the hedging-based factual confidence factor.
    pub factual_confidence: f32,
    /// Weight of the semantic novelty factor.
    pub semantic_novelty: f32,
    /// Weight of the temporal recency factor.
    pub temporal_recency: f32,
    /// Weight of the role-based content type prior.
    pub content_type_prior: f32,
}

impl AdmissionWeights {
    /// Return a copy with all fields clamped to `>= 0.0` and normalized so they sum to `1.0`.
    ///
    /// Negative, `NaN` and infinite inputs are treated as `0.0`: an infinite weight would
    /// otherwise turn into `inf / inf = NaN` and poison every composite score.
    ///
    /// Falls back to equal weights (`0.2` each) when the sum is effectively zero (all fields
    /// were zero, negative or non-finite) or when the sum overflows to infinity.
    #[must_use]
    pub fn normalized(&self) -> Self {
        /// Weight given to each of the five factors when the inputs are unusable.
        const EQUAL_SHARE: f32 = 0.2;

        // Map anything that is not a usable non-negative number to zero.
        let sanitize = |w: f32| if w.is_finite() { w.max(0.0) } else { 0.0 };

        let fu = sanitize(self.future_utility);
        let fc = sanitize(self.factual_confidence);
        let sn = sanitize(self.semantic_novelty);
        let tr = sanitize(self.temporal_recency);
        let cp = sanitize(self.content_type_prior);
        let sum = fu + fc + sn + tr + cp;

        // `sum` can still be infinite if several huge finite weights overflow when added.
        if !sum.is_finite() || sum <= f32::EPSILON {
            return Self {
                future_utility: EQUAL_SHARE,
                factual_confidence: EQUAL_SHARE,
                semantic_novelty: EQUAL_SHARE,
                temporal_recency: EQUAL_SHARE,
                content_type_prior: EQUAL_SHARE,
            };
        }

        Self {
            future_utility: fu / sum,
            factual_confidence: fc / sum,
            semantic_novelty: sn / sum,
            temporal_recency: tr / sum,
            content_type_prior: cp / sum,
        }
    }
}
81
82/// A-MAC admission controller.
83pub struct AdmissionControl {
84    threshold: f32,
85    fast_path_margin: f32,
86    weights: AdmissionWeights,
87    /// Dedicated provider for LLM-based evaluation. Falls back to the caller-supplied provider
88    /// when `None` (e.g. in tests or when `admission_provider` is not configured).
89    provider: Option<AnyProvider>,
90}
91
92impl AdmissionControl {
93    #[must_use]
94    pub fn new(threshold: f32, fast_path_margin: f32, weights: AdmissionWeights) -> Self {
95        Self {
96            threshold,
97            fast_path_margin,
98            weights: weights.normalized(),
99            provider: None,
100        }
101    }
102
103    /// Attach a dedicated LLM provider for `future_utility` evaluation.
104    ///
105    /// When set, this provider is used instead of the caller-supplied fallback.
106    #[must_use]
107    pub fn with_provider(mut self, provider: AnyProvider) -> Self {
108        self.provider = Some(provider);
109        self
110    }
111
112    /// Return the configured admission threshold.
113    #[must_use]
114    pub fn threshold(&self) -> f32 {
115        self.threshold
116    }
117
118    /// Evaluate admission for a message.
119    ///
120    /// Fast path: skips LLM when heuristic-only score is already above `threshold + fast_path_margin`.
121    /// Slow path: calls LLM for `future_utility` when borderline.
122    ///
123    /// On LLM failure, `future_utility` defaults to `0.5` (neutral).
124    pub async fn evaluate(
125        &self,
126        content: &str,
127        role: &str,
128        fallback_provider: &AnyProvider,
129        qdrant: Option<&Arc<EmbeddingStore>>,
130    ) -> AdmissionDecision {
131        let effective_provider = self.provider.as_ref().unwrap_or(fallback_provider);
132        let factual_confidence = compute_factual_confidence(content);
133        let temporal_recency = 1.0f32;
134        let content_type_prior = compute_content_type_prior(role);
135
136        // Semantic novelty requires an async embedding search.
137        let semantic_novelty = compute_semantic_novelty(content, effective_provider, qdrant).await;
138
139        // Heuristic-only composite (future_utility treated as 0.5 neutral placeholder).
140        let heuristic_score = self.weighted_score(
141            0.5,
142            factual_confidence,
143            semantic_novelty,
144            temporal_recency,
145            content_type_prior,
146        );
147
148        // Fast path: admit without LLM if score is clearly above threshold + margin.
149        let future_utility = if heuristic_score >= self.threshold + self.fast_path_margin {
150            0.5 // not used in final score since we admit early, but kept for audit
151        } else {
152            compute_future_utility(content, role, effective_provider).await
153        };
154
155        let composite_score = self.weighted_score(
156            future_utility,
157            factual_confidence,
158            semantic_novelty,
159            temporal_recency,
160            content_type_prior,
161        );
162
163        let admitted = composite_score >= self.threshold
164            || heuristic_score >= self.threshold + self.fast_path_margin;
165
166        AdmissionDecision {
167            admitted,
168            composite_score,
169            factors: AdmissionFactors {
170                future_utility,
171                factual_confidence,
172                semantic_novelty,
173                temporal_recency,
174                content_type_prior,
175            },
176        }
177    }
178
179    fn weighted_score(
180        &self,
181        future_utility: f32,
182        factual_confidence: f32,
183        semantic_novelty: f32,
184        temporal_recency: f32,
185        content_type_prior: f32,
186    ) -> f32 {
187        future_utility * self.weights.future_utility
188            + factual_confidence * self.weights.factual_confidence
189            + semantic_novelty * self.weights.semantic_novelty
190            + temporal_recency * self.weights.temporal_recency
191            + content_type_prior * self.weights.content_type_prior
192    }
193}
194
/// Heuristic: detect hedging markers and compute a confidence score.
///
/// Returns `1.0` for confident content and subtracts `0.1` for every distinct hedging marker
/// found, never going below `0.2`.
///
/// Markers are matched case-insensitively as whole words/phrases: a marker only counts when
/// it is not directly preceded or followed by an alphanumeric character. This keeps words such
/// as "almighty" from being read as the hedge "might".
#[must_use]
pub fn compute_factual_confidence(content: &str) -> f32 {
    // Common English hedging markers. Content in other languages scores 1.0 (no penalty).
    //
    // No marker may contain another one, otherwise a single hedge would be penalized twice
    // ("i'm not sure" is already covered by "not sure"). "uncertainty" is listed explicitly
    // because whole-word matching does not pick it up through "uncertain".
    const HEDGING_MARKERS: &[&str] = &[
        "maybe",
        "might",
        "perhaps",
        "i think",
        "i believe",
        "not sure",
        "could be",
        "possibly",
        "probably",
        "uncertain",
        "uncertainty",
        "not certain",
        "not confident",
    ];
    const PENALTY_PER_MARKER: f32 = 0.1;
    const MIN_CONFIDENCE: f32 = 0.2;

    /// True when `phrase` occurs in `haystack` delimited by non-alphanumeric characters
    /// (or by the start/end of the text).
    fn contains_phrase(haystack: &str, phrase: &str) -> bool {
        haystack.match_indices(phrase).any(|(start, matched)| {
            let end = start + matched.len();
            let glued_before =
                matches!(haystack[..start].chars().next_back(), Some(c) if c.is_alphanumeric());
            let glued_after =
                matches!(haystack[end..].chars().next(), Some(c) if c.is_alphanumeric());
            !glued_before && !glued_after
        })
    }

    let lower = content.to_lowercase();
    let matches = HEDGING_MARKERS
        .iter()
        .filter(|&&marker| contains_phrase(&lower, marker))
        .count();

    // `matches` is bounded by the marker list length, so the cast is exact.
    #[allow(clippy::cast_precision_loss)]
    let penalty = (matches as f32) * PENALTY_PER_MARKER;
    (1.0 - penalty).max(MIN_CONFIDENCE)
}
227
/// Prior score derived from the message role.
///
/// Tool output (`"tool"` / `"tool_result"`) ranks highest because it carries factual data;
/// system messages rank lowest. Roles are matched exactly (case-sensitive); any role not in
/// the table gets the neutral `0.5`. The values reflect typical content density, not how
/// important a role is.
#[must_use]
pub fn compute_content_type_prior(role: &str) -> f32 {
    const DEFAULT_PRIOR: f32 = 0.5;
    const ROLE_PRIORS: [(&str, f32); 5] = [
        ("user", 0.7),
        ("assistant", 0.6),
        ("tool", 0.8),
        ("tool_result", 0.8),
        ("system", 0.3),
    ];

    ROLE_PRIORS
        .iter()
        .find(|(known_role, _)| *known_role == role)
        .map_or(DEFAULT_PRIOR, |&(_, prior)| prior)
}
242
243/// Compute semantic novelty as `1.0 - max_cosine_similarity_to_top3_neighbors`.
244///
245/// Returns `1.0` when the memory is empty (everything is novel at cold start).
246async fn compute_semantic_novelty(
247    content: &str,
248    provider: &AnyProvider,
249    qdrant: Option<&Arc<EmbeddingStore>>,
250) -> f32 {
251    let Some(store) = qdrant else {
252        return 1.0;
253    };
254    if !provider.supports_embeddings() {
255        return 1.0;
256    }
257    let vector = match provider.embed(content).await {
258        Ok(v) => v,
259        Err(e) => {
260            tracing::debug!(error = %e, "A-MAC: failed to embed for novelty, using 1.0");
261            return 1.0;
262        }
263    };
264    let Ok(vector_size) = u64::try_from(vector.len()) else {
265        return 1.0;
266    };
267    if let Err(e) = store.ensure_collection(vector_size).await {
268        tracing::debug!(error = %e, "A-MAC: collection not ready for novelty check");
269        return 1.0;
270    }
271    let results = match store.search(&vector, 3, None).await {
272        Ok(r) => r,
273        Err(e) => {
274            tracing::debug!(error = %e, "A-MAC: novelty search failed, using 1.0");
275            return 1.0;
276        }
277    };
278    let max_sim = results.iter().map(|r| r.score).fold(0.0f32, f32::max);
279    (1.0 - max_sim).max(0.0)
280}
281
282/// LLM-based future utility estimate.
283///
284/// On timeout or error, returns `0.5` (neutral — no bias toward admit or reject).
285async fn compute_future_utility(content: &str, role: &str, provider: &AnyProvider) -> f32 {
286    use zeph_llm::provider::{Message, MessageMetadata, Role};
287
288    let system = "You are a memory relevance judge. Rate how likely this message will be \
289        referenced in future conversations on a scale of 0.0 to 1.0. \
290        Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
291
292    let user = format!(
293        "Role: {role}\nContent: {}\n\nFuture utility score (0.0-1.0):",
294        content.chars().take(500).collect::<String>()
295    );
296
297    let messages = vec![
298        Message {
299            role: Role::System,
300            content: system.to_owned(),
301            parts: vec![],
302            metadata: MessageMetadata::default(),
303        },
304        Message {
305            role: Role::User,
306            content: user,
307            parts: vec![],
308            metadata: MessageMetadata::default(),
309        },
310    ];
311
312    let result = match tokio::time::timeout(Duration::from_secs(8), provider.chat(&messages)).await
313    {
314        Ok(Ok(r)) => r,
315        Ok(Err(e)) => {
316            tracing::debug!(error = %e, "A-MAC: future_utility LLM call failed, using 0.5");
317            return 0.5;
318        }
319        Err(_) => {
320            tracing::debug!("A-MAC: future_utility LLM timed out, using 0.5");
321            return 0.5;
322        }
323    };
324
325    result.trim().parse::<f32>().unwrap_or(0.5).clamp(0.0, 1.0)
326}
327
328/// Log an admission decision to the audit log via `tracing`.
329///
330/// Rejections are always logged at debug level. Admissions are trace-level.
331pub fn log_admission_decision(
332    decision: &AdmissionDecision,
333    content_preview: &str,
334    role: &str,
335    threshold: f32,
336) {
337    if decision.admitted {
338        tracing::trace!(
339            role,
340            composite_score = decision.composite_score,
341            threshold,
342            content_preview,
343            "A-MAC: admitted"
344        );
345    } else {
346        tracing::debug!(
347            role,
348            composite_score = decision.composite_score,
349            threshold,
350            future_utility = decision.factors.future_utility,
351            factual_confidence = decision.factors.factual_confidence,
352            semantic_novelty = decision.factors.semantic_novelty,
353            content_type_prior = decision.factors.content_type_prior,
354            content_preview,
355            "A-MAC: rejected"
356        );
357    }
358}
359
/// Error type for admission-rejected persists.
///
/// Carries the composite score that fell short and the threshold it was compared against.
/// Implements [`std::error::Error`], so it can be boxed as `dyn Error`, propagated with `?`,
/// or used as the source of another error.
#[derive(Debug)]
pub struct AdmissionRejected {
    /// Composite score the message received.
    pub composite_score: f32,
    /// Threshold the score had to reach.
    pub threshold: f32,
}

impl std::fmt::Display for AdmissionRejected {
    /// Formats both values with three decimals, e.g. `score=0.250 < threshold=0.450`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "A-MAC admission rejected (score={:.3} < threshold={:.3})",
            self.composite_score, self.threshold
        )
    }
}

// A rejection has no underlying cause, so the default `source()` (`None`) is correct.
impl std::error::Error for AdmissionRejected {}
376
// Unit tests for the synchronous parts of admission control.
//
// None of these tests call `AdmissionControl::evaluate()`; that would need a real LLM
// provider. Scoring tests therefore exercise `weighted_score()` directly and re-state the
// comparison that `evaluate()` performs.
377#[cfg(test)]
378mod tests {
379    use super::*;
380
    // Text without hedging markers gets full confidence.
381    #[test]
382    fn factual_confidence_no_hedging() {
383        assert!((compute_factual_confidence("The server uses TLS 1.3.") - 1.0).abs() < 0.01);
384    }
385
    // One marker ("maybe", matched case-insensitively) costs 0.1.
386    #[test]
387    fn factual_confidence_with_one_marker() {
388        let score = compute_factual_confidence("Maybe we should use TLS 1.3.");
389        assert!((score - 0.9).abs() < 0.01);
390    }
391
    // Many markers lower the score substantially, but never below the 0.2 floor.
392    #[test]
393    fn factual_confidence_many_markers_floors_at_0_2() {
394        let content = "maybe i think perhaps possibly might not sure i believe";
395        let score = compute_factual_confidence(content);
396        assert!(score >= 0.2);
397        assert!(score < 0.5);
398    }
399
    // Each known role maps to its prior; anything else gets the neutral 0.5.
400    #[test]
401    fn content_type_prior_values() {
402        assert!((compute_content_type_prior("user") - 0.7).abs() < 0.01);
403        assert!((compute_content_type_prior("assistant") - 0.6).abs() < 0.01);
404        assert!((compute_content_type_prior("tool") - 0.8).abs() < 0.01);
405        assert!((compute_content_type_prior("system") - 0.3).abs() < 0.01);
406        assert!((compute_content_type_prior("unknown") - 0.5).abs() < 0.01);
407    }
408
    // With every factor at 1.0 the weighted score reaches ~1.0, well above the threshold.
409    #[test]
410    fn admission_control_admits_high_score() {
411        let weights = AdmissionWeights {
412            future_utility: 0.30,
413            factual_confidence: 0.15,
414            semantic_novelty: 0.30,
415            temporal_recency: 0.10,
416            content_type_prior: 0.15,
417        };
418        let ctrl = AdmissionControl::new(0.40, 0.15, weights);
419        // Score all factors at 1.0 → composite = 1.0.
420        let score = ctrl.weighted_score(1.0, 1.0, 1.0, 1.0, 1.0);
421        assert!(score >= 0.99);
422        // Admitted when score >= threshold.
423        let admitted = score >= ctrl.threshold;
424        assert!(admitted);
425    }
426
    // With every factor at 0.0 the weighted score is 0.0, below the threshold.
427    #[test]
428    fn admission_control_rejects_low_score() {
429        let weights = AdmissionWeights {
430            future_utility: 0.30,
431            factual_confidence: 0.15,
432            semantic_novelty: 0.30,
433            temporal_recency: 0.10,
434            content_type_prior: 0.15,
435        };
436        let ctrl = AdmissionControl::new(0.40, 0.15, weights);
437        // Score all factors at 0.0 → composite = 0.0.
438        let score = ctrl.weighted_score(0.0, 0.0, 0.0, 0.0, 0.0);
439        assert!(score < ctrl.threshold);
440    }
441
442    // Test: a heuristic score above threshold + margin satisfies the fast-path condition.
443    // Only the arithmetic is checked here; evaluate() itself (and the skipped LLM call) is not run.
444    #[test]
445    fn fast_path_admits_when_heuristic_above_threshold_plus_margin() {
446        let weights = AdmissionWeights {
447            future_utility: 0.20,
448            factual_confidence: 0.20,
449            semantic_novelty: 0.20,
450            temporal_recency: 0.20,
451            content_type_prior: 0.20,
452        };
453        let threshold = 0.40f32;
454        let margin = 0.15f32;
455        let ctrl = AdmissionControl::new(threshold, margin, weights);
456
457        // All non-future_utility factors at 1.0; future_utility treated as 0.5 (fast path neutral).
458        let heuristic = ctrl.weighted_score(0.5, 1.0, 1.0, 1.0, 1.0);
459        // heuristic = 0.5*0.2 + 1.0*0.2 + 1.0*0.2 + 1.0*0.2 + 1.0*0.2 = 0.1 + 0.8 = 0.9
460        assert!(
461            heuristic >= threshold + margin,
462            "heuristic {heuristic} must exceed threshold+margin {}",
463            threshold + margin
464        );
465        // In evaluate(), admitted = composite >= threshold || heuristic >= threshold + margin.
466        let admitted = heuristic >= threshold + margin;
467        assert!(admitted, "fast path must admit without LLM call");
468    }
469
470    // Test: a low heuristic score stays below threshold + margin, i.e. evaluate() would take
    // the slow path. As above, only the arithmetic precondition is checked.
471    #[test]
472    fn slow_path_required_when_heuristic_below_threshold_plus_margin() {
473        let weights = AdmissionWeights {
474            future_utility: 0.40,
475            factual_confidence: 0.15,
476            semantic_novelty: 0.15,
477            temporal_recency: 0.15,
478            content_type_prior: 0.15,
479        };
480        let threshold = 0.50f32;
481        let margin = 0.20f32;
482        let ctrl = AdmissionControl::new(threshold, margin, weights);
483
484        // All factors low — heuristic will be below threshold + margin.
485        let heuristic = ctrl.weighted_score(0.5, 0.3, 0.3, 0.3, 0.3);
486        assert!(
487            heuristic < threshold + margin,
488            "heuristic {heuristic} must be below threshold+margin {}",
489            threshold + margin
490        );
491    }
492
493    // Test: log_admission_decision runs without panic for both admitted and rejected.
    // The emitted log output is not inspected.
494    #[test]
495    fn log_admission_decision_does_not_panic() {
496        let admitted_decision = AdmissionDecision {
497            admitted: true,
498            composite_score: 0.75,
499            factors: AdmissionFactors {
500                future_utility: 0.8,
501                factual_confidence: 0.9,
502                semantic_novelty: 0.7,
503                temporal_recency: 1.0,
504                content_type_prior: 0.7,
505            },
506        };
507        log_admission_decision(&admitted_decision, "preview text", "user", 0.40);
508
509        let rejected_decision = AdmissionDecision {
510            admitted: false,
511            composite_score: 0.20,
512            factors: AdmissionFactors {
513                future_utility: 0.1,
514                factual_confidence: 0.2,
515                semantic_novelty: 0.3,
516                temporal_recency: 1.0,
517                content_type_prior: 0.3,
518            },
519        };
520        log_admission_decision(&rejected_decision, "maybe short content", "assistant", 0.40);
521    }
522
523    // Test: AdmissionRejected Display prints score and threshold with three decimals.
524    #[test]
525    fn admission_rejected_display() {
526        let err = AdmissionRejected {
527            composite_score: 0.25,
528            threshold: 0.45,
529        };
530        let msg = format!("{err}");
531        assert!(msg.contains("0.250"));
532        assert!(msg.contains("0.450"));
533    }
534
535    // Test: threshold() accessor returns the configured value.
536    #[test]
537    fn threshold_accessor() {
538        let weights = AdmissionWeights {
539            future_utility: 0.20,
540            factual_confidence: 0.20,
541            semantic_novelty: 0.20,
542            temporal_recency: 0.20,
543            content_type_prior: 0.20,
544        };
545        let ctrl = AdmissionControl::new(0.55, 0.10, weights);
546        assert!((ctrl.threshold() - 0.55).abs() < 0.001);
547    }
548
549    // Test: "tool_result" is an alias of "tool" and gets the same 0.8 prior.
550    #[test]
551    fn content_type_prior_tool_result_alias() {
552        assert!((compute_content_type_prior("tool_result") - 0.8).abs() < 0.01);
553    }
554}