Skip to main content

zeph_memory/
admission.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! A-MAC adaptive memory admission control (#2317).
5//!
6//! Write-time gate inserted before `SQLite` persistence in `remember()` and `remember_with_parts()`.
//! Evaluates five factors (six when the goal-conditioned write gate is enabled) and rejects
//! messages whose composite score falls below the configured threshold.
8
9use std::sync::Arc;
10use std::time::Duration;
11
12use zeph_llm::any::AnyProvider;
13use zeph_llm::provider::LlmProvider as _;
14
15use crate::embedding_store::EmbeddingStore;
16use zeph_common::math::cosine_similarity;
17
18/// Per-factor scores for the admission decision.
19#[derive(Debug, Clone, serde::Serialize)]
20pub struct AdmissionFactors {
21    /// LLM-estimated reuse probability. `[0, 1]`. Set to 0.5 on fast path or LLM failure.
22    pub future_utility: f32,
23    /// Inverse hedging heuristic: high confidence → high score. `[0, 1]`.
24    pub factual_confidence: f32,
25    /// `1.0 - max_similarity_to_top3_neighbors`. `[0, 1]`. 1.0 when memory is empty.
26    pub semantic_novelty: f32,
27    /// Always `1.0` at write time (decay applied at recall). `[0, 1]`.
28    pub temporal_recency: f32,
29    /// Prior based on message role. `[0, 1]`.
30    pub content_type_prior: f32,
31    /// Goal-conditioned utility (#2408). Cosine similarity between goal embedding and
32    /// candidate memory. `0.0` when `goal_conditioned_write = false` or goal text is absent/trivial.
33    pub goal_utility: f32,
34}
35
36/// Result of an admission evaluation.
37#[derive(Debug, Clone)]
38pub struct AdmissionDecision {
39    pub admitted: bool,
40    pub composite_score: f32,
41    pub factors: AdmissionFactors,
42}
43
/// Relative weights of the admission factors in the composite score.
///
/// Values may be supplied on any scale; call [`AdmissionWeights::normalized`] to obtain a
/// copy whose fields are non-negative and sum to `1.0`.
#[derive(Debug, Clone, Copy)]
pub struct AdmissionWeights {
    /// Weight of the LLM-estimated future utility.
    pub future_utility: f32,
    /// Weight of the hedging-based factual confidence.
    pub factual_confidence: f32,
    /// Weight of the semantic novelty score.
    pub semantic_novelty: f32,
    /// Weight of the temporal recency score.
    pub temporal_recency: f32,
    /// Weight of the role-based content-type prior.
    pub content_type_prior: f32,
    /// Goal-conditioned utility weight. `0.0` when goal gate is disabled.
    pub goal_utility: f32,
}

impl AdmissionWeights {
    /// Equal split across the five base factors, with the goal factor disabled.
    /// Used whenever the supplied weights cannot be normalized.
    const EQUAL_FALLBACK: Self = Self {
        future_utility: 0.2,
        factual_confidence: 0.2,
        semantic_novelty: 0.2,
        temporal_recency: 0.2,
        content_type_prior: 0.2,
        goal_utility: 0.0,
    };

    /// Return a copy with all fields clamped to `>= 0.0` and normalized so they sum to `1.0`.
    ///
    /// Falls back to equal weights over the five base factors (goal utility `0.0`) when the
    /// weights cannot be normalized: the clamped sum is effectively zero (all fields were
    /// zero, negative or NaN) or is not finite (an infinite field, or overflow while summing).
    #[must_use]
    pub fn normalized(&self) -> Self {
        // `f32::max` returns the non-NaN operand, so a NaN field collapses to 0.0 here.
        let fu = self.future_utility.max(0.0);
        let fc = self.factual_confidence.max(0.0);
        let sn = self.semantic_novelty.max(0.0);
        let tr = self.temporal_recency.max(0.0);
        let cp = self.content_type_prior.max(0.0);
        let gu = self.goal_utility.max(0.0);
        let sum = fu + fc + sn + tr + cp + gu;
        // Dividing by a non-finite sum would yield NaN (inf / inf) or all-zero weights,
        // which silently breaks every later comparison against the threshold.
        if !sum.is_finite() || sum <= f32::EPSILON {
            return Self::EQUAL_FALLBACK;
        }
        Self {
            future_utility: fu / sum,
            factual_confidence: fc / sum,
            semantic_novelty: sn / sum,
            temporal_recency: tr / sum,
            content_type_prior: cp / sum,
            goal_utility: gu / sum,
        }
    }
}
90
91/// Goal-conditioned write gate configuration for `AdmissionControl`.
92#[derive(Debug, Clone)]
93pub struct GoalGateConfig {
94    /// Minimum cosine similarity to consider memory goal-relevant.
95    pub threshold: f32,
96    /// LLM provider for borderline refinement (similarity within 0.1 of threshold).
97    pub provider: Option<AnyProvider>,
98    /// Weight of the `goal_utility` factor in the composite score.
99    pub weight: f32,
100}
101
102/// A-MAC adaptive memory admission controller (#2317).
103///
104/// Evaluates five factors (future utility, factual confidence, semantic novelty,
105/// temporal recency, content-type prior) and rejects messages below the configured
106/// composite score threshold before they are persisted.
107///
108/// Optionally extended with a goal-conditioned write gate (#2408) that adds a
109/// sixth factor based on the cosine similarity between the current goal embedding
110/// and the candidate memory.
111///
112/// # Examples
113///
114/// ```rust,no_run
115/// use zeph_memory::{AdmissionControl, AdmissionWeights};
116///
117/// let weights = AdmissionWeights {
118///     future_utility: 0.3,
119///     factual_confidence: 0.2,
120///     semantic_novelty: 0.2,
121///     temporal_recency: 0.1,
122///     content_type_prior: 0.2,
123///     goal_utility: 0.0,
124/// };
125/// let controller = AdmissionControl::new(0.4, 0.1, weights);
126/// ```
127pub struct AdmissionControl {
128    threshold: f32,
129    fast_path_margin: f32,
130    weights: AdmissionWeights,
131    /// Dedicated provider for LLM-based evaluation. Falls back to the caller-supplied provider
132    /// when `None` (e.g. in tests or when `admission_provider` is not configured).
133    provider: Option<AnyProvider>,
134    /// Goal-conditioned write gate. `None` when `goal_conditioned_write = false`.
135    goal_gate: Option<GoalGateConfig>,
136}
137
138impl AdmissionControl {
139    /// Create a new admission controller.
140    ///
141    /// - `threshold` — composite score `[0, 1]` below which messages are rejected.
142    /// - `fast_path_margin` — when all non-LLM factors already push the score far above
143    ///   the threshold (by at least this margin), the LLM `future_utility` call is skipped.
144    /// - `weights` — factor weights; normalized automatically so they sum to `1.0`.
145    #[must_use]
146    pub fn new(threshold: f32, fast_path_margin: f32, weights: AdmissionWeights) -> Self {
147        Self {
148            threshold,
149            fast_path_margin,
150            weights: weights.normalized(),
151            provider: None,
152            goal_gate: None,
153        }
154    }
155
156    /// Attach a dedicated LLM provider for `future_utility` evaluation.
157    ///
158    /// When set, this provider is used instead of the caller-supplied fallback.
159    #[must_use]
160    pub fn with_provider(mut self, provider: AnyProvider) -> Self {
161        self.provider = Some(provider);
162        self
163    }
164
165    /// Enable goal-conditioned write gate (#2408).
166    #[must_use]
167    pub fn with_goal_gate(mut self, config: GoalGateConfig) -> Self {
168        // Redistribute goal_utility weight from future_utility.
169        let gu = config.weight.clamp(0.0, 1.0);
170        let mut weights = self.weights;
171        weights.goal_utility = gu;
172        // Reduce future_utility by the same amount (soft redistribution).
173        weights.future_utility = (weights.future_utility - gu).max(0.0);
174        self.weights = weights.normalized();
175        self.goal_gate = Some(config);
176        self
177    }
178
179    /// Return the configured admission threshold.
180    #[must_use]
181    pub fn threshold(&self) -> f32 {
182        self.threshold
183    }
184
185    /// Evaluate admission for a message.
186    ///
187    /// `goal_text`: optional current-turn goal context for goal-conditioned scoring.
188    /// Ignored when the goal gate is disabled or `goal_text` is `None`/trivial (< 10 chars).
189    ///
190    /// Fast path: skips LLM when heuristic-only score is already above `threshold + fast_path_margin`.
191    /// Slow path: calls LLM for `future_utility` when borderline.
192    ///
193    /// On LLM failure, `future_utility` defaults to `0.5` (neutral).
194    #[cfg_attr(
195        feature = "profiling",
196        tracing::instrument(name = "memory.admission", skip_all)
197    )]
198    pub async fn evaluate(
199        &self,
200        content: &str,
201        role: &str,
202        fallback_provider: &AnyProvider,
203        qdrant: Option<&Arc<EmbeddingStore>>,
204        goal_text: Option<&str>,
205    ) -> AdmissionDecision {
206        let effective_provider = self.provider.as_ref().unwrap_or(fallback_provider);
207        let factual_confidence = compute_factual_confidence(content);
208        let temporal_recency = 1.0f32;
209        let content_type_prior = compute_content_type_prior(role);
210
211        // Semantic novelty requires an async embedding search.
212        let semantic_novelty = compute_semantic_novelty(content, effective_provider, qdrant).await;
213
214        // Goal-conditioned utility (W3.1 fix: skip trivial goal text < 10 chars).
215        let goal_utility = match &self.goal_gate {
216            Some(gate) => {
217                let effective_goal = goal_text.filter(|t| t.trim().len() >= 10);
218                if let Some(goal) = effective_goal {
219                    compute_goal_utility(content, goal, gate, effective_provider, qdrant).await
220                } else {
221                    0.0
222                }
223            }
224            None => 0.0,
225        };
226
227        // Heuristic-only composite (future_utility treated as 0.5 neutral placeholder).
228        let heuristic_score = self.weighted_score(
229            0.5,
230            factual_confidence,
231            semantic_novelty,
232            temporal_recency,
233            content_type_prior,
234            goal_utility,
235        );
236
237        // Fast path: admit without LLM if score is clearly above threshold + margin.
238        let future_utility = if heuristic_score >= self.threshold + self.fast_path_margin {
239            0.5 // not used in final score since we admit early, but kept for audit
240        } else {
241            compute_future_utility(content, role, effective_provider).await
242        };
243
244        let composite_score = self.weighted_score(
245            future_utility,
246            factual_confidence,
247            semantic_novelty,
248            temporal_recency,
249            content_type_prior,
250            goal_utility,
251        );
252
253        let admitted = composite_score >= self.threshold
254            || heuristic_score >= self.threshold + self.fast_path_margin;
255
256        AdmissionDecision {
257            admitted,
258            composite_score,
259            factors: AdmissionFactors {
260                future_utility,
261                factual_confidence,
262                semantic_novelty,
263                temporal_recency,
264                content_type_prior,
265                goal_utility,
266            },
267        }
268    }
269
270    fn weighted_score(
271        &self,
272        future_utility: f32,
273        factual_confidence: f32,
274        semantic_novelty: f32,
275        temporal_recency: f32,
276        content_type_prior: f32,
277        goal_utility: f32,
278    ) -> f32 {
279        future_utility * self.weights.future_utility
280            + factual_confidence * self.weights.factual_confidence
281            + semantic_novelty * self.weights.semantic_novelty
282            + temporal_recency * self.weights.temporal_recency
283            + content_type_prior * self.weights.content_type_prior
284            + goal_utility * self.weights.goal_utility
285    }
286}
287
/// Score how confidently a piece of text is phrased by counting hedging markers.
///
/// The text is lowercased and checked for each marker as a substring. Every marker that
/// occurs (counted once, however often it repeats) lowers the score by `0.1` from a
/// starting value of `1.0`; the result never drops below `0.2`.
#[must_use]
pub fn compute_factual_confidence(content: &str) -> f32 {
    // Common English hedging markers. Content in other languages scores 1.0 (no penalty).
    const HEDGING_MARKERS: &[&str] = &[
        "maybe",
        "might",
        "perhaps",
        "i think",
        "i believe",
        "not sure",
        "could be",
        "possibly",
        "probably",
        "uncertain",
        "not certain",
        "i'm not sure",
        "im not sure",
        "not confident",
    ];
    const PENALTY_PER_MARKER: f32 = 0.1;
    const CONFIDENCE_FLOOR: f32 = 0.2;

    let haystack = content.to_lowercase();
    let mut hits = 0usize;
    for marker in HEDGING_MARKERS {
        if haystack.contains(*marker) {
            hits += 1;
        }
    }

    // The count is bounded by the marker table, so the cast to f32 is exact.
    #[allow(clippy::cast_precision_loss)]
    let penalty = (hits as f32) * PENALTY_PER_MARKER;
    (1.0 - penalty).max(CONFIDENCE_FLOOR)
}
320
/// Look up the prior score assigned to a message role.
///
/// Tool output (`"tool"` / `"tool_result"`) gets the highest prior and `"system"` the
/// lowest; any role not in the table scores `0.5`. Matching is exact and case-sensitive.
#[must_use]
pub fn compute_content_type_prior(role: &str) -> f32 {
    const DEFAULT_PRIOR: f32 = 0.5;
    const ROLE_PRIORS: &[(&str, f32)] = &[
        ("user", 0.7),
        ("assistant", 0.6),
        ("tool", 0.8),
        ("tool_result", 0.8),
        ("system", 0.3),
    ];

    ROLE_PRIORS
        .iter()
        .find(|(name, _)| *name == role)
        .map_or(DEFAULT_PRIOR, |&(_, prior)| prior)
}
335
336/// Compute semantic novelty as `1.0 - max_cosine_similarity_to_top3_neighbors`.
337///
338/// Returns `1.0` when the memory is empty (everything is novel at cold start).
339#[tracing::instrument(name = "memory.admission.semantic_novelty", skip_all)]
340async fn compute_semantic_novelty(
341    content: &str,
342    provider: &AnyProvider,
343    qdrant: Option<&Arc<EmbeddingStore>>,
344) -> f32 {
345    let Some(store) = qdrant else {
346        return 1.0;
347    };
348    if !provider.supports_embeddings() {
349        return 1.0;
350    }
351    let vector = match tokio::time::timeout(Duration::from_secs(5), provider.embed(content)).await {
352        Ok(Ok(v)) => v,
353        Ok(Err(e)) => {
354            tracing::debug!(error = %e, "A-MAC: failed to embed for novelty, using 1.0");
355            return 1.0;
356        }
357        Err(_) => {
358            tracing::warn!("A-MAC: embed timed out in semantic_novelty, using 1.0");
359            return 1.0;
360        }
361    };
362    let Ok(vector_size) = u64::try_from(vector.len()) else {
363        return 1.0;
364    };
365    if let Err(e) = store.ensure_collection(vector_size).await {
366        tracing::debug!(error = %e, "A-MAC: collection not ready for novelty check");
367        return 1.0;
368    }
369    let results = match store.search(&vector, 3, None).await {
370        Ok(r) => r,
371        Err(e) => {
372            tracing::debug!(error = %e, "A-MAC: novelty search failed, using 1.0");
373            return 1.0;
374        }
375    };
376    let max_sim = results.iter().map(|r| r.score).fold(0.0f32, f32::max);
377    (1.0 - max_sim).max(0.0)
378}
379
380/// LLM-based future utility estimate.
381///
382/// On timeout or error, returns `0.5` (neutral — no bias toward admit or reject).
383#[tracing::instrument(name = "memory.admission.future_utility_llm", skip_all)]
384async fn compute_future_utility(content: &str, role: &str, provider: &AnyProvider) -> f32 {
385    use zeph_llm::provider::{Message, MessageMetadata, Role};
386
387    let system = "You are a memory relevance judge. Rate how likely this message will be \
388        referenced in future conversations on a scale of 0.0 to 1.0. \
389        Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
390
391    let user = format!(
392        "Role: {role}\nContent: {}\n\nFuture utility score (0.0-1.0):",
393        content.chars().take(500).collect::<String>()
394    );
395
396    let messages = vec![
397        Message {
398            role: Role::System,
399            content: system.to_owned(),
400            parts: vec![],
401            metadata: MessageMetadata::default(),
402        },
403        Message {
404            role: Role::User,
405            content: user,
406            parts: vec![],
407            metadata: MessageMetadata::default(),
408        },
409    ];
410
411    let result = match tokio::time::timeout(Duration::from_secs(8), provider.chat(&messages)).await
412    {
413        Ok(Ok(r)) => r,
414        Ok(Err(e)) => {
415            tracing::debug!(error = %e, "A-MAC: future_utility LLM call failed, using 0.5");
416            return 0.5;
417        }
418        Err(_) => {
419            tracing::debug!("A-MAC: future_utility LLM timed out, using 0.5");
420            return 0.5;
421        }
422    };
423
424    result.trim().parse::<f32>().unwrap_or(0.5).clamp(0.0, 1.0)
425}
426
427/// Compute goal-conditioned utility for a candidate memory.
428///
429/// Embeds the goal text and candidate content, then returns cosine similarity.
430/// For borderline cases (similarity within 0.1 of threshold), optionally calls
431/// the LLM for refinement if a `goal_utility_provider` is configured.
432///
433/// Returns a soft-floored score: min similarity is 0.1 to avoid fully eliminating
434/// memories that are somewhat off-goal but otherwise high-value (W3.4 fix).
435///
436/// Returns `0.0` on embedding failure (safe admission without goal factor).
437async fn compute_goal_utility(
438    content: &str,
439    goal_text: &str,
440    gate: &GoalGateConfig,
441    provider: &AnyProvider,
442    qdrant: Option<&Arc<EmbeddingStore>>,
443) -> f32 {
444    use zeph_llm::provider::LlmProvider as _;
445
446    if !provider.supports_embeddings() {
447        return 0.0;
448    }
449
450    let goal_emb =
451        match tokio::time::timeout(Duration::from_secs(5), provider.embed(goal_text)).await {
452            Ok(Ok(v)) => v,
453            Ok(Err(e)) => {
454                tracing::debug!(error = %e, "goal_utility: failed to embed goal text, using 0.0");
455                return 0.0;
456            }
457            Err(_) => {
458                tracing::warn!("A-MAC: embed timed out in goal_utility (goal text), using 0.0");
459                return 0.0;
460            }
461        };
462    let content_emb =
463        match tokio::time::timeout(Duration::from_secs(5), provider.embed(content)).await {
464            Ok(Ok(v)) => v,
465            Ok(Err(e)) => {
466                tracing::debug!(error = %e, "goal_utility: failed to embed content, using 0.0");
467                return 0.0;
468            }
469            Err(_) => {
470                tracing::warn!("A-MAC: embed timed out in goal_utility (content), using 0.0");
471                return 0.0;
472            }
473        };
474
475    // Qdrant is used for novelty search, not for goal utility — we compute cosine directly.
476    let _ = qdrant; // unused here; kept for API symmetry
477
478    let similarity = cosine_similarity(&goal_emb, &content_emb);
479
480    // Borderline: call LLM for refinement when configured (W3.5: skipped when no provider).
481    let borderline_lo = gate.threshold - 0.1;
482    let borderline_hi = gate.threshold + 0.1;
483    let in_borderline = similarity >= borderline_lo && similarity <= borderline_hi;
484
485    let final_similarity = if in_borderline {
486        if let Some(ref goal_provider) = gate.provider {
487            refine_goal_utility_llm(content, goal_text, similarity, goal_provider).await
488        } else {
489            similarity
490        }
491    } else {
492        similarity
493    };
494
495    // Hard gate below threshold; soft floor of 0.1 above threshold (W3.4 fix).
496    if final_similarity < gate.threshold {
497        0.0
498    } else {
499        final_similarity.max(0.1)
500    }
501}
502
503/// LLM-based goal utility refinement for borderline cases.
504///
505/// Returns the original `embedding_sim` on failure (safe fallback).
506#[tracing::instrument(name = "memory.admission.goal_utility_refine_llm", skip_all)]
507async fn refine_goal_utility_llm(
508    content: &str,
509    goal_text: &str,
510    embedding_sim: f32,
511    provider: &AnyProvider,
512) -> f32 {
513    use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
514
515    let system = "You are a memory relevance judge. Given a task goal and a candidate memory, \
516        rate how relevant the memory is to the goal on a scale of 0.0 to 1.0. \
517        Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
518
519    let user = format!(
520        "Goal: {}\nMemory: {}\n\nRelevance score (0.0-1.0):",
521        goal_text.chars().take(200).collect::<String>(),
522        content.chars().take(300).collect::<String>(),
523    );
524
525    let messages = vec![
526        Message {
527            role: Role::System,
528            content: system.to_owned(),
529            parts: vec![],
530            metadata: MessageMetadata::default(),
531        },
532        Message {
533            role: Role::User,
534            content: user,
535            parts: vec![],
536            metadata: MessageMetadata::default(),
537        },
538    ];
539
540    let result = match tokio::time::timeout(Duration::from_secs(6), provider.chat(&messages)).await
541    {
542        Ok(Ok(r)) => r,
543        Ok(Err(e)) => {
544            tracing::debug!(error = %e, "goal_utility LLM refinement failed, using embedding sim");
545            return embedding_sim;
546        }
547        Err(_) => {
548            tracing::debug!("goal_utility LLM refinement timed out, using embedding sim");
549            return embedding_sim;
550        }
551    };
552
553    result
554        .trim()
555        .parse::<f32>()
556        .unwrap_or(embedding_sim)
557        .clamp(0.0, 1.0)
558}
559
560/// Log an admission decision to the audit log via `tracing`.
561///
562/// Rejections are always logged at debug level. Admissions are trace-level.
563pub fn log_admission_decision(
564    decision: &AdmissionDecision,
565    content_preview: &str,
566    role: &str,
567    threshold: f32,
568) {
569    if decision.admitted {
570        tracing::trace!(
571            role,
572            composite_score = decision.composite_score,
573            threshold,
574            content_preview,
575            "A-MAC: admitted"
576        );
577    } else {
578        tracing::debug!(
579            role,
580            composite_score = decision.composite_score,
581            threshold,
582            future_utility = decision.factors.future_utility,
583            factual_confidence = decision.factors.factual_confidence,
584            semantic_novelty = decision.factors.semantic_novelty,
585            content_type_prior = decision.factors.content_type_prior,
586            content_preview,
587            "A-MAC: rejected"
588        );
589    }
590}
591
/// Reported when a persist is refused by admission control.
#[derive(Debug)]
pub struct AdmissionRejected {
    /// Composite score the message received.
    pub composite_score: f32,
    /// Threshold the score failed to reach.
    pub threshold: f32,
}

impl std::fmt::Display for AdmissionRejected {
    /// Render the rejection with both values at three decimal places.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            composite_score,
            threshold,
        } = self;
        write!(
            f,
            "A-MAC admission rejected (score={composite_score:.3} < threshold={threshold:.3})"
        )
    }
}
608
#[cfg(test)]
mod tests {
    use super::*;

    /// Weights for the five base factors with the goal factor switched off.
    fn base_weights(
        future_utility: f32,
        factual_confidence: f32,
        semantic_novelty: f32,
        temporal_recency: f32,
        content_type_prior: f32,
    ) -> AdmissionWeights {
        AdmissionWeights {
            future_utility,
            factual_confidence,
            semantic_novelty,
            temporal_recency,
            content_type_prior,
            goal_utility: 0.0,
        }
    }

    /// Weight profile shared by most controller tests.
    fn default_weights() -> AdmissionWeights {
        base_weights(0.30, 0.15, 0.30, 0.10, 0.15)
    }

    /// Factor scores with `temporal_recency = 1.0` and `goal_utility = 0.0`.
    fn sample_factors(
        future_utility: f32,
        factual_confidence: f32,
        semantic_novelty: f32,
        content_type_prior: f32,
    ) -> AdmissionFactors {
        AdmissionFactors {
            future_utility,
            factual_confidence,
            semantic_novelty,
            temporal_recency: 1.0,
            content_type_prior,
            goal_utility: 0.0,
        }
    }

    // ── heuristic factor tests ────────────────────────────────────────────────

    #[test]
    fn factual_confidence_no_hedging() {
        assert!((compute_factual_confidence("The server uses TLS 1.3.") - 1.0).abs() < 0.01);
    }

    #[test]
    fn factual_confidence_with_one_marker() {
        let score = compute_factual_confidence("Maybe we should use TLS 1.3.");
        assert!((score - 0.9).abs() < 0.01);
    }

    #[test]
    fn factual_confidence_many_markers_floors_at_0_2() {
        let content = "maybe i think perhaps possibly might not sure i believe";
        let score = compute_factual_confidence(content);
        assert!(score >= 0.2);
        assert!(score < 0.5);
    }

    #[test]
    fn content_type_prior_values() {
        assert!((compute_content_type_prior("user") - 0.7).abs() < 0.01);
        assert!((compute_content_type_prior("assistant") - 0.6).abs() < 0.01);
        assert!((compute_content_type_prior("tool") - 0.8).abs() < 0.01);
        assert!((compute_content_type_prior("system") - 0.3).abs() < 0.01);
        assert!((compute_content_type_prior("unknown") - 0.5).abs() < 0.01);
    }

    // ── composite score tests ─────────────────────────────────────────────────

    #[test]
    fn admission_control_admits_high_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights());
        // Every scored factor at 1.0 gives a composite of 1.0.
        let score = ctrl.weighted_score(1.0, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(score >= 0.99);
        // Admission requires score >= threshold.
        let admitted = score >= ctrl.threshold;
        assert!(admitted);
    }

    #[test]
    fn admission_control_rejects_low_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights());
        // Every factor at 0.0 gives a composite of 0.0.
        let score = ctrl.weighted_score(0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
        assert!(score < ctrl.threshold);
    }

    // A heuristic score at or above threshold + margin takes the fast path (no LLM call).
    // Only the arithmetic behind that branch is checked here; `evaluate()` is not invoked.
    #[test]
    fn fast_path_admits_when_heuristic_above_threshold_plus_margin() {
        let threshold = 0.40f32;
        let margin = 0.15f32;
        let ctrl = AdmissionControl::new(
            threshold,
            margin,
            base_weights(0.20, 0.20, 0.20, 0.20, 0.20),
        );

        // future_utility sits at the neutral 0.5; every other base factor is 1.0:
        // 0.5*0.2 + 4 * (1.0*0.2) = 0.9
        let heuristic = ctrl.weighted_score(0.5, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(
            heuristic >= threshold + margin,
            "heuristic {heuristic} must exceed threshold+margin {}",
            threshold + margin
        );
        // evaluate() admits when composite >= threshold || heuristic >= threshold + margin.
        let admitted = heuristic >= threshold + margin;
        assert!(admitted, "fast path must admit without LLM call");
    }

    // A heuristic score below threshold + margin must go through the slow path.
    #[test]
    fn slow_path_required_when_heuristic_below_threshold_plus_margin() {
        let threshold = 0.50f32;
        let margin = 0.20f32;
        let ctrl = AdmissionControl::new(
            threshold,
            margin,
            base_weights(0.40, 0.15, 0.15, 0.15, 0.15),
        );

        // Low factor scores keep the heuristic under the fast-path cut-off.
        let heuristic = ctrl.weighted_score(0.5, 0.3, 0.3, 0.3, 0.3, 0.0);
        assert!(
            heuristic < threshold + margin,
            "heuristic {heuristic} must be below threshold+margin {}",
            threshold + margin
        );
    }

    // ── logging / error type / accessor tests ─────────────────────────────────

    // Both the admitted and the rejected branch must log without panicking.
    #[test]
    fn log_admission_decision_does_not_panic() {
        let admitted_decision = AdmissionDecision {
            admitted: true,
            composite_score: 0.75,
            factors: sample_factors(0.8, 0.9, 0.7, 0.7),
        };
        log_admission_decision(&admitted_decision, "preview text", "user", 0.40);

        let rejected_decision = AdmissionDecision {
            admitted: false,
            composite_score: 0.20,
            factors: sample_factors(0.1, 0.2, 0.3, 0.3),
        };
        log_admission_decision(&rejected_decision, "maybe short content", "assistant", 0.40);
    }

    // Display output carries both values at three decimals.
    #[test]
    fn admission_rejected_display() {
        let err = AdmissionRejected {
            composite_score: 0.25,
            threshold: 0.45,
        };
        let msg = format!("{err}");
        assert!(msg.contains("0.250"));
        assert!(msg.contains("0.450"));
    }

    // threshold() hands back the value given to new().
    #[test]
    fn threshold_accessor() {
        let ctrl = AdmissionControl::new(0.55, 0.10, base_weights(0.20, 0.20, 0.20, 0.20, 0.20));
        assert!((ctrl.threshold() - 0.55).abs() < 0.001);
    }

    // "tool_result" shares the prior of "tool".
    #[test]
    fn content_type_prior_tool_result_alias() {
        assert!((compute_content_type_prior("tool_result") - 0.8).abs() < 0.01);
    }

    // ── cosine_similarity tests ───────────────────────────────────────────────

    #[test]
    fn cosine_similarity_identical_vectors() {
        let v = vec![1.0f32, 0.0, 0.0];
        assert!((cosine_similarity(&v, &v) - 1.0).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_orthogonal_vectors() {
        let a = vec![1.0f32, 0.0];
        let b = vec![0.0f32, 1.0];
        assert!(cosine_similarity(&a, &b).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_zero_vector_returns_zero() {
        let z = vec![0.0f32, 0.0, 0.0];
        let v = vec![1.0f32, 2.0, 3.0];
        assert!(cosine_similarity(&z, &v).abs() < f32::EPSILON);
    }

    #[test]
    fn cosine_similarity_length_mismatch_returns_zero() {
        let a = vec![1.0f32, 0.0];
        let b = vec![1.0f32, 0.0, 0.0];
        assert!(cosine_similarity(&a, &b).abs() < f32::EPSILON);
    }

    // ── with_goal_gate tests ──────────────────────────────────────────────────

    #[test]
    fn with_goal_gate_sets_goal_utility_weight() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights()).with_goal_gate(
            GoalGateConfig {
                weight: 0.20,
                threshold: 0.5,
                provider: None,
            },
        );
        assert!(
            ctrl.weights.goal_utility > 0.0,
            "goal_utility must be nonzero after with_goal_gate"
        );
        // After normalization the six weights add up to ~1.0.
        let w = &ctrl.weights;
        let total = w.future_utility
            + w.factual_confidence
            + w.semantic_novelty
            + w.temporal_recency
            + w.content_type_prior
            + w.goal_utility;
        assert!(
            (total - 1.0).abs() < 0.01,
            "normalized weights must sum to 1.0, got {total}"
        );
    }

    #[test]
    fn with_goal_gate_zero_weight_leaves_goal_utility_at_zero() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights()).with_goal_gate(
            GoalGateConfig {
                weight: 0.0,
                threshold: 0.5,
                provider: None,
            },
        );
        assert!(ctrl.weights.goal_utility.abs() < f32::EPSILON);
    }

    // ── timeout regression tests (#4212) ─────────────────────────────────────

    #[tokio::test]
    async fn compute_semantic_novelty_returns_one_on_embed_timeout() {
        tokio::time::pause();
        let mock = zeph_llm::mock::MockProvider::default()
            .with_embed_delay(10_000)
            .with_embedding(vec![0.0; 4]);
        let provider = zeph_llm::any::AnyProvider::Mock(mock);
        let handle =
            tokio::spawn(async move { compute_semantic_novelty("hello", &provider, None).await });
        tokio::time::advance(std::time::Duration::from_secs(6)).await;
        let result = handle.await.expect("task panicked");
        assert!(
            (result - 1.0).abs() < f32::EPSILON,
            "expected 1.0 on embed timeout, got {result}"
        );
    }

    #[tokio::test]
    async fn compute_goal_utility_returns_zero_on_embed_timeout() {
        tokio::time::pause();
        let mock = zeph_llm::mock::MockProvider::default()
            .with_embed_delay(10_000)
            .with_embedding(vec![0.0; 4]);
        let provider = zeph_llm::any::AnyProvider::Mock(mock);
        let gate = GoalGateConfig {
            weight: 0.5,
            threshold: 0.5,
            provider: None,
        };
        let handle = tokio::spawn(async move {
            compute_goal_utility("content", "goal", &gate, &provider, None).await
        });
        tokio::time::advance(std::time::Duration::from_secs(6)).await;
        let result = handle.await.expect("task panicked");
        assert!(
            result.abs() < f32::EPSILON,
            "expected 0.0 on embed timeout, got {result}"
        );
    }
}