Skip to main content

zeph_memory/
admission.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
//! A-MAC adaptive memory admission control (#2317).
//!
//! Write-time gate inserted before `SQLite` persistence in `remember()` and `remember_with_parts()`.
//! Evaluates five factors (six when the goal-conditioned write gate from #2408 is enabled) and
//! rejects messages whose composite score falls below the configured threshold.
8
9use std::sync::Arc;
10use std::time::Duration;
11
12use zeph_llm::any::AnyProvider;
13use zeph_llm::provider::LlmProvider as _;
14
15use crate::embedding_store::EmbeddingStore;
16use zeph_common::math::cosine_similarity;
17
18/// Per-factor scores for the admission decision.
19#[derive(Debug, Clone, serde::Serialize)]
20pub struct AdmissionFactors {
21    /// LLM-estimated reuse probability. `[0, 1]`. Set to 0.5 on fast path or LLM failure.
22    pub future_utility: f32,
23    /// Inverse hedging heuristic: high confidence → high score. `[0, 1]`.
24    pub factual_confidence: f32,
25    /// `1.0 - max_similarity_to_top3_neighbors`. `[0, 1]`. 1.0 when memory is empty.
26    pub semantic_novelty: f32,
27    /// Always `1.0` at write time (decay applied at recall). `[0, 1]`.
28    pub temporal_recency: f32,
29    /// Prior based on message role. `[0, 1]`.
30    pub content_type_prior: f32,
31    /// Goal-conditioned utility (#2408). Cosine similarity between goal embedding and
32    /// candidate memory. `0.0` when `goal_conditioned_write = false` or goal text is absent/trivial.
33    pub goal_utility: f32,
34}
35
36/// Result of an admission evaluation.
37#[derive(Debug, Clone)]
38pub struct AdmissionDecision {
39    pub admitted: bool,
40    pub composite_score: f32,
41    pub factors: AdmissionFactors,
42}
43
/// Per-factor weights used to fold the factor scores into one composite score.
///
/// Values may be supplied unnormalized; [`AdmissionWeights::normalized`] rescales them
/// so that they sum to `1.0`.
#[derive(Debug, Clone, Copy)]
pub struct AdmissionWeights {
    /// Weight of the LLM-estimated future utility.
    pub future_utility: f32,
    /// Weight of the hedging-based confidence heuristic.
    pub factual_confidence: f32,
    /// Weight of the semantic novelty score.
    pub semantic_novelty: f32,
    /// Weight of the temporal recency score.
    pub temporal_recency: f32,
    /// Weight of the role-based prior.
    pub content_type_prior: f32,
    /// Goal-conditioned utility weight. `0.0` when goal gate is disabled.
    pub goal_utility: f32,
}

impl AdmissionWeights {
    /// Return a copy whose fields are non-negative, finite and sum to `1.0`.
    ///
    /// Negative, NaN and infinite inputs are treated as `0.0`: an infinite weight would
    /// otherwise turn into `inf / inf = NaN` and poison every composite score computed
    /// from the result.
    ///
    /// Falls back to equal weights over the five base factors (with `goal_utility = 0.0`)
    /// when the sanitized sum is effectively zero or overflows to infinity.
    #[must_use]
    pub fn normalized(&self) -> Self {
        // Only strictly positive, finite values carry usable weight.
        let sanitize = |w: f32| if w.is_finite() && w > 0.0 { w } else { 0.0 };

        let fu = sanitize(self.future_utility);
        let fc = sanitize(self.factual_confidence);
        let sn = sanitize(self.semantic_novelty);
        let tr = sanitize(self.temporal_recency);
        let cp = sanitize(self.content_type_prior);
        let gu = sanitize(self.goal_utility);

        let sum = fu + fc + sn + tr + cp + gu;
        // `!sum.is_finite()` covers huge finite inputs whose sum overflows; dividing by
        // infinity would silently zero every weight.
        if !sum.is_finite() || sum <= f32::EPSILON {
            // Equal weights across the five base factors; the goal factor stays off.
            return Self {
                future_utility: 0.2,
                factual_confidence: 0.2,
                semantic_novelty: 0.2,
                temporal_recency: 0.2,
                content_type_prior: 0.2,
                goal_utility: 0.0,
            };
        }

        Self {
            future_utility: fu / sum,
            factual_confidence: fc / sum,
            semantic_novelty: sn / sum,
            temporal_recency: tr / sum,
            content_type_prior: cp / sum,
            goal_utility: gu / sum,
        }
    }
}
90
91/// Goal-conditioned write gate configuration for `AdmissionControl`.
92#[derive(Debug, Clone)]
93pub struct GoalGateConfig {
94    /// Minimum cosine similarity to consider memory goal-relevant.
95    pub threshold: f32,
96    /// LLM provider for borderline refinement (similarity within 0.1 of threshold).
97    pub provider: Option<AnyProvider>,
98    /// Weight of the `goal_utility` factor in the composite score.
99    pub weight: f32,
100}
101
102/// A-MAC adaptive memory admission controller (#2317).
103///
104/// Evaluates five factors (future utility, factual confidence, semantic novelty,
105/// temporal recency, content-type prior) and rejects messages below the configured
106/// composite score threshold before they are persisted.
107///
108/// Optionally extended with a goal-conditioned write gate (#2408) that adds a
109/// sixth factor based on the cosine similarity between the current goal embedding
110/// and the candidate memory.
111///
112/// # Examples
113///
114/// ```rust,no_run
115/// use zeph_memory::{AdmissionControl, AdmissionWeights};
116///
117/// let weights = AdmissionWeights {
118///     future_utility: 0.3,
119///     factual_confidence: 0.2,
120///     semantic_novelty: 0.2,
121///     temporal_recency: 0.1,
122///     content_type_prior: 0.2,
123///     goal_utility: 0.0,
124/// };
125/// let controller = AdmissionControl::new(0.4, 0.1, weights);
126/// ```
127pub struct AdmissionControl {
128    threshold: f32,
129    fast_path_margin: f32,
130    weights: AdmissionWeights,
131    /// Dedicated provider for LLM-based evaluation. Falls back to the caller-supplied provider
132    /// when `None` (e.g. in tests or when `admission_provider` is not configured).
133    provider: Option<AnyProvider>,
134    /// Goal-conditioned write gate. `None` when `goal_conditioned_write = false`.
135    goal_gate: Option<GoalGateConfig>,
136    /// Per-call timeout for every `embed()` invocation. Default: 5 s.
137    embed_timeout: Duration,
138}
139
140impl AdmissionControl {
141    /// Create a new admission controller.
142    ///
143    /// - `threshold` — composite score `[0, 1]` below which messages are rejected.
144    /// - `fast_path_margin` — when all non-LLM factors already push the score far above
145    ///   the threshold (by at least this margin), the LLM `future_utility` call is skipped.
146    /// - `weights` — factor weights; normalized automatically so they sum to `1.0`.
147    #[must_use]
148    pub fn new(threshold: f32, fast_path_margin: f32, weights: AdmissionWeights) -> Self {
149        Self {
150            threshold,
151            fast_path_margin,
152            weights: weights.normalized(),
153            provider: None,
154            goal_gate: None,
155            embed_timeout: Duration::from_secs(5),
156        }
157    }
158
159    /// Set the per-call timeout for every `embed()` invocation.
160    ///
161    /// Default: 5 s. Must be non-zero; the minimum effective value is 1 s.
162    #[must_use]
163    pub fn with_embed_timeout(mut self, timeout_secs: u64) -> Self {
164        self.embed_timeout = Duration::from_secs(timeout_secs.max(1));
165        self
166    }
167
168    /// Attach a dedicated LLM provider for `future_utility` evaluation.
169    ///
170    /// When set, this provider is used instead of the caller-supplied fallback.
171    #[must_use]
172    pub fn with_provider(mut self, provider: AnyProvider) -> Self {
173        self.provider = Some(provider);
174        self
175    }
176
177    /// Enable goal-conditioned write gate (#2408).
178    #[must_use]
179    pub fn with_goal_gate(mut self, config: GoalGateConfig) -> Self {
180        // Redistribute goal_utility weight from future_utility.
181        let gu = config.weight.clamp(0.0, 1.0);
182        let mut weights = self.weights;
183        weights.goal_utility = gu;
184        // Reduce future_utility by the same amount (soft redistribution).
185        weights.future_utility = (weights.future_utility - gu).max(0.0);
186        self.weights = weights.normalized();
187        self.goal_gate = Some(config);
188        self
189    }
190
191    /// Return the configured admission threshold.
192    #[must_use]
193    pub fn threshold(&self) -> f32 {
194        self.threshold
195    }
196
197    /// Evaluate admission for a message.
198    ///
199    /// `goal_text`: optional current-turn goal context for goal-conditioned scoring.
200    /// Ignored when the goal gate is disabled or `goal_text` is `None`/trivial (< 10 chars).
201    ///
202    /// Fast path: skips LLM when heuristic-only score is already above `threshold + fast_path_margin`.
203    /// Slow path: calls LLM for `future_utility` when borderline.
204    ///
205    /// On LLM failure, `future_utility` defaults to `0.5` (neutral).
206    #[cfg_attr(
207        feature = "profiling",
208        tracing::instrument(name = "memory.admission", skip_all)
209    )]
210    pub async fn evaluate(
211        &self,
212        content: &str,
213        role: &str,
214        fallback_provider: &AnyProvider,
215        qdrant: Option<&Arc<EmbeddingStore>>,
216        goal_text: Option<&str>,
217    ) -> AdmissionDecision {
218        let effective_provider = self.provider.as_ref().unwrap_or(fallback_provider);
219        let factual_confidence = compute_factual_confidence(content);
220        let temporal_recency = 1.0f32;
221        let content_type_prior = compute_content_type_prior(role);
222
223        // Semantic novelty requires an async embedding search.
224        let semantic_novelty =
225            compute_semantic_novelty(content, effective_provider, qdrant, self.embed_timeout).await;
226
227        // Goal-conditioned utility (W3.1 fix: skip trivial goal text < 10 chars).
228        let goal_utility = match &self.goal_gate {
229            Some(gate) => {
230                let effective_goal = goal_text.filter(|t| t.trim().len() >= 10);
231                if let Some(goal) = effective_goal {
232                    compute_goal_utility(
233                        content,
234                        goal,
235                        gate,
236                        effective_provider,
237                        qdrant,
238                        self.embed_timeout,
239                    )
240                    .await
241                } else {
242                    0.0
243                }
244            }
245            None => 0.0,
246        };
247
248        // Heuristic-only composite (future_utility treated as 0.5 neutral placeholder).
249        let heuristic_score = self.weighted_score(
250            0.5,
251            factual_confidence,
252            semantic_novelty,
253            temporal_recency,
254            content_type_prior,
255            goal_utility,
256        );
257
258        // Fast path: admit without LLM if score is clearly above threshold + margin.
259        let future_utility = if heuristic_score >= self.threshold + self.fast_path_margin {
260            0.5 // not used in final score since we admit early, but kept for audit
261        } else {
262            compute_future_utility(content, role, effective_provider).await
263        };
264
265        let composite_score = self.weighted_score(
266            future_utility,
267            factual_confidence,
268            semantic_novelty,
269            temporal_recency,
270            content_type_prior,
271            goal_utility,
272        );
273
274        let admitted = composite_score >= self.threshold
275            || heuristic_score >= self.threshold + self.fast_path_margin;
276
277        AdmissionDecision {
278            admitted,
279            composite_score,
280            factors: AdmissionFactors {
281                future_utility,
282                factual_confidence,
283                semantic_novelty,
284                temporal_recency,
285                content_type_prior,
286                goal_utility,
287            },
288        }
289    }
290
291    fn weighted_score(
292        &self,
293        future_utility: f32,
294        factual_confidence: f32,
295        semantic_novelty: f32,
296        temporal_recency: f32,
297        content_type_prior: f32,
298        goal_utility: f32,
299    ) -> f32 {
300        future_utility * self.weights.future_utility
301            + factual_confidence * self.weights.factual_confidence
302            + semantic_novelty * self.weights.semantic_novelty
303            + temporal_recency * self.weights.temporal_recency
304            + content_type_prior * self.weights.content_type_prior
305            + goal_utility * self.weights.goal_utility
306    }
307}
308
/// Heuristic confidence score based on English hedging markers.
///
/// Starts at `1.0` and subtracts `0.1` for every marker from the built-in list that occurs
/// in `content`, never going below `0.2`. Matching is case-insensitive and respects word
/// boundaries, so words that merely contain a marker ("mighty", "improbably") are not
/// penalized. Markers that overlap (e.g. "not sure" inside "i'm not sure") each count.
///
/// Content in other languages contains none of the markers and therefore scores `1.0`.
#[must_use]
pub fn compute_factual_confidence(content: &str) -> f32 {
    // Common English hedging markers, all lowercase.
    const HEDGING_MARKERS: &[&str] = &[
        "maybe",
        "might",
        "perhaps",
        "i think",
        "i believe",
        "not sure",
        "could be",
        "possibly",
        "probably",
        "uncertain",
        "not certain",
        "i'm not sure",
        "im not sure",
        "not confident",
    ];
    let lower = content.to_lowercase();
    let matches = HEDGING_MARKERS
        .iter()
        .filter(|&&marker| contains_phrase(&lower, marker))
        .count();
    // The count is bounded by the marker list length, so the cast is lossless.
    #[allow(clippy::cast_precision_loss)]
    let penalty = (matches as f32) * 0.1;
    (1.0 - penalty).max(0.2)
}

/// Return `true` when `phrase` occurs in `text` as a standalone phrase, i.e. not directly
/// preceded or followed by an alphanumeric character.
fn contains_phrase(text: &str, phrase: &str) -> bool {
    text.match_indices(phrase).any(|(start, matched)| {
        let end = start + matched.len();
        let glued_before = text[..start]
            .chars()
            .next_back()
            .is_some_and(char::is_alphanumeric);
        let glued_after = text[end..]
            .chars()
            .next()
            .is_some_and(char::is_alphanumeric);
        !glued_before && !glued_after
    })
}
341
/// Role-based prior for how valuable a message is likely to be.
///
/// Tool output (`"tool"` / `"tool_result"`) gets the highest prior because it contains
/// factual data; `"system"` gets the lowest. The values are calibrated by typical content
/// density rather than by role importance. Unrecognized roles receive the neutral `0.5`.
/// Role names are matched exactly (case-sensitive).
#[must_use]
pub fn compute_content_type_prior(role: &str) -> f32 {
    const ROLE_PRIORS: &[(&str, f32)] = &[
        ("user", 0.7),
        ("assistant", 0.6),
        ("tool", 0.8),
        ("tool_result", 0.8),
        ("system", 0.3),
    ];
    const UNKNOWN_ROLE_PRIOR: f32 = 0.5;

    ROLE_PRIORS
        .iter()
        .find(|(known_role, _)| *known_role == role)
        .map_or(UNKNOWN_ROLE_PRIOR, |&(_, prior)| prior)
}
356
357/// Compute semantic novelty as `1.0 - max_cosine_similarity_to_top3_neighbors`.
358///
359/// Returns `1.0` when the memory is empty (everything is novel at cold start).
360#[tracing::instrument(name = "memory.admission.semantic_novelty", skip_all)]
361async fn compute_semantic_novelty(
362    content: &str,
363    provider: &AnyProvider,
364    qdrant: Option<&Arc<EmbeddingStore>>,
365    embed_timeout: Duration,
366) -> f32 {
367    let Some(store) = qdrant else {
368        return 1.0;
369    };
370    if !provider.supports_embeddings() {
371        return 1.0;
372    }
373    let vector = match tokio::time::timeout(embed_timeout, provider.embed(content)).await {
374        Ok(Ok(v)) => v,
375        Ok(Err(e)) => {
376            tracing::debug!(error = %e, "A-MAC: failed to embed for novelty, using 1.0");
377            return 1.0;
378        }
379        Err(_) => {
380            tracing::warn!("A-MAC: embed timed out in semantic_novelty, using 1.0");
381            return 1.0;
382        }
383    };
384    let Ok(vector_size) = u64::try_from(vector.len()) else {
385        return 1.0;
386    };
387    if let Err(e) = store.ensure_collection(vector_size).await {
388        tracing::debug!(error = %e, "A-MAC: collection not ready for novelty check");
389        return 1.0;
390    }
391    let results = match store.search(&vector, 3, None).await {
392        Ok(r) => r,
393        Err(e) => {
394            tracing::debug!(error = %e, "A-MAC: novelty search failed, using 1.0");
395            return 1.0;
396        }
397    };
398    let max_sim = results.iter().map(|r| r.score).fold(0.0f32, f32::max);
399    (1.0 - max_sim).max(0.0)
400}
401
402/// LLM-based future utility estimate.
403///
404/// On timeout or error, returns `0.5` (neutral — no bias toward admit or reject).
405#[tracing::instrument(name = "memory.admission.future_utility_llm", skip_all)]
406async fn compute_future_utility(content: &str, role: &str, provider: &AnyProvider) -> f32 {
407    use zeph_llm::provider::{Message, MessageMetadata, Role};
408
409    let system = "You are a memory relevance judge. Rate how likely this message will be \
410        referenced in future conversations on a scale of 0.0 to 1.0. \
411        Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
412
413    let user = format!(
414        "Role: {role}\nContent: {}\n\nFuture utility score (0.0-1.0):",
415        content.chars().take(500).collect::<String>()
416    );
417
418    let messages = vec![
419        Message {
420            role: Role::System,
421            content: system.to_owned(),
422            parts: vec![],
423            metadata: MessageMetadata::default(),
424        },
425        Message {
426            role: Role::User,
427            content: user,
428            parts: vec![],
429            metadata: MessageMetadata::default(),
430        },
431    ];
432
433    let result = match tokio::time::timeout(Duration::from_secs(8), provider.chat(&messages)).await
434    {
435        Ok(Ok(r)) => r,
436        Ok(Err(e)) => {
437            tracing::debug!(error = %e, "A-MAC: future_utility LLM call failed, using 0.5");
438            return 0.5;
439        }
440        Err(_) => {
441            tracing::debug!("A-MAC: future_utility LLM timed out, using 0.5");
442            return 0.5;
443        }
444    };
445
446    result.trim().parse::<f32>().unwrap_or(0.5).clamp(0.0, 1.0)
447}
448
449/// Compute goal-conditioned utility for a candidate memory.
450///
451/// Embeds the goal text and candidate content, then returns cosine similarity.
452/// For borderline cases (similarity within 0.1 of threshold), optionally calls
453/// the LLM for refinement if a `goal_utility_provider` is configured.
454///
455/// Returns a soft-floored score: min similarity is 0.1 to avoid fully eliminating
456/// memories that are somewhat off-goal but otherwise high-value (W3.4 fix).
457///
458/// Returns `0.0` on embedding failure (safe admission without goal factor).
459async fn compute_goal_utility(
460    content: &str,
461    goal_text: &str,
462    gate: &GoalGateConfig,
463    provider: &AnyProvider,
464    qdrant: Option<&Arc<EmbeddingStore>>,
465    embed_timeout: Duration,
466) -> f32 {
467    use zeph_llm::provider::LlmProvider as _;
468
469    if !provider.supports_embeddings() {
470        return 0.0;
471    }
472
473    let goal_emb = match tokio::time::timeout(embed_timeout, provider.embed(goal_text)).await {
474        Ok(Ok(v)) => v,
475        Ok(Err(e)) => {
476            tracing::debug!(error = %e, "goal_utility: failed to embed goal text, using 0.0");
477            return 0.0;
478        }
479        Err(_) => {
480            tracing::warn!("A-MAC: embed timed out in goal_utility (goal text), using 0.0");
481            return 0.0;
482        }
483    };
484    let content_emb = match tokio::time::timeout(embed_timeout, provider.embed(content)).await {
485        Ok(Ok(v)) => v,
486        Ok(Err(e)) => {
487            tracing::debug!(error = %e, "goal_utility: failed to embed content, using 0.0");
488            return 0.0;
489        }
490        Err(_) => {
491            tracing::warn!("A-MAC: embed timed out in goal_utility (content), using 0.0");
492            return 0.0;
493        }
494    };
495
496    // Qdrant is used for novelty search, not for goal utility — we compute cosine directly.
497    let _ = qdrant; // unused here; kept for API symmetry
498
499    let similarity = cosine_similarity(&goal_emb, &content_emb);
500
501    // Borderline: call LLM for refinement when configured (W3.5: skipped when no provider).
502    let borderline_lo = gate.threshold - 0.1;
503    let borderline_hi = gate.threshold + 0.1;
504    let in_borderline = similarity >= borderline_lo && similarity <= borderline_hi;
505
506    let final_similarity = if in_borderline {
507        if let Some(ref goal_provider) = gate.provider {
508            refine_goal_utility_llm(content, goal_text, similarity, goal_provider).await
509        } else {
510            similarity
511        }
512    } else {
513        similarity
514    };
515
516    // Hard gate below threshold; soft floor of 0.1 above threshold (W3.4 fix).
517    if final_similarity < gate.threshold {
518        0.0
519    } else {
520        final_similarity.max(0.1)
521    }
522}
523
524/// LLM-based goal utility refinement for borderline cases.
525///
526/// Returns the original `embedding_sim` on failure (safe fallback).
527#[tracing::instrument(name = "memory.admission.goal_utility_refine_llm", skip_all)]
528async fn refine_goal_utility_llm(
529    content: &str,
530    goal_text: &str,
531    embedding_sim: f32,
532    provider: &AnyProvider,
533) -> f32 {
534    use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
535
536    let system = "You are a memory relevance judge. Given a task goal and a candidate memory, \
537        rate how relevant the memory is to the goal on a scale of 0.0 to 1.0. \
538        Respond with ONLY a decimal number between 0.0 and 1.0, nothing else.";
539
540    let user = format!(
541        "Goal: {}\nMemory: {}\n\nRelevance score (0.0-1.0):",
542        goal_text.chars().take(200).collect::<String>(),
543        content.chars().take(300).collect::<String>(),
544    );
545
546    let messages = vec![
547        Message {
548            role: Role::System,
549            content: system.to_owned(),
550            parts: vec![],
551            metadata: MessageMetadata::default(),
552        },
553        Message {
554            role: Role::User,
555            content: user,
556            parts: vec![],
557            metadata: MessageMetadata::default(),
558        },
559    ];
560
561    let result = match tokio::time::timeout(Duration::from_secs(6), provider.chat(&messages)).await
562    {
563        Ok(Ok(r)) => r,
564        Ok(Err(e)) => {
565            tracing::debug!(error = %e, "goal_utility LLM refinement failed, using embedding sim");
566            return embedding_sim;
567        }
568        Err(_) => {
569            tracing::debug!("goal_utility LLM refinement timed out, using embedding sim");
570            return embedding_sim;
571        }
572    };
573
574    result
575        .trim()
576        .parse::<f32>()
577        .unwrap_or(embedding_sim)
578        .clamp(0.0, 1.0)
579}
580
581/// Log an admission decision to the audit log via `tracing`.
582///
583/// Rejections are always logged at debug level. Admissions are trace-level.
584pub fn log_admission_decision(
585    decision: &AdmissionDecision,
586    content_preview: &str,
587    role: &str,
588    threshold: f32,
589) {
590    if decision.admitted {
591        tracing::trace!(
592            role,
593            composite_score = decision.composite_score,
594            threshold,
595            content_preview,
596            "A-MAC: admitted"
597        );
598    } else {
599        tracing::debug!(
600            role,
601            composite_score = decision.composite_score,
602            threshold,
603            future_utility = decision.factors.future_utility,
604            factual_confidence = decision.factors.factual_confidence,
605            semantic_novelty = decision.factors.semantic_novelty,
606            content_type_prior = decision.factors.content_type_prior,
607            content_preview,
608            "A-MAC: rejected"
609        );
610    }
611}
612
/// Error type for admission-rejected persists.
///
/// Carries the score that was computed and the threshold it failed to reach. Implements
/// [`std::error::Error`] so it can be boxed, used as an error source, or propagated with `?`
/// into generic error types.
#[derive(Debug)]
pub struct AdmissionRejected {
    /// Composite score of the rejected message.
    pub composite_score: f32,
    /// Threshold the score was compared against.
    pub threshold: f32,
}

impl std::fmt::Display for AdmissionRejected {
    /// Formats both values with three decimal places.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "A-MAC admission rejected (score={:.3} < threshold={:.3})",
            self.composite_score, self.threshold
        )
    }
}

// No underlying cause to expose, so the default `source()` (`None`) is correct.
impl std::error::Error for AdmissionRejected {}
629
#[cfg(test)]
mod tests {
    use super::*;

    /// Weights for the five base factors; the goal factor stays disabled (`0.0`).
    fn base_weights(
        future_utility: f32,
        factual_confidence: f32,
        semantic_novelty: f32,
        temporal_recency: f32,
        content_type_prior: f32,
    ) -> AdmissionWeights {
        AdmissionWeights {
            future_utility,
            factual_confidence,
            semantic_novelty,
            temporal_recency,
            content_type_prior,
            goal_utility: 0.0,
        }
    }

    /// The weight set shared by most controller tests.
    fn default_weights() -> AdmissionWeights {
        base_weights(0.30, 0.15, 0.30, 0.10, 0.15)
    }

    /// `true` when `actual` is within `tolerance` of `expected`.
    fn close(actual: f32, expected: f32, tolerance: f32) -> bool {
        (actual - expected).abs() < tolerance
    }

    // ── heuristic factor tests ────────────────────────────────────────────────

    #[test]
    fn factual_confidence_no_hedging() {
        let score = compute_factual_confidence("The server uses TLS 1.3.");
        assert!(close(score, 1.0, 0.01));
    }

    #[test]
    fn factual_confidence_with_one_marker() {
        let score = compute_factual_confidence("Maybe we should use TLS 1.3.");
        assert!(close(score, 0.9, 0.01));
    }

    #[test]
    fn factual_confidence_many_markers_floors_at_0_2() {
        let score =
            compute_factual_confidence("maybe i think perhaps possibly might not sure i believe");
        assert!(score >= 0.2);
        assert!(score < 0.5);
    }

    #[test]
    fn content_type_prior_values() {
        let expected = [
            ("user", 0.7f32),
            ("assistant", 0.6),
            ("tool", 0.8),
            ("system", 0.3),
            ("unknown", 0.5),
        ];
        for (role, prior) in expected {
            assert!(close(compute_content_type_prior(role), prior, 0.01));
        }
    }

    // The "tool_result" alias shares the prior of "tool".
    #[test]
    fn content_type_prior_tool_result_alias() {
        assert!(close(compute_content_type_prior("tool_result"), 0.8, 0.01));
    }

    // ── composite score tests ─────────────────────────────────────────────────

    #[test]
    fn admission_control_admits_high_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights());
        // Every base factor at 1.0 gives a composite of 1.0.
        let score = ctrl.weighted_score(1.0, 1.0, 1.0, 1.0, 1.0, 0.0);
        assert!(score >= 0.99);
        // A score at or above the threshold is admitted.
        assert!(score >= ctrl.threshold);
    }

    #[test]
    fn admission_control_rejects_low_score() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights());
        // Every factor at 0.0 gives a composite of 0.0.
        let score = ctrl.weighted_score(0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
        assert!(score < ctrl.threshold);
    }

    // A heuristic score at or above threshold + margin takes the fast path (no LLM call).
    // Only the arithmetic of that branch is checked here, via `weighted_score`.
    #[test]
    fn fast_path_admits_when_heuristic_above_threshold_plus_margin() {
        let (threshold, margin) = (0.40f32, 0.15f32);
        let ctrl =
            AdmissionControl::new(threshold, margin, base_weights(0.20, 0.20, 0.20, 0.20, 0.20));

        // future_utility is the neutral 0.5; the other base factors are at 1.0:
        // 0.5*0.2 + 4 * (1.0*0.2) = 0.9.
        let heuristic = ctrl.weighted_score(0.5, 1.0, 1.0, 1.0, 1.0, 0.0);
        let cutoff = threshold + margin;
        assert!(
            heuristic >= cutoff,
            "heuristic {heuristic} must exceed threshold+margin {}",
            cutoff
        );
        // evaluate() admits when composite >= threshold or heuristic >= threshold + margin.
        let admitted = heuristic >= cutoff;
        assert!(admitted, "fast path must admit without LLM call");
    }

    // A heuristic score below threshold + margin requires the slow path.
    #[test]
    fn slow_path_required_when_heuristic_below_threshold_plus_margin() {
        let (threshold, margin) = (0.50f32, 0.20f32);
        let ctrl =
            AdmissionControl::new(threshold, margin, base_weights(0.40, 0.15, 0.15, 0.15, 0.15));

        // Low factor scores keep the heuristic under the fast-path cut-off.
        let heuristic = ctrl.weighted_score(0.5, 0.3, 0.3, 0.3, 0.3, 0.0);
        let cutoff = threshold + margin;
        assert!(
            heuristic < cutoff,
            "heuristic {heuristic} must be below threshold+margin {}",
            cutoff
        );
    }

    // ── logging, error and accessor tests ─────────────────────────────────────

    // Both the admitted and the rejected branch must run without panicking.
    #[test]
    fn log_admission_decision_does_not_panic() {
        let admitted = AdmissionDecision {
            admitted: true,
            composite_score: 0.75,
            factors: AdmissionFactors {
                future_utility: 0.8,
                factual_confidence: 0.9,
                semantic_novelty: 0.7,
                temporal_recency: 1.0,
                content_type_prior: 0.7,
                goal_utility: 0.0,
            },
        };
        log_admission_decision(&admitted, "preview text", "user", 0.40);

        let rejected = AdmissionDecision {
            admitted: false,
            composite_score: 0.20,
            factors: AdmissionFactors {
                future_utility: 0.1,
                factual_confidence: 0.2,
                semantic_novelty: 0.3,
                temporal_recency: 1.0,
                content_type_prior: 0.3,
                goal_utility: 0.0,
            },
        };
        log_admission_decision(&rejected, "maybe short content", "assistant", 0.40);
    }

    // Display prints both values with three decimals.
    #[test]
    fn admission_rejected_display() {
        let rendered = AdmissionRejected {
            composite_score: 0.25,
            threshold: 0.45,
        }
        .to_string();
        assert!(rendered.contains("0.250"));
        assert!(rendered.contains("0.450"));
    }

    // threshold() returns the configured value.
    #[test]
    fn threshold_accessor() {
        let ctrl = AdmissionControl::new(0.55, 0.10, base_weights(0.20, 0.20, 0.20, 0.20, 0.20));
        assert!(close(ctrl.threshold(), 0.55, 0.001));
    }

    // ── cosine_similarity tests ───────────────────────────────────────────────

    #[test]
    fn cosine_similarity_identical_vectors() {
        let unit = vec![1.0f32, 0.0, 0.0];
        assert!(close(cosine_similarity(&unit, &unit), 1.0, 1e-6));
    }

    #[test]
    fn cosine_similarity_orthogonal_vectors() {
        let x_axis = vec![1.0f32, 0.0];
        let y_axis = vec![0.0f32, 1.0];
        assert!(cosine_similarity(&x_axis, &y_axis).abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_zero_vector_returns_zero() {
        let zero = vec![0.0f32, 0.0, 0.0];
        let other = vec![1.0f32, 2.0, 3.0];
        assert!(cosine_similarity(&zero, &other).abs() < f32::EPSILON);
    }

    #[test]
    fn cosine_similarity_length_mismatch_returns_zero() {
        let short = vec![1.0f32, 0.0];
        let long = vec![1.0f32, 0.0, 0.0];
        assert!(cosine_similarity(&short, &long).abs() < f32::EPSILON);
    }

    // ── with_goal_gate tests ──────────────────────────────────────────────────

    #[test]
    fn with_goal_gate_sets_goal_utility_weight() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights()).with_goal_gate(
            GoalGateConfig {
                weight: 0.20,
                threshold: 0.5,
                provider: None,
            },
        );
        assert!(
            ctrl.weights.goal_utility > 0.0,
            "goal_utility must be nonzero after with_goal_gate"
        );
        // After re-normalization the weights must still sum to ~1.0.
        let w = &ctrl.weights;
        let total = w.future_utility
            + w.factual_confidence
            + w.semantic_novelty
            + w.temporal_recency
            + w.content_type_prior
            + w.goal_utility;
        assert!(
            close(total, 1.0, 0.01),
            "normalized weights must sum to 1.0, got {total}"
        );
    }

    #[test]
    fn with_goal_gate_zero_weight_leaves_goal_utility_at_zero() {
        let ctrl = AdmissionControl::new(0.40, 0.15, default_weights()).with_goal_gate(
            GoalGateConfig {
                weight: 0.0,
                threshold: 0.5,
                provider: None,
            },
        );
        assert!(ctrl.weights.goal_utility.abs() < f32::EPSILON);
    }

    // ── timeout regression tests (#4212) ─────────────────────────────────────

    // NOTE(review): `qdrant` is `None` here, and `compute_semantic_novelty` returns 1.0 for a
    // missing store before it ever calls `embed()`. The assertion therefore holds without
    // the timeout branch being reached — confirm whether a real store should be supplied.
    #[tokio::test]
    async fn compute_semantic_novelty_returns_one_on_embed_timeout() {
        tokio::time::pause();
        let slow_mock = zeph_llm::mock::MockProvider::default()
            .with_embed_delay(10_000)
            .with_embedding(vec![0.0; 4]);
        let provider = zeph_llm::any::AnyProvider::Mock(slow_mock);
        let task = tokio::spawn(async move {
            compute_semantic_novelty("hello", &provider, None, Duration::from_secs(5)).await
        });
        tokio::time::advance(std::time::Duration::from_secs(6)).await;
        let novelty = task.await.expect("task panicked");
        assert!(
            close(novelty, 1.0, f32::EPSILON),
            "expected 1.0 on embed timeout, got {novelty}"
        );
    }

    // The mock embed takes longer than the 5 s timeout, so the goal embedding times out.
    #[tokio::test]
    async fn compute_goal_utility_returns_zero_on_embed_timeout() {
        tokio::time::pause();
        let slow_mock = zeph_llm::mock::MockProvider::default()
            .with_embed_delay(10_000)
            .with_embedding(vec![0.0; 4]);
        let provider = zeph_llm::any::AnyProvider::Mock(slow_mock);
        let gate = GoalGateConfig {
            weight: 0.5,
            threshold: 0.5,
            provider: None,
        };
        let task = tokio::spawn(async move {
            compute_goal_utility(
                "content",
                "goal",
                &gate,
                &provider,
                None,
                Duration::from_secs(5),
            )
            .await
        });
        tokio::time::advance(std::time::Duration::from_secs(6)).await;
        let utility = task.await.expect("task panicked");
        assert!(
            utility.abs() < f32::EPSILON,
            "expected 0.0 on embed timeout, got {utility}"
        );
    }
}