oris_evolution/
confidence.rs

1//! Confidence Lifecycle Scheduler
2//!
3//! This module implements automatic confidence decay and lifecycle management
4//! for genes and capsules within the evolution crate.
5
6use std::collections::HashMap;
7
8use serde::{Deserialize, Serialize};
9use thiserror::Error;
10
11use crate::core::{
12    AssetState, Capsule, GeneId, MIN_REPLAY_CONFIDENCE, REPLAY_CONFIDENCE_DECAY_RATE_PER_HOUR,
13};
14
15/// Confidence scheduler configuration
16#[derive(Clone, Debug, Serialize, Deserialize)]
17pub struct ConfidenceSchedulerConfig {
18    /// How often to run the decay check (in seconds)
19    pub check_interval_secs: u64,
20    /// Maximum confidence boost per reuse success
21    pub confidence_boost_per_success: f32,
22    /// Maximum confidence (capped at 1.0)
23    pub max_confidence: f32,
24    /// Enable/disable the scheduler
25    pub enabled: bool,
26}
27
28impl Default for ConfidenceSchedulerConfig {
29    fn default() -> Self {
30        Self {
31            check_interval_secs: 3600, // 1 hour
32            confidence_boost_per_success: 0.1,
33            max_confidence: 1.0,
34            enabled: true,
35        }
36    }
37}
38
39/// Confidence update action
40#[derive(Clone, Debug)]
41pub enum ConfidenceAction {
42    /// Apply decay to a capsule
43    DecayCapsule {
44        capsule_id: String,
45        gene_id: GeneId,
46        old_confidence: f32,
47        new_confidence: f32,
48    },
49    /// Demote asset to quarantined due to low confidence
50    DemoteToQuarantined { asset_id: String, confidence: f32 },
51    /// Boost confidence on successful reuse
52    BoostConfidence {
53        asset_id: String,
54        old_confidence: f32,
55        new_confidence: f32,
56    },
57}
58
59/// Scheduler errors
60#[derive(Error, Debug)]
61pub enum SchedulerError {
62    #[error("Scheduler not running")]
63    NotRunning,
64
65    #[error("IO error: {0}")]
66    IoError(String),
67
68    #[error("Store error: {0}")]
69    StoreError(String),
70}
71
/// Policy interface for confidence lifecycle management.
///
/// Implementors must be `Send + Sync`.
pub trait ConfidenceScheduler: Send + Sync {
    /// Returns the confidence of a single capsule after decaying
    /// `capsule_confidence` over `age_hours` hours.
    fn apply_decay_to_capsule(&self, capsule_confidence: f32, age_hours: f32) -> f32;

    /// Returns the confidence obtained by boosting `current` after a
    /// successful reuse.
    fn boost_confidence(&self, current: f32) -> f32;

    /// Returns `true` when `confidence` is below the minimum threshold.
    fn should_quarantine(&self, confidence: f32) -> bool;
}
83
84/// Standard implementation of confidence scheduler
85pub struct StandardConfidenceScheduler {
86    config: ConfidenceSchedulerConfig,
87}
88
89impl StandardConfidenceScheduler {
90    pub fn new(config: ConfidenceSchedulerConfig) -> Self {
91        Self { config }
92    }
93
94    pub fn with_default_config() -> Self {
95        Self::new(ConfidenceSchedulerConfig::default())
96    }
97
98    /// Calculate decayed confidence
99    pub fn calculate_decay(confidence: f32, hours: f32) -> f32 {
100        if confidence <= 0.0 {
101            return 0.0;
102        }
103        let decay = (-REPLAY_CONFIDENCE_DECAY_RATE_PER_HOUR * hours).exp();
104        (confidence * decay).clamp(0.0, 1.0)
105    }
106
107    /// Calculate age in hours from a timestamp
108    pub fn calculate_age_hours(created_at_ms: i64, now_ms: i64) -> f32 {
109        let diff_ms = now_ms - created_at_ms;
110        let diff_secs = diff_ms / 1000;
111        diff_secs as f32 / 3600.0
112    }
113}
114
115impl ConfidenceScheduler for StandardConfidenceScheduler {
116    fn apply_decay_to_capsule(&self, capsule_confidence: f32, age_hours: f32) -> f32 {
117        Self::calculate_decay(capsule_confidence, age_hours)
118    }
119
120    fn boost_confidence(&self, current: f32) -> f32 {
121        let new_confidence = current + self.config.confidence_boost_per_success;
122        new_confidence.min(self.config.max_confidence)
123    }
124
125    fn should_quarantine(&self, confidence: f32) -> bool {
126        confidence < MIN_REPLAY_CONFIDENCE
127    }
128}
129
130/// Confidence metrics
131#[derive(Clone, Debug, Default, Serialize, Deserialize)]
132pub struct ConfidenceMetrics {
133    pub decay_checks_total: u64,
134    pub capsules_decayed_total: u64,
135    pub capsules_quarantined_total: u64,
136    pub confidence_boosts_total: u64,
137}
138
139/// Apply decay to a capsule and return actions
140pub fn process_capsule_confidence(
141    scheduler: &dyn ConfidenceScheduler,
142    capsule_id: &str,
143    gene_id: &GeneId,
144    confidence: f32,
145    created_at_ms: i64,
146    current_time_ms: i64,
147    state: AssetState,
148) -> Vec<ConfidenceAction> {
149    let mut actions = Vec::new();
150
151    // Only process promoted capsules
152    if state != AssetState::Promoted {
153        return actions;
154    }
155
156    let age_hours =
157        StandardConfidenceScheduler::calculate_age_hours(created_at_ms, current_time_ms);
158
159    if age_hours > 0.0 {
160        let old_conf = confidence;
161        let new_conf = scheduler.apply_decay_to_capsule(old_conf, age_hours);
162
163        if (new_conf - old_conf).abs() > 0.001 {
164            actions.push(ConfidenceAction::DecayCapsule {
165                capsule_id: capsule_id.to_string(),
166                gene_id: gene_id.clone(),
167                old_confidence: old_conf,
168                new_confidence: new_conf,
169            });
170        }
171
172        // Check quarantine threshold
173        if scheduler.should_quarantine(new_conf) {
174            actions.push(ConfidenceAction::DemoteToQuarantined {
175                asset_id: capsule_id.to_string(),
176                confidence: new_conf,
177            });
178        }
179    }
180
181    actions
182}
183
184// ---------------------------------------------------------------------------
185// ConfidenceController — continuous failure-rate-based confidence tracking
186// ---------------------------------------------------------------------------
187
188/// A single outcome record associated with an asset.
189#[derive(Clone, Debug, Serialize, Deserialize)]
190pub struct OutcomeRecord {
191    pub asset_id: String,
192    pub success: bool,
193    pub recorded_at_ms: i64,
194}
195
196/// Configuration for [`ConfidenceController`].
197#[derive(Clone, Debug, Serialize, Deserialize)]
198pub struct ControllerConfig {
199    /// Rolling time window in milliseconds used to compute failure rate
200    /// (default: 3 600 000 ms = 1 hour).
201    pub window_ms: i64,
202    /// Failure-rate threshold in [0.0, 1.0] that triggers a downgrade step
203    /// (default: 0.5).
204    pub failure_rate_threshold: f32,
205    /// Minimum number of outcomes inside the window before any downgrade
206    /// decision is made (default: 3).
207    pub min_samples: usize,
208    /// Confidence amount subtracted per downgrade step, also used as the
209    /// recovery boost per success (default: 0.15).
210    pub downgrade_penalty: f32,
211    /// Assets with confidence strictly below this value are considered
212    /// non-selectable and require re-validation (default:
213    /// `MIN_REPLAY_CONFIDENCE`).
214    pub min_selectable_confidence: f32,
215    /// Confidence assigned to assets that are not yet tracked (default: 1.0).
216    pub initial_confidence: f32,
217}
218
219impl Default for ControllerConfig {
220    fn default() -> Self {
221        Self {
222            window_ms: 3_600_000,
223            failure_rate_threshold: 0.5,
224            min_samples: 3,
225            downgrade_penalty: 0.15,
226            min_selectable_confidence: MIN_REPLAY_CONFIDENCE,
227            initial_confidence: 1.0,
228        }
229    }
230}
231
232/// An observability event emitted whenever an asset is automatically
233/// downgraded by the [`ConfidenceController`].
234#[derive(Clone, Debug, Serialize, Deserialize)]
235pub struct DowngradeEvent {
236    pub asset_id: String,
237    pub old_confidence: f32,
238    pub new_confidence: f32,
239    /// Observed failure rate inside the rolling window.
240    pub failure_rate: f32,
241    /// Number of outcomes that were inside the window.
242    pub window_samples: usize,
243    pub event_at_ms: i64,
244    /// `true` when `new_confidence` fell below `min_selectable_confidence`,
245    /// indicating that re-validation should be triggered.
246    pub revalidation_required: bool,
247}
248
249/// Continuous confidence controller for genes and capsules.
250///
251/// Tracks per-asset success / failure outcomes within a rolling time window.
252/// When the failure rate exceeds the configured threshold and the minimum
253/// sample count is met, the asset's confidence score is automatically
254/// downgraded and a [`DowngradeEvent`] is appended to an internal
255/// observability log.  Successive successes can recover confidence.
256///
257/// # Selector integration
258///
259/// Call [`is_selectable`](ConfidenceController::is_selectable) before
260/// choosing a gene/capsule for reuse.  Assets below
261/// [`ControllerConfig::min_selectable_confidence`] return `false` and
262/// should be skipped until they have been re-validated.
263pub struct ConfidenceController {
264    config: ControllerConfig,
265    scores: HashMap<String, f32>,
266    history: HashMap<String, Vec<OutcomeRecord>>,
267    downgrade_log: Vec<DowngradeEvent>,
268}
269
270impl ConfidenceController {
271    /// Create a new controller with the given configuration.
272    pub fn new(config: ControllerConfig) -> Self {
273        Self {
274            config,
275            scores: HashMap::new(),
276            history: HashMap::new(),
277            downgrade_log: Vec::new(),
278        }
279    }
280
281    /// Create a controller using [`ControllerConfig::default`].
282    pub fn with_default_config() -> Self {
283        Self::new(ControllerConfig::default())
284    }
285
286    /// Current confidence score for `asset_id`.
287    /// Returns [`ControllerConfig::initial_confidence`] for unknown assets.
288    pub fn confidence(&self, asset_id: &str) -> f32 {
289        self.scores
290            .get(asset_id)
291            .copied()
292            .unwrap_or(self.config.initial_confidence)
293    }
294
295    /// Returns `true` when the asset's confidence is at or above
296    /// [`ControllerConfig::min_selectable_confidence`].
297    pub fn is_selectable(&self, asset_id: &str) -> bool {
298        self.confidence(asset_id) >= self.config.min_selectable_confidence
299    }
300
301    /// Record a **successful** outcome for `asset_id` at `now_ms`.
302    ///
303    /// Applies a recovery boost (capped at `initial_confidence`).
304    pub fn record_success(&mut self, asset_id: &str, now_ms: i64) {
305        self.history
306            .entry(asset_id.to_string())
307            .or_default()
308            .push(OutcomeRecord {
309                asset_id: asset_id.to_string(),
310                success: true,
311                recorded_at_ms: now_ms,
312            });
313        let initial = self.config.initial_confidence;
314        let penalty = self.config.downgrade_penalty;
315        let entry = self.scores.entry(asset_id.to_string()).or_insert(initial);
316        *entry = (*entry + penalty).min(initial);
317    }
318
319    /// Record a **failure** outcome for `asset_id` at `now_ms`.
320    ///
321    /// Immediately evaluates the rolling-window failure rate and downgrades
322    /// confidence if the threshold is exceeded.
323    pub fn record_failure(&mut self, asset_id: &str, now_ms: i64) {
324        self.history
325            .entry(asset_id.to_string())
326            .or_default()
327            .push(OutcomeRecord {
328                asset_id: asset_id.to_string(),
329                success: false,
330                recorded_at_ms: now_ms,
331            });
332        if let Some(evt) =
333            Self::compute_downgrade(&self.history, &self.scores, asset_id, now_ms, &self.config)
334        {
335            *self
336                .scores
337                .entry(asset_id.to_string())
338                .or_insert(evt.old_confidence) = evt.new_confidence;
339            self.downgrade_log.push(evt);
340        }
341    }
342
343    /// Sweep all tracked assets and apply downgrade logic at `now_ms`.
344    ///
345    /// Returns every [`DowngradeEvent`] generated in this sweep (also
346    /// appended to the internal log).
347    pub fn run_downgrade_check(&mut self, now_ms: i64) -> Vec<DowngradeEvent> {
348        let asset_ids: Vec<String> = self.history.keys().cloned().collect();
349        let mut events = Vec::new();
350        for id in &asset_ids {
351            if let Some(evt) =
352                Self::compute_downgrade(&self.history, &self.scores, id, now_ms, &self.config)
353            {
354                *self.scores.entry(id.clone()).or_insert(evt.old_confidence) = evt.new_confidence;
355                self.downgrade_log.push(evt.clone());
356                events.push(evt);
357            }
358        }
359        events
360    }
361
362    /// Full observability log of every downgrade event since construction.
363    pub fn downgrade_log(&self) -> &[DowngradeEvent] {
364        &self.downgrade_log
365    }
366
367    /// Asset IDs whose confidence has fallen below
368    /// [`ControllerConfig::min_selectable_confidence`] and therefore require
369    /// re-validation before they can be reused.
370    pub fn assets_requiring_revalidation(&self) -> Vec<String> {
371        self.scores
372            .iter()
373            .filter(|(_, &v)| v < self.config.min_selectable_confidence)
374            .map(|(k, _)| k.clone())
375            .collect()
376    }
377
378    // --- private helpers ---
379
380    /// Pure function: decide whether `asset_id` should be downgraded given
381    /// current `history` and `scores`.  Returns `None` when no action is
382    /// needed.
383    fn compute_downgrade(
384        history: &HashMap<String, Vec<OutcomeRecord>>,
385        scores: &HashMap<String, f32>,
386        asset_id: &str,
387        now_ms: i64,
388        config: &ControllerConfig,
389    ) -> Option<DowngradeEvent> {
390        let window_start = now_ms - config.window_ms;
391        let records = history.get(asset_id)?;
392        let window: Vec<&OutcomeRecord> = records
393            .iter()
394            .filter(|r| r.recorded_at_ms >= window_start)
395            .collect();
396        let total = window.len();
397        if total < config.min_samples {
398            return None;
399        }
400        let failures = window.iter().filter(|r| !r.success).count();
401        let rate = failures as f32 / total as f32;
402        if rate < config.failure_rate_threshold {
403            return None;
404        }
405        let old = scores
406            .get(asset_id)
407            .copied()
408            .unwrap_or(config.initial_confidence);
409        let new_val = (old - config.downgrade_penalty).max(0.0);
410        Some(DowngradeEvent {
411            asset_id: asset_id.to_string(),
412            old_confidence: old,
413            new_confidence: new_val,
414            failure_rate: rate,
415            window_samples: total,
416            event_at_ms: now_ms,
417            revalidation_required: new_val < config.min_selectable_confidence,
418        })
419    }
420}
421
422// ---------------------------------------------------------------------------
423// BayesianConfidenceUpdater and ConfidenceSnapshot
424// ---------------------------------------------------------------------------
425
426/// A snapshot of the current Bayesian posterior for an asset's confidence.
427#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
428pub struct ConfidenceSnapshot {
429    /// Posterior mean: α / (α + β).
430    pub mean: f32,
431    /// Posterior variance: αβ / ((α+β)²(α+β+1)).
432    pub variance: f32,
433    /// Total observations (successes + failures) since the updater was created.
434    pub sample_count: u32,
435    /// `true` when `sample_count ≥ 10` and `variance < 0.01`, indicating a stable
436    /// credible interval.
437    pub is_stable: bool,
438}
439
440/// Per-class Beta-distribution prior.
441#[derive(Clone, Debug, Serialize, Deserialize)]
442pub struct BetaPrior {
443    /// Alpha parameter (pseudo-success count).
444    pub alpha: f32,
445    /// Beta parameter (pseudo-failure count).
446    pub beta: f32,
447}
448
449impl BetaPrior {
450    pub fn new(alpha: f32, beta: f32) -> Self {
451        assert!(
452            alpha > 0.0 && beta > 0.0,
453            "Beta distribution parameters must be positive"
454        );
455        Self { alpha, beta }
456    }
457}
458
459/// Return the canonical built-in priors.
460///
461/// Encodes a weak prior leaning toward success (α=2, β=1) reflecting that
462/// genes entering the pool should already have passed static validation.
463pub fn builtin_priors() -> BetaPrior {
464    BetaPrior::new(2.0, 1.0)
465}
466
/// Confidence estimator based on a Beta-Bernoulli conjugate model.
///
/// # Model
///
/// The updater holds the parameters `(α, β)` of a Beta distribution and
/// adjusts them per observation:
/// - a success adds 1 to `α`;
/// - a failure adds 1 to `β`.
///
/// The point estimate is the posterior mean `α / (α + β)`.
pub struct BayesianConfidenceUpdater {
    /// Current α: prior pseudo-successes plus observed successes.
    alpha: f32,
    /// Current β: prior pseudo-failures plus observed failures.
    beta: f32,
}
480
481impl BayesianConfidenceUpdater {
482    /// Create an updater with an explicit prior.
483    pub fn new(prior: BetaPrior) -> Self {
484        Self {
485            alpha: prior.alpha,
486            beta: prior.beta,
487        }
488    }
489
490    /// Create an updater with the `builtin_priors()` prior.
491    pub fn with_builtin_prior() -> Self {
492        Self::new(builtin_priors())
493    }
494
495    /// Record a success observation (`α += 1`).
496    pub fn update_success(&mut self) {
497        self.alpha += 1.0;
498    }
499
500    /// Record a failure observation (`β += 1`).
501    pub fn update_failure(&mut self) {
502        self.beta += 1.0;
503    }
504
505    /// Apply `successes` and `failures` in bulk.
506    pub fn update(&mut self, successes: u32, failures: u32) {
507        self.alpha += successes as f32;
508        self.beta += failures as f32;
509    }
510
511    /// Posterior mean: `α / (α + β)`.
512    pub fn posterior_mean(&self) -> f32 {
513        self.alpha / (self.alpha + self.beta)
514    }
515
516    /// Posterior variance: `αβ / ((α+β)²(α+β+1))`.
517    pub fn posterior_variance(&self) -> f32 {
518        let ab = self.alpha + self.beta;
519        (self.alpha * self.beta) / (ab * ab * (ab + 1.0))
520    }
521
522    /// Total observations recorded above the prior (i.e. `(α - α₀) + (β - β₀)`).
523    pub fn sample_count(&self, prior: &BetaPrior) -> u32 {
524        let raw = (self.alpha - prior.alpha) + (self.beta - prior.beta);
525        raw.round().max(0.0) as u32
526    }
527
528    /// Build a `ConfidenceSnapshot` from the current posterior state.
529    ///
530    /// `prior` is used only to compute `sample_count`; pass `builtin_priors()`
531    /// unless you constructed this updater with a custom prior.
532    pub fn snapshot(&self, prior: &BetaPrior) -> ConfidenceSnapshot {
533        let mean = self.posterior_mean();
534        let variance = self.posterior_variance();
535        let count = self.sample_count(prior);
536        let is_stable = count >= 10 && variance < 0.01;
537        ConfidenceSnapshot {
538            mean,
539            variance,
540            sample_count: count,
541            is_stable,
542        }
543    }
544
545    /// Current alpha parameter (useful for serialisation/inspection).
546    pub fn alpha(&self) -> f32 {
547        self.alpha
548    }
549
550    /// Current beta parameter (useful for serialisation/inspection).
551    pub fn beta(&self) -> f32 {
552        self.beta
553    }
554}
555
#[cfg(test)]
mod tests {
    //! Unit tests for the scheduler helpers, the `ConfidenceController` and
    //! the `BayesianConfidenceUpdater`.
    //!
    //! Expectations that depend on crate-level tuning constants are derived
    //! from those constants rather than from copies of their current values.

    use super::*;

    #[test]
    fn test_calculate_decay() {
        // No elapsed time leaves the confidence untouched.
        let conf = StandardConfidenceScheduler::calculate_decay(1.0, 0.0);
        assert!((conf - 1.0).abs() < 0.001);

        // One half-life (ln(2) / rate) halves the confidence.
        let half_life = std::f32::consts::LN_2 / REPLAY_CONFIDENCE_DECAY_RATE_PER_HOUR;
        let conf = StandardConfidenceScheduler::calculate_decay(1.0, half_life);
        assert!((conf - 0.5).abs() < 0.01);

        // After 24 hours the confidence follows e^(-rate * 24).
        let expected = (-REPLAY_CONFIDENCE_DECAY_RATE_PER_HOUR * 24.0).exp();
        let conf = StandardConfidenceScheduler::calculate_decay(1.0, 24.0);
        assert!((conf - expected).abs() < 0.02);
        assert!(conf < 1.0, "decay must reduce the confidence, got {conf}");

        // Zero confidence stays zero
        let conf = StandardConfidenceScheduler::calculate_decay(0.0, 100.0);
        assert!(conf.abs() < 0.001);
    }

    #[test]
    fn test_should_quarantine() {
        let scheduler = StandardConfidenceScheduler::with_default_config();

        // At or above the threshold - should not quarantine (strict `<`).
        assert!(!scheduler.should_quarantine(MIN_REPLAY_CONFIDENCE + 0.15));
        assert!(!scheduler.should_quarantine(MIN_REPLAY_CONFIDENCE));
        assert!(!scheduler.should_quarantine(MIN_REPLAY_CONFIDENCE + 0.01));

        // Below threshold - should quarantine
        assert!(scheduler.should_quarantine(MIN_REPLAY_CONFIDENCE - 0.01));
        assert!(scheduler.should_quarantine(0.0));
    }

    #[test]
    fn test_boost_confidence() {
        let scheduler = StandardConfidenceScheduler::with_default_config();

        // Boost from 0.5 should be 0.6
        let conf = scheduler.boost_confidence(0.5);
        assert!((conf - 0.6).abs() < 0.001);

        // Boost should cap at 1.0
        let conf = scheduler.boost_confidence(1.0);
        assert!((conf - 1.0).abs() < 0.001);

        // Boost from 0.95 should cap at 1.0
        let conf = scheduler.boost_confidence(0.95);
        assert!((conf - 1.0).abs() < 0.001);
    }

    #[test]
    fn test_default_config() {
        let config = ConfidenceSchedulerConfig::default();
        assert!(config.enabled);
        assert_eq!(config.check_interval_secs, 3600);
        assert!((config.confidence_boost_per_success - 0.1).abs() < 0.001);
    }

    #[test]
    fn test_calculate_age_hours() {
        // 1 hour = 3600 seconds = 3600000 ms
        let age = StandardConfidenceScheduler::calculate_age_hours(0, 3600000);
        assert!((age - 1.0).abs() < 0.001);

        // 24 hours
        let age = StandardConfidenceScheduler::calculate_age_hours(0, 86400000);
        assert!((age - 24.0).abs() < 0.001);

        // Less than an hour
        let age = StandardConfidenceScheduler::calculate_age_hours(0, 1800000);
        assert!((age - 0.5).abs() < 0.001);
    }

    // -----------------------------------------------------------------------
    // ConfidenceController tests
    // -----------------------------------------------------------------------

    const NOW: i64 = 1_000_000_000_000; // arbitrary fixed "now" in ms
    const WINDOW: i64 = 3_600_000; // 1 hour window

    /// Controller with a one-hour window, `min_samples = 3`, a 0.15 penalty
    /// and an initial confidence of 1.0.
    fn controller_with_3_samples() -> ConfidenceController {
        ConfidenceController::new(ControllerConfig {
            window_ms: WINDOW,
            failure_rate_threshold: 0.5,
            min_samples: 3,
            downgrade_penalty: 0.15,
            min_selectable_confidence: MIN_REPLAY_CONFIDENCE,
            initial_confidence: 1.0,
        })
    }

    #[test]
    fn test_controller_initial_confidence_is_one() {
        let ctrl = controller_with_3_samples();
        // Unknown asset → initial confidence
        assert!((ctrl.confidence("gene-1") - 1.0).abs() < 0.001);
        assert!(ctrl.is_selectable("gene-1"));
    }

    #[test]
    fn test_controller_successive_failures_downgrade() {
        let mut ctrl = controller_with_3_samples();
        // 3 failures in the window → failure rate 1.0 ≥ 0.5 → first downgrade
        ctrl.record_failure("gene-x", NOW);
        ctrl.record_failure("gene-x", NOW + 1);
        ctrl.record_failure("gene-x", NOW + 2);
        let c = ctrl.confidence("gene-x");
        // Expected: 1.0 - 0.15 = 0.85
        assert!((c - 0.85).abs() < 0.01, "expected ~0.85, got {c}");
        assert_eq!(ctrl.downgrade_log().len(), 1);
    }

    #[test]
    fn test_controller_below_min_not_selectable() {
        let mut ctrl = ConfidenceController::new(ControllerConfig {
            window_ms: WINDOW,
            failure_rate_threshold: 0.5,
            min_samples: 2,
            downgrade_penalty: 0.35,
            min_selectable_confidence: MIN_REPLAY_CONFIDENCE,
            initial_confidence: 0.5, // start near the edge
        });
        // Two failures → rate 1.0 ≥ 0.5, 2 ≥ min_samples=2 → downgrade
        ctrl.record_failure("gene-low", NOW);
        ctrl.record_failure("gene-low", NOW + 1);
        // 0.5 - 0.35 = 0.15, which is expected to be below MIN_REPLAY_CONFIDENCE
        assert!(!ctrl.is_selectable("gene-low"));
        assert!(ctrl.downgrade_log()[0].revalidation_required);
        let rv = ctrl.assets_requiring_revalidation();
        assert!(rv.contains(&"gene-low".to_string()));
    }

    #[test]
    fn test_controller_recovery_via_successes() {
        let mut ctrl = controller_with_3_samples();
        // Drive confidence down first
        ctrl.record_failure("gene-r", NOW);
        ctrl.record_failure("gene-r", NOW + 1);
        ctrl.record_failure("gene-r", NOW + 2);
        let after_failures = ctrl.confidence("gene-r");
        // Now record two successes to partially recover
        ctrl.record_success("gene-r", NOW + 3);
        ctrl.record_success("gene-r", NOW + 4);
        let after_recovery = ctrl.confidence("gene-r");
        assert!(
            after_recovery > after_failures,
            "recovery expected: {after_recovery} > {after_failures}"
        );
    }

    #[test]
    fn test_controller_no_downgrade_below_min_samples() {
        let mut ctrl = controller_with_3_samples();
        // Only 2 failures — below min_samples=3 → no downgrade
        ctrl.record_failure("gene-few", NOW);
        ctrl.record_failure("gene-few", NOW + 1);
        assert!((ctrl.confidence("gene-few") - 1.0).abs() < 0.001);
        assert!(ctrl.downgrade_log().is_empty());
    }

    #[test]
    fn test_controller_failures_outside_window_ignored() {
        let mut ctrl = controller_with_3_samples();
        // 2 failures far outside the 1-hour window (below min_samples=3 so no
        // downgrade fires when they are recorded).
        let old = NOW - WINDOW - 1;
        ctrl.record_failure("gene-old", old);
        ctrl.record_failure("gene-old", old + 1);
        // run_downgrade_check at NOW: the record at `old` lies outside the
        // window, so at most one sample remains → below min_samples → no new
        // downgrade event.
        let events = ctrl.run_downgrade_check(NOW);
        assert!(events.is_empty(), "expected no downgrade, got {events:?}");
        assert!((ctrl.confidence("gene-old") - 1.0).abs() < 0.001);
        assert!(ctrl.downgrade_log().is_empty());
    }

    #[test]
    fn test_controller_run_downgrade_check_batch() {
        let mut ctrl = controller_with_3_samples();
        // Seed failures for two assets directly in the history so that no
        // downgrade fires before the sweep.
        for i in 0..3 {
            for asset in ["asset-a", "asset-b"] {
                ctrl.history
                    .entry(asset.to_string())
                    .or_default()
                    .push(OutcomeRecord {
                        asset_id: asset.to_string(),
                        success: false,
                        recorded_at_ms: NOW + i,
                    });
            }
        }
        let events = ctrl.run_downgrade_check(NOW + 10);
        assert_eq!(events.len(), 2);
        assert_eq!(ctrl.downgrade_log().len(), 2);
    }

    #[test]
    fn test_controller_downgrade_event_fields() {
        let mut ctrl = controller_with_3_samples();
        ctrl.record_failure("gene-fields", NOW);
        ctrl.record_failure("gene-fields", NOW + 1);
        ctrl.record_failure("gene-fields", NOW + 2);
        let log = ctrl.downgrade_log();
        assert_eq!(log.len(), 1);
        let evt = &log[0];
        assert_eq!(evt.asset_id, "gene-fields");
        assert!((evt.old_confidence - 1.0).abs() < 0.001);
        assert!((evt.new_confidence - 0.85).abs() < 0.01);
        assert!((evt.failure_rate - 1.0).abs() < 0.001);
        assert_eq!(evt.window_samples, 3);
        assert_eq!(evt.event_at_ms, NOW + 2);
    }

    // -----------------------------------------------------------------------
    // BayesianConfidenceUpdater tests
    // -----------------------------------------------------------------------

    #[test]
    fn bayesian_updater_prior_mean() {
        // builtin_priors: α=2, β=1 → mean = 2/3 ≈ 0.667
        let updater = BayesianConfidenceUpdater::with_builtin_prior();
        let mean = updater.posterior_mean();
        assert!((mean - 2.0 / 3.0).abs() < 0.001, "mean={mean}");
    }

    #[test]
    fn bayesian_updater_converges_to_true_rate() {
        // 100 observations at 70% success-rate; posterior mean should approach 0.70.
        let mut updater = BayesianConfidenceUpdater::with_builtin_prior();
        updater.update(70, 30);
        let mean = updater.posterior_mean();
        assert!(
            (mean - 0.70).abs() < 0.02,
            "expected mean ≈ 0.70, got {mean}"
        );
    }

    #[test]
    fn bayesian_updater_sample_count() {
        let prior = builtin_priors();
        let mut updater = BayesianConfidenceUpdater::with_builtin_prior();
        updater.update(5, 5);
        assert_eq!(updater.sample_count(&prior), 10);
    }

    #[test]
    fn bayesian_updater_snapshot_is_stable_after_observations() {
        let prior = builtin_priors();
        let mut updater = BayesianConfidenceUpdater::with_builtin_prior();
        // 50 successes, 50 failures → balanced → variance should be small after many samples
        updater.update(50, 50);
        let snap = updater.snapshot(&prior);
        assert_eq!(snap.sample_count, 100);
        // variance = αβ/((α+β)²(α+β+1)); with α=52,β=51 → very small
        assert!(snap.is_stable, "should be stable with 100 samples");
    }

    #[test]
    fn bayesian_updater_sequential_updates_equal_bulk() {
        let mut seq = BayesianConfidenceUpdater::with_builtin_prior();
        for _ in 0..7 {
            seq.update_success();
        }
        for _ in 0..3 {
            seq.update_failure();
        }

        let mut bulk = BayesianConfidenceUpdater::with_builtin_prior();
        bulk.update(7, 3);

        assert!((seq.posterior_mean() - bulk.posterior_mean()).abs() < 1e-6);
        assert!((seq.posterior_variance() - bulk.posterior_variance()).abs() < 1e-9);
    }
}
oris_evolution/confidence.rs

oris_evolution/
confidence.rs