Skip to main content

oris_evolution/
confidence.rs

//! Confidence Lifecycle Scheduler
//!
//! This module implements automatic confidence decay and lifecycle management
//! for genes and capsules within the evolution crate.
5
6use std::collections::HashMap;
7
8use serde::{Deserialize, Serialize};
9use thiserror::Error;
10
11use crate::core::{
12    AssetState, Capsule, GeneId, MIN_REPLAY_CONFIDENCE, REPLAY_CONFIDENCE_DECAY_RATE_PER_HOUR,
13};
14
15/// Confidence scheduler configuration
16#[derive(Clone, Debug, Serialize, Deserialize)]
17pub struct ConfidenceSchedulerConfig {
18    /// How often to run the decay check (in seconds)
19    pub check_interval_secs: u64,
20    /// Maximum confidence boost per reuse success
21    pub confidence_boost_per_success: f32,
22    /// Maximum confidence (capped at 1.0)
23    pub max_confidence: f32,
24    /// Enable/disable the scheduler
25    pub enabled: bool,
26}
27
28impl Default for ConfidenceSchedulerConfig {
29    fn default() -> Self {
30        Self {
31            check_interval_secs: 3600, // 1 hour
32            confidence_boost_per_success: 0.1,
33            max_confidence: 1.0,
34            enabled: true,
35        }
36    }
37}
38
39/// Confidence update action
40#[derive(Clone, Debug)]
41pub enum ConfidenceAction {
42    /// Apply decay to a capsule
43    DecayCapsule {
44        capsule_id: String,
45        gene_id: GeneId,
46        old_confidence: f32,
47        new_confidence: f32,
48    },
49    /// Demote asset to quarantined due to low confidence
50    DemoteToQuarantined { asset_id: String, confidence: f32 },
51    /// Boost confidence on successful reuse
52    BoostConfidence {
53        asset_id: String,
54        old_confidence: f32,
55        new_confidence: f32,
56    },
57}
58
59/// Scheduler errors
60#[derive(Error, Debug)]
61pub enum SchedulerError {
62    #[error("Scheduler not running")]
63    NotRunning,
64
65    #[error("IO error: {0}")]
66    IoError(String),
67
68    #[error("Store error: {0}")]
69    StoreError(String),
70}
71
/// Policy hooks for confidence lifecycle management.
///
/// Implementors must be `Send + Sync`.
pub trait ConfidenceScheduler: Send + Sync {
    /// Returns the confidence a capsule should have after `age_hours` of
    /// decay, starting from `capsule_confidence`.
    fn apply_decay_to_capsule(&self, capsule_confidence: f32, age_hours: f32) -> f32;

    /// Returns the confidence that results from one successful reuse,
    /// starting from `current`.
    fn boost_confidence(&self, current: f32) -> f32;

    /// Reports whether `confidence` is below the minimum threshold, meaning
    /// the asset should be quarantined.
    fn should_quarantine(&self, confidence: f32) -> bool;
}
83
84/// Standard implementation of confidence scheduler
85pub struct StandardConfidenceScheduler {
86    config: ConfidenceSchedulerConfig,
87}
88
89impl StandardConfidenceScheduler {
90    pub fn new(config: ConfidenceSchedulerConfig) -> Self {
91        Self { config }
92    }
93
94    pub fn with_default_config() -> Self {
95        Self::new(ConfidenceSchedulerConfig::default())
96    }
97
98    /// Calculate decayed confidence
99    pub fn calculate_decay(confidence: f32, hours: f32) -> f32 {
100        if confidence <= 0.0 {
101            return 0.0;
102        }
103        let decay = (-REPLAY_CONFIDENCE_DECAY_RATE_PER_HOUR * hours).exp();
104        (confidence * decay).clamp(0.0, 1.0)
105    }
106
107    /// Calculate age in hours from a timestamp
108    pub fn calculate_age_hours(created_at_ms: i64, now_ms: i64) -> f32 {
109        let diff_ms = now_ms - created_at_ms;
110        let diff_secs = diff_ms / 1000;
111        diff_secs as f32 / 3600.0
112    }
113}
114
115impl ConfidenceScheduler for StandardConfidenceScheduler {
116    fn apply_decay_to_capsule(&self, capsule_confidence: f32, age_hours: f32) -> f32 {
117        Self::calculate_decay(capsule_confidence, age_hours)
118    }
119
120    fn boost_confidence(&self, current: f32) -> f32 {
121        let new_confidence = current + self.config.confidence_boost_per_success;
122        new_confidence.min(self.config.max_confidence)
123    }
124
125    fn should_quarantine(&self, confidence: f32) -> bool {
126        confidence < MIN_REPLAY_CONFIDENCE
127    }
128}
129
130/// Confidence metrics
131#[derive(Clone, Debug, Default, Serialize, Deserialize)]
132pub struct ConfidenceMetrics {
133    pub decay_checks_total: u64,
134    pub capsules_decayed_total: u64,
135    pub capsules_quarantined_total: u64,
136    pub confidence_boosts_total: u64,
137}
138
139/// Apply decay to a capsule and return actions
140pub fn process_capsule_confidence(
141    scheduler: &dyn ConfidenceScheduler,
142    capsule_id: &str,
143    gene_id: &GeneId,
144    confidence: f32,
145    created_at_ms: i64,
146    current_time_ms: i64,
147    state: AssetState,
148) -> Vec<ConfidenceAction> {
149    let mut actions = Vec::new();
150
151    // Only process promoted capsules
152    if state != AssetState::Promoted {
153        return actions;
154    }
155
156    let age_hours =
157        StandardConfidenceScheduler::calculate_age_hours(created_at_ms, current_time_ms);
158
159    if age_hours > 0.0 {
160        let old_conf = confidence;
161        let new_conf = scheduler.apply_decay_to_capsule(old_conf, age_hours);
162
163        if (new_conf - old_conf).abs() > 0.001 {
164            actions.push(ConfidenceAction::DecayCapsule {
165                capsule_id: capsule_id.to_string(),
166                gene_id: gene_id.clone(),
167                old_confidence: old_conf,
168                new_confidence: new_conf,
169            });
170        }
171
172        // Check quarantine threshold
173        if scheduler.should_quarantine(new_conf) {
174            actions.push(ConfidenceAction::DemoteToQuarantined {
175                asset_id: capsule_id.to_string(),
176                confidence: new_conf,
177            });
178        }
179    }
180
181    actions
182}
183
184// ---------------------------------------------------------------------------
185// ConfidenceController — continuous failure-rate-based confidence tracking
186// ---------------------------------------------------------------------------
187
188/// A single outcome record associated with an asset.
189#[derive(Clone, Debug, Serialize, Deserialize)]
190pub struct OutcomeRecord {
191    pub asset_id: String,
192    pub success: bool,
193    pub recorded_at_ms: i64,
194}
195
196/// Configuration for [`ConfidenceController`].
197#[derive(Clone, Debug, Serialize, Deserialize)]
198pub struct ControllerConfig {
199    /// Rolling time window in milliseconds used to compute failure rate
200    /// (default: 3 600 000 ms = 1 hour).
201    pub window_ms: i64,
202    /// Failure-rate threshold in [0.0, 1.0] that triggers a downgrade step
203    /// (default: 0.5).
204    pub failure_rate_threshold: f32,
205    /// Minimum number of outcomes inside the window before any downgrade
206    /// decision is made (default: 3).
207    pub min_samples: usize,
208    /// Confidence amount subtracted per downgrade step, also used as the
209    /// recovery boost per success (default: 0.15).
210    pub downgrade_penalty: f32,
211    /// Assets with confidence strictly below this value are considered
212    /// non-selectable and require re-validation (default:
213    /// `MIN_REPLAY_CONFIDENCE`).
214    pub min_selectable_confidence: f32,
215    /// Confidence assigned to assets that are not yet tracked (default: 1.0).
216    pub initial_confidence: f32,
217}
218
219impl Default for ControllerConfig {
220    fn default() -> Self {
221        Self {
222            window_ms: 3_600_000,
223            failure_rate_threshold: 0.5,
224            min_samples: 3,
225            downgrade_penalty: 0.15,
226            min_selectable_confidence: MIN_REPLAY_CONFIDENCE,
227            initial_confidence: 1.0,
228        }
229    }
230}
231
232/// An observability event emitted whenever an asset is automatically
233/// downgraded by the [`ConfidenceController`].
234#[derive(Clone, Debug, Serialize, Deserialize)]
235pub struct DowngradeEvent {
236    pub asset_id: String,
237    pub old_confidence: f32,
238    pub new_confidence: f32,
239    /// Observed failure rate inside the rolling window.
240    pub failure_rate: f32,
241    /// Number of outcomes that were inside the window.
242    pub window_samples: usize,
243    pub event_at_ms: i64,
244    /// `true` when `new_confidence` fell below `min_selectable_confidence`,
245    /// indicating that re-validation should be triggered.
246    pub revalidation_required: bool,
247}
248
249/// Continuous confidence controller for genes and capsules.
250///
251/// Tracks per-asset success / failure outcomes within a rolling time window.
252/// When the failure rate exceeds the configured threshold and the minimum
253/// sample count is met, the asset's confidence score is automatically
254/// downgraded and a [`DowngradeEvent`] is appended to an internal
255/// observability log.  Successive successes can recover confidence.
256///
257/// # Selector integration
258///
259/// Call [`is_selectable`](ConfidenceController::is_selectable) before
260/// choosing a gene/capsule for reuse.  Assets below
261/// [`ControllerConfig::min_selectable_confidence`] return `false` and
262/// should be skipped until they have been re-validated.
263pub struct ConfidenceController {
264    config: ControllerConfig,
265    scores: HashMap<String, f32>,
266    history: HashMap<String, Vec<OutcomeRecord>>,
267    downgrade_log: Vec<DowngradeEvent>,
268}
269
270impl ConfidenceController {
271    /// Create a new controller with the given configuration.
272    pub fn new(config: ControllerConfig) -> Self {
273        Self {
274            config,
275            scores: HashMap::new(),
276            history: HashMap::new(),
277            downgrade_log: Vec::new(),
278        }
279    }
280
281    /// Create a controller using [`ControllerConfig::default`].
282    pub fn with_default_config() -> Self {
283        Self::new(ControllerConfig::default())
284    }
285
286    /// Current confidence score for `asset_id`.
287    /// Returns [`ControllerConfig::initial_confidence`] for unknown assets.
288    pub fn confidence(&self, asset_id: &str) -> f32 {
289        self.scores
290            .get(asset_id)
291            .copied()
292            .unwrap_or(self.config.initial_confidence)
293    }
294
295    /// Returns `true` when the asset's confidence is at or above
296    /// [`ControllerConfig::min_selectable_confidence`].
297    pub fn is_selectable(&self, asset_id: &str) -> bool {
298        self.confidence(asset_id) >= self.config.min_selectable_confidence
299    }
300
301    /// Record a **successful** outcome for `asset_id` at `now_ms`.
302    ///
303    /// Applies a recovery boost (capped at `initial_confidence`).
304    pub fn record_success(&mut self, asset_id: &str, now_ms: i64) {
305        self.history
306            .entry(asset_id.to_string())
307            .or_default()
308            .push(OutcomeRecord {
309                asset_id: asset_id.to_string(),
310                success: true,
311                recorded_at_ms: now_ms,
312            });
313        let initial = self.config.initial_confidence;
314        let penalty = self.config.downgrade_penalty;
315        let entry = self.scores.entry(asset_id.to_string()).or_insert(initial);
316        *entry = (*entry + penalty).min(initial);
317    }
318
319    /// Record a **failure** outcome for `asset_id` at `now_ms`.
320    ///
321    /// Immediately evaluates the rolling-window failure rate and downgrades
322    /// confidence if the threshold is exceeded.
323    pub fn record_failure(&mut self, asset_id: &str, now_ms: i64) {
324        self.history
325            .entry(asset_id.to_string())
326            .or_default()
327            .push(OutcomeRecord {
328                asset_id: asset_id.to_string(),
329                success: false,
330                recorded_at_ms: now_ms,
331            });
332        if let Some(evt) =
333            Self::compute_downgrade(&self.history, &self.scores, asset_id, now_ms, &self.config)
334        {
335            *self
336                .scores
337                .entry(asset_id.to_string())
338                .or_insert(evt.old_confidence) = evt.new_confidence;
339            self.downgrade_log.push(evt);
340        }
341    }
342
343    /// Sweep all tracked assets and apply downgrade logic at `now_ms`.
344    ///
345    /// Returns every [`DowngradeEvent`] generated in this sweep (also
346    /// appended to the internal log).
347    pub fn run_downgrade_check(&mut self, now_ms: i64) -> Vec<DowngradeEvent> {
348        let asset_ids: Vec<String> = self.history.keys().cloned().collect();
349        let mut events = Vec::new();
350        for id in &asset_ids {
351            if let Some(evt) =
352                Self::compute_downgrade(&self.history, &self.scores, id, now_ms, &self.config)
353            {
354                *self.scores.entry(id.clone()).or_insert(evt.old_confidence) = evt.new_confidence;
355                self.downgrade_log.push(evt.clone());
356                events.push(evt);
357            }
358        }
359        events
360    }
361
362    /// Full observability log of every downgrade event since construction.
363    pub fn downgrade_log(&self) -> &[DowngradeEvent] {
364        &self.downgrade_log
365    }
366
367    /// Asset IDs whose confidence has fallen below
368    /// [`ControllerConfig::min_selectable_confidence`] and therefore require
369    /// re-validation before they can be reused.
370    pub fn assets_requiring_revalidation(&self) -> Vec<String> {
371        self.scores
372            .iter()
373            .filter(|(_, &v)| v < self.config.min_selectable_confidence)
374            .map(|(k, _)| k.clone())
375            .collect()
376    }
377
378    // --- private helpers ---
379
380    /// Pure function: decide whether `asset_id` should be downgraded given
381    /// current `history` and `scores`.  Returns `None` when no action is
382    /// needed.
383    fn compute_downgrade(
384        history: &HashMap<String, Vec<OutcomeRecord>>,
385        scores: &HashMap<String, f32>,
386        asset_id: &str,
387        now_ms: i64,
388        config: &ControllerConfig,
389    ) -> Option<DowngradeEvent> {
390        let window_start = now_ms - config.window_ms;
391        let records = history.get(asset_id)?;
392        let window: Vec<&OutcomeRecord> = records
393            .iter()
394            .filter(|r| r.recorded_at_ms >= window_start)
395            .collect();
396        let total = window.len();
397        if total < config.min_samples {
398            return None;
399        }
400        let failures = window.iter().filter(|r| !r.success).count();
401        let rate = failures as f32 / total as f32;
402        if rate < config.failure_rate_threshold {
403            return None;
404        }
405        let old = scores
406            .get(asset_id)
407            .copied()
408            .unwrap_or(config.initial_confidence);
409        let new_val = (old - config.downgrade_penalty).max(0.0);
410        Some(DowngradeEvent {
411            asset_id: asset_id.to_string(),
412            old_confidence: old,
413            new_confidence: new_val,
414            failure_rate: rate,
415            window_samples: total,
416            event_at_ms: now_ms,
417            revalidation_required: new_val < config.min_selectable_confidence,
418        })
419    }
420}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// `true` when `actual` lies strictly within `tol` of `expected`.
    fn close(actual: f32, expected: f32, tol: f32) -> bool {
        (actual - expected).abs() < tol
    }

    #[test]
    fn test_calculate_decay() {
        // No time elapsed: confidence is untouched.
        let fresh = StandardConfidenceScheduler::calculate_decay(1.0, 0.0);
        assert!(close(fresh, 1.0, 0.001));

        // One half-life (ln(2)/0.05 ≈ 13.86 h) leaves about half.
        let half_life = StandardConfidenceScheduler::calculate_decay(1.0, 13.86);
        assert!(close(half_life, 0.5, 0.01));

        // A full day: e^(-0.05*24) ≈ 0.30.
        let one_day = StandardConfidenceScheduler::calculate_decay(1.0, 24.0);
        assert!(close(one_day, 0.30, 0.02));

        // Nothing to decay when starting from zero.
        let from_zero = StandardConfidenceScheduler::calculate_decay(0.0, 100.0);
        assert!(from_zero.abs() < 0.001);
    }

    #[test]
    fn test_should_quarantine() {
        let scheduler = StandardConfidenceScheduler::with_default_config();

        // At or above the threshold: keep.
        for kept in [0.5, 0.35, 0.36] {
            assert!(!scheduler.should_quarantine(kept));
        }

        // Below the threshold: quarantine.
        for dropped in [0.34, 0.0] {
            assert!(scheduler.should_quarantine(dropped));
        }
    }

    #[test]
    fn test_boost_confidence() {
        let scheduler = StandardConfidenceScheduler::with_default_config();

        // 0.5 gains one step and lands on 0.6.
        assert!(close(scheduler.boost_confidence(0.5), 0.6, 0.001));

        // Already at the ceiling: stays at 1.0.
        assert!(close(scheduler.boost_confidence(1.0), 1.0, 0.001));

        // 0.95 would overshoot, so it is capped at 1.0.
        assert!(close(scheduler.boost_confidence(0.95), 1.0, 0.001));
    }

    #[test]
    fn test_default_config() {
        let config = ConfidenceSchedulerConfig::default();
        assert!(config.enabled);
        assert_eq!(config.check_interval_secs, 3600);
        assert!(close(config.confidence_boost_per_success, 0.1, 0.001));
    }

    #[test]
    fn test_calculate_age_hours() {
        // 3 600 000 ms is exactly one hour.
        let one_hour = StandardConfidenceScheduler::calculate_age_hours(0, 3600000);
        assert!(close(one_hour, 1.0, 0.001));

        // A full day.
        let one_day = StandardConfidenceScheduler::calculate_age_hours(0, 86400000);
        assert!(close(one_day, 24.0, 0.001));

        // Half an hour.
        let half_hour = StandardConfidenceScheduler::calculate_age_hours(0, 1800000);
        assert!(close(half_hour, 0.5, 0.001));
    }

    // -----------------------------------------------------------------------
    // ConfidenceController tests
    // -----------------------------------------------------------------------

    const NOW: i64 = 1_000_000_000_000; // arbitrary fixed "now" in ms
    const WINDOW: i64 = 3_600_000; // 1 hour window

    /// Controller requiring three samples in a one-hour window.
    fn controller_with_3_samples() -> ConfidenceController {
        let config = ControllerConfig {
            initial_confidence: 1.0,
            min_selectable_confidence: MIN_REPLAY_CONFIDENCE,
            downgrade_penalty: 0.15,
            min_samples: 3,
            failure_rate_threshold: 0.5,
            window_ms: WINDOW,
        };
        ConfidenceController::new(config)
    }

    /// Records `count` failures for `asset_id` at `start`, `start + 1`, ...
    fn fail_times(ctrl: &mut ConfidenceController, asset_id: &str, start: i64, count: i64) {
        for offset in 0..count {
            ctrl.record_failure(asset_id, start + offset);
        }
    }

    #[test]
    fn test_controller_initial_confidence_is_one() {
        let ctrl = controller_with_3_samples();
        // An asset nobody has reported on starts at the initial confidence.
        assert!(close(ctrl.confidence("gene-1"), 1.0, 0.001));
        assert!(ctrl.is_selectable("gene-1"));
    }

    #[test]
    fn test_controller_successive_failures_downgrade() {
        let mut ctrl = controller_with_3_samples();
        // Three failures in the window: rate 1.0 ≥ 0.5, so one downgrade.
        fail_times(&mut ctrl, "gene-x", NOW, 3);
        let c = ctrl.confidence("gene-x");
        // 1.0 - 0.15 = 0.85
        assert!((c - 0.85).abs() < 0.01, "expected ~0.85, got {c}");
        assert_eq!(ctrl.downgrade_log().len(), 1);
    }

    #[test]
    fn test_controller_below_min_not_selectable() {
        let config = ControllerConfig {
            initial_confidence: 0.5, // start near the edge
            min_selectable_confidence: MIN_REPLAY_CONFIDENCE,
            downgrade_penalty: 0.35,
            min_samples: 2,
            failure_rate_threshold: 0.5,
            window_ms: WINDOW,
        };
        let mut ctrl = ConfidenceController::new(config);
        // Two failures: rate 1.0 ≥ 0.5 and 2 ≥ min_samples=2, so downgrade.
        fail_times(&mut ctrl, "gene-low", NOW, 2);
        // 0.5 - 0.35 = 0.15, which is below MIN_REPLAY_CONFIDENCE (0.35).
        assert!(!ctrl.is_selectable("gene-low"));
        assert_eq!(ctrl.downgrade_log()[0].revalidation_required, true);
        let needs_revalidation = ctrl.assets_requiring_revalidation();
        assert!(needs_revalidation.contains(&"gene-low".to_string()));
    }

    #[test]
    fn test_controller_recovery_via_successes() {
        let mut ctrl = controller_with_3_samples();
        // Push confidence down first.
        fail_times(&mut ctrl, "gene-r", NOW, 3);
        let after_failures = ctrl.confidence("gene-r");
        // Two successes should win some of it back.
        ctrl.record_success("gene-r", NOW + 3);
        ctrl.record_success("gene-r", NOW + 4);
        let after_recovery = ctrl.confidence("gene-r");
        assert!(
            after_recovery > after_failures,
            "recovery expected: {after_recovery} > {after_failures}"
        );
    }

    #[test]
    fn test_controller_no_downgrade_below_min_samples() {
        let mut ctrl = controller_with_3_samples();
        // Two failures is fewer than min_samples=3, so nothing happens.
        fail_times(&mut ctrl, "gene-few", NOW, 2);
        assert!(close(ctrl.confidence("gene-few"), 1.0, 0.001));
        assert!(ctrl.downgrade_log().is_empty());
    }

    #[test]
    fn test_controller_failures_outside_window_ignored() {
        let mut ctrl = controller_with_3_samples();
        // Two failures well before the one-hour window (and fewer than
        // min_samples=3, so recording them triggers nothing either).
        let old = NOW - WINDOW - 1;
        fail_times(&mut ctrl, "gene-old", old, 2);
        // Sweeping at NOW sees zero in-window records, which is below
        // min_samples, so no event is produced.
        let events = ctrl.run_downgrade_check(NOW);
        assert!(events.is_empty(), "expected no downgrade, got {events:?}");
        assert!(close(ctrl.confidence("gene-old"), 1.0, 0.001));
        assert!(ctrl.downgrade_log().is_empty());
    }

    #[test]
    fn test_controller_run_downgrade_check_batch() {
        let mut ctrl = controller_with_3_samples();
        // Seed three failures for each of two assets directly into history.
        for i in 0..3 {
            for name in ["asset-a", "asset-b"] {
                let failure = OutcomeRecord {
                    asset_id: name.to_string(),
                    success: false,
                    recorded_at_ms: NOW + i,
                };
                ctrl.history.entry(name.to_string()).or_default().push(failure);
            }
        }
        let events = ctrl.run_downgrade_check(NOW + 10);
        assert_eq!(events.len(), 2);
        assert_eq!(ctrl.downgrade_log().len(), 2);
    }

    #[test]
    fn test_controller_downgrade_event_fields() {
        let mut ctrl = controller_with_3_samples();
        fail_times(&mut ctrl, "gene-fields", NOW, 3);
        let log = ctrl.downgrade_log();
        assert_eq!(log.len(), 1);
        let evt = &log[0];
        assert_eq!(evt.asset_id, "gene-fields");
        assert!(close(evt.old_confidence, 1.0, 0.001));
        assert!(close(evt.new_confidence, 0.85, 0.01));
        assert!(close(evt.failure_rate, 1.0, 0.001));
        assert_eq!(evt.window_samples, 3);
        assert_eq!(evt.event_at_ms, NOW + 2);
    }
}