Skip to main content

asupersync/runtime/
kernel.rs

1//! Proof-carrying decision-plane kernel for runtime controllers.
2//!
3//! This module defines the canonical [`RuntimeKernelSnapshot`] that controllers
4//! observe, the [`ControllerRegistration`] contract they must satisfy, and the
5//! [`ControllerRegistry`] that validates and manages controller participation.
6//!
7//! # Design Principles
8//!
9//! - **Narrow surface**: Snapshot fields are the minimum needed for decision-making.
10//!   Adding a field requires explicit justification and version bump.
11//! - **Deterministic**: Snapshot creation and serialization are deterministic given
12//!   the same runtime state, enabling replay and comparison.
13//! - **Auditable**: Every controller action is traced with snapshot ID, version,
14//!   and decision metadata for post-hoc analysis.
15//! - **No ambient authority**: Controllers receive snapshots; they cannot reach
16//!   into runtime internals directly.
17//!
18//! # Versioning
19//!
20//! Snapshots carry a [`SnapshotVersion`] that controllers declare support for.
21//! The registry rejects controllers whose expected version range does not overlap
22//! with the current snapshot version. Controllers consuming a reduced snapshot
23//! (fewer fields than the full version) remain in shadow mode until they upgrade.
24
25use crate::types::Time;
26use serde::{Deserialize, Serialize};
27use std::collections::BTreeMap;
28use std::sync::Arc;
29
/// Current snapshot schema version (1.0).
///
/// Registration validation and the initial-mode downgrade performed at
/// registration time both compare controller version ranges against this
/// constant.
pub const SNAPSHOT_VERSION: SnapshotVersion = SnapshotVersion { major: 1, minor: 0 };
32
/// Schema version for runtime kernel snapshots.
///
/// The derived ordering compares `major` first and `minor` second (field
/// declaration order), so the order of the fields below is significant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct SnapshotVersion {
    /// Snapshot schema major version. Compatibility checks require an exact match.
    pub major: u32,
    /// Snapshot schema minor version.
    pub minor: u32,
}
41
42impl std::fmt::Display for SnapshotVersion {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        write!(f, "{}.{}", self.major, self.minor)
45    }
46}
47
48impl SnapshotVersion {
49    /// Check if `other` is compatible (same major, <= minor).
50    #[must_use]
51    pub fn is_compatible_with(&self, other: &Self) -> bool {
52        self.major == other.major && self.minor >= other.minor
53    }
54}
55
/// Monotonic snapshot identifier. Each snapshot gets a unique, increasing ID.
///
/// Thin newtype over `u64`; the derived ordering follows the wrapped number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct SnapshotId(pub u64);
59
/// A point-in-time snapshot of observable runtime state for controllers.
///
/// Controllers receive this snapshot via their `observe` callback. They must
/// not cache snapshots across decision boundaries — each decision must use
/// the snapshot provided for that epoch.
///
/// Every field after `timestamp` has a matching entry in `KNOWN_FIELDS`, which
/// is what controller registrations are validated against; keep the two in
/// sync when adding or removing a field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuntimeKernelSnapshot {
    /// Unique identifier for this snapshot.
    pub id: SnapshotId,
    /// Schema version of this snapshot.
    pub version: SnapshotVersion,
    /// Logical time at which this snapshot was taken.
    pub timestamp: Time,

    // ── Scheduler state ───────────────────────────────────────────────
    /// Number of tasks currently in the ready queue.
    pub ready_queue_len: usize,
    /// Number of tasks in the cancel lane.
    pub cancel_lane_len: usize,
    /// Number of tasks in the finalize lane.
    pub finalize_lane_len: usize,
    /// Total tasks currently tracked by the runtime.
    pub total_tasks: usize,
    /// Number of active (non-closed) regions.
    pub active_regions: usize,
    /// Current cancel-lane streak count within the active epoch.
    pub cancel_streak_current: usize,
    /// Configured cancel-lane max streak.
    pub cancel_streak_limit: usize,

    // ── Obligation state ──────────────────────────────────────────────
    /// Number of outstanding (uncommitted) obligations.
    pub outstanding_obligations: usize,
    /// Cumulative obligation leak count since runtime start.
    pub obligation_leak_count: u64,

    // ── I/O and timer state ───────────────────────────────────────────
    /// Number of pending I/O registrations in the reactor.
    pub pending_io_registrations: usize,
    /// Number of active timers in the timer wheel.
    pub active_timers: usize,

    // ── Worker state ──────────────────────────────────────────────────
    /// Number of worker threads configured.
    pub worker_count: usize,
    /// Number of workers currently parked (idle).
    pub workers_parked: usize,
    /// Number of active blocking pool threads.
    pub blocking_threads_active: usize,

    // ── Governor and adaptive state ───────────────────────────────────
    /// Whether the Lyapunov governor is enabled.
    pub governor_enabled: bool,
    /// Whether adaptive cancel-streak is enabled.
    pub adaptive_cancel_enabled: bool,
    /// Current adaptive cancel-streak epoch number (if adaptive enabled).
    pub adaptive_epoch: u64,

    // ── Controller metadata ───────────────────────────────────────────
    /// Number of registered controllers.
    pub registered_controllers: usize,
    /// Number of controllers in shadow mode.
    pub shadow_controllers: usize,
}
124
125impl RuntimeKernelSnapshot {
126    /// Create a minimal snapshot for testing.
127    #[cfg(any(test, feature = "test-internals"))]
128    #[must_use]
129    pub fn test_default(id: u64, now: Time) -> Self {
130        Self {
131            id: SnapshotId(id),
132            version: SNAPSHOT_VERSION,
133            timestamp: now,
134            ready_queue_len: 0,
135            cancel_lane_len: 0,
136            finalize_lane_len: 0,
137            total_tasks: 0,
138            active_regions: 0,
139            cancel_streak_current: 0,
140            cancel_streak_limit: 16,
141            outstanding_obligations: 0,
142            obligation_leak_count: 0,
143            pending_io_registrations: 0,
144            active_timers: 0,
145            worker_count: 1,
146            workers_parked: 0,
147            blocking_threads_active: 0,
148            governor_enabled: false,
149            adaptive_cancel_enabled: false,
150            adaptive_epoch: 0,
151            registered_controllers: 0,
152            shadow_controllers: 0,
153        }
154    }
155}
156
/// Operating mode for a controller.
///
/// The promotion pipeline only accepts the steps `Shadow` → `Canary` and
/// `Canary` → `Active`; a controller in `Hold` is rejected for promotion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ControllerMode {
    /// Controller observes snapshots but does not influence decisions.
    Shadow,
    /// Controller decisions are compared against baseline but not applied.
    Canary,
    /// Controller decisions are applied to the runtime.
    Active,
    /// Controller is paused pending investigation or manual intervention.
    Hold,
}
169
/// A decision emitted by a controller.
///
/// When a decision is recorded, the registry reads `controller_id`,
/// `snapshot_id` and `label`; the remaining fields are carried as data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ControllerDecision {
    /// ID of the controller that made this decision.
    pub controller_id: ControllerId,
    /// Snapshot ID this decision was based on.
    pub snapshot_id: SnapshotId,
    /// Human-readable decision label.
    pub label: String,
    /// Structured decision payload (controller-specific).
    pub payload: serde_json::Value,
    /// Confidence score in [0.0, 1.0] for the decision.
    // NOTE(review): nothing visible in this section enforces the range — confirm.
    pub confidence: f64,
    /// Fallback: if this decision is rejected, what should happen.
    pub fallback_label: String,
}
186
/// Unique identifier for a registered controller.
///
/// The registry hands these out sequentially, starting at 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct ControllerId(pub u64);
190
/// Metadata a controller must provide at registration time.
///
/// The registry validates these values before accepting the controller; the
/// per-field notes below state which checks apply.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ControllerRegistration {
    /// Human-readable name for this controller. Must be non-empty and not
    /// already used by another registered controller.
    pub name: String,
    /// Minimum snapshot version this controller can consume. Must not be
    /// greater than `max_version`.
    pub min_version: SnapshotVersion,
    /// Maximum snapshot version this controller can consume.
    pub max_version: SnapshotVersion,
    /// Snapshot fields this controller requires (for forward-compat checks).
    /// Every entry must be a known snapshot field name.
    pub required_fields: Vec<String>,
    /// Which seam IDs this controller targets (from the control-seam inventory).
    /// Must contain at least one entry.
    pub target_seams: Vec<String>,
    /// Initial operating mode. A request for `Active` is downgraded to
    /// `Shadow` when `max_version` does not cover the current snapshot version.
    pub initial_mode: ControllerMode,
    /// Artifact ID for the controller's proof bundle (if any).
    pub proof_artifact_id: Option<String>,
    /// Budget counters: max decisions per epoch, max latency per decision.
    pub budget: ControllerBudget,
}
211
/// Resource budget constraints for a controller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ControllerBudget {
    /// Maximum number of decisions per snapshot epoch. Registration is
    /// rejected when this is zero.
    pub max_decisions_per_epoch: u32,
    /// Maximum wall-clock microseconds per decision.
    // NOTE(review): no code visible in this section checks this limit — confirm
    // where (or whether) it is enforced.
    pub max_decision_latency_us: u64,
}
220
221impl Default for ControllerBudget {
222    fn default() -> Self {
223        Self {
224            max_decisions_per_epoch: 1,
225            max_decision_latency_us: 100,
226        }
227    }
228}
229
/// Reason a controller registration was rejected.
///
/// Validation stops at the first failing check, so only one reason is ever
/// reported per registration attempt.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum RegistrationError {
    /// Controller name is empty.
    EmptyName,
    /// Version range is inverted (min > max).
    InvertedVersionRange,
    /// Current snapshot version is outside controller's supported range.
    IncompatibleVersion {
        /// Snapshot version found in the runtime state being validated.
        current: SnapshotVersion,
        /// Minimum snapshot version accepted by the controller.
        min: SnapshotVersion,
        /// Maximum snapshot version accepted by the controller.
        max: SnapshotVersion,
    },
    /// Required fields are not present in the current snapshot schema.
    /// Carries every offending field name, in the order they were requested.
    UnsupportedFields(Vec<String>),
    /// No target seams specified.
    NoTargetSeams,
    /// Budget has zero decisions allowed.
    ZeroBudget,
    /// A controller with this name is already registered.
    DuplicateName(String),
}
255
256impl std::fmt::Display for RegistrationError {
257    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258        match self {
259            Self::EmptyName => write!(f, "controller name must not be empty"),
260            Self::InvertedVersionRange => write!(f, "min_version must be <= max_version"),
261            Self::IncompatibleVersion { current, min, max } => {
262                write!(
263                    f,
264                    "snapshot version {current} outside controller range [{min}, {max}]"
265                )
266            }
267            Self::UnsupportedFields(fields) => {
268                write!(f, "unsupported snapshot fields: {}", fields.join(", "))
269            }
270            Self::NoTargetSeams => write!(f, "controller must target at least one seam"),
271            Self::ZeroBudget => write!(f, "budget must allow at least one decision per epoch"),
272            Self::DuplicateName(name) => {
273                write!(f, "controller with name '{name}' already registered")
274            }
275        }
276    }
277}
278
279impl std::error::Error for RegistrationError {}
280
/// Known snapshot field names for validation.
///
/// A registration whose `required_fields` contains a name missing from this
/// list is rejected with `RegistrationError::UnsupportedFields`. The entries
/// mirror the fields of `RuntimeKernelSnapshot`, in declaration order.
// NOTE(review): the snapshot's `id`, `version` and `timestamp` fields are not
// listed, so a controller naming them as required is rejected — confirm this
// is intended.
const KNOWN_FIELDS: &[&str] = &[
    "ready_queue_len",
    "cancel_lane_len",
    "finalize_lane_len",
    "total_tasks",
    "active_regions",
    "cancel_streak_current",
    "cancel_streak_limit",
    "outstanding_obligations",
    "obligation_leak_count",
    "pending_io_registrations",
    "active_timers",
    "worker_count",
    "workers_parked",
    "blocking_threads_active",
    "governor_enabled",
    "adaptive_cancel_enabled",
    "adaptive_epoch",
    "registered_controllers",
    "shadow_controllers",
];
303
/// Policy governing controller promotion through the lifecycle.
///
/// Promotion compares a controller's calibration score and its
/// epochs-in-current-mode counter against the thresholds below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PromotionPolicy {
    /// Minimum calibration score in [0.0, 1.0] required for promotion.
    pub min_calibration_score: f64,
    /// Minimum epochs a controller must spend in Shadow before promoting to Canary.
    pub min_shadow_epochs: u64,
    /// Minimum epochs a controller must spend in Canary before promoting to Active.
    pub min_canary_epochs: u64,
    /// Maximum allowed budget overruns before automatic rollback.
    // NOTE(review): the rollback path is not visible in this section — confirm
    // whether the comparison is `>` or `>=`.
    pub max_budget_overruns: u32,
    /// Policy identifier for audit trail.
    pub policy_id: String,
}
318
319impl Default for PromotionPolicy {
320    fn default() -> Self {
321        Self {
322            min_calibration_score: 0.8,
323            min_shadow_epochs: 3,
324            min_canary_epochs: 2,
325            max_budget_overruns: 3,
326            policy_id: "default-promotion-policy-v1".to_string(),
327        }
328    }
329}
330
/// Reason a promotion was rejected.
///
/// Derives `PartialEq` but not `Eq` because `CalibrationTooLow` carries
/// floating-point values.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PromotionRejection {
    /// Controller not found.
    ControllerNotFound,
    /// Calibration score below threshold.
    CalibrationTooLow {
        /// Current calibration score.
        current: f64,
        /// Required minimum calibration score.
        required: f64,
    },
    /// Not enough epochs in the prerequisite mode.
    InsufficientEpochs {
        /// Current number of epochs in the prerequisite mode.
        current: u64,
        /// Required minimum number of epochs.
        required: u64,
        /// The mode the controller is currently in.
        mode: ControllerMode,
    },
    /// Invalid transition (e.g., Shadow directly to Active).
    InvalidTransition {
        /// Current mode.
        from: ControllerMode,
        /// Requested mode.
        to: ControllerMode,
    },
    /// Controller is in Hold mode and cannot be promoted without explicit release.
    HeldForInvestigation,
}
362
363impl std::fmt::Display for PromotionRejection {
364    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
365        match self {
366            Self::ControllerNotFound => write!(f, "controller not found"),
367            Self::CalibrationTooLow { current, required } => {
368                write!(
369                    f,
370                    "calibration score {current:.3} below threshold {required:.3}"
371                )
372            }
373            Self::InsufficientEpochs {
374                current,
375                required,
376                mode,
377            } => {
378                write!(f, "only {current} epochs in {mode:?}, need {required}")
379            }
380            Self::InvalidTransition { from, to } => {
381                write!(f, "invalid transition from {from:?} to {to:?}")
382            }
383            Self::HeldForInvestigation => {
384                write!(
385                    f,
386                    "controller held for investigation; release before promoting"
387                )
388            }
389        }
390    }
391}
392
/// Reason a controller was rolled back.
///
/// Derives `PartialEq` but not `Eq` because `CalibrationRegression` carries a
/// floating-point score.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RollbackReason {
    /// Calibration score dropped below threshold.
    CalibrationRegression {
        /// Calibration score that triggered the rollback.
        score: f64,
    },
    /// Budget overruns exceeded policy limit.
    BudgetOverruns {
        /// Number of overruns accumulated.
        count: u32,
    },
    /// Manual rollback requested by operator.
    ManualRollback,
    /// Fallback triggered by a decision rejection.
    FallbackTriggered {
        /// The decision label that caused the fallback.
        decision_label: String,
    },
}
414
415impl std::fmt::Display for RollbackReason {
416    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
417        match self {
418            Self::CalibrationRegression { score } => {
419                write!(f, "calibration regressed to {score:.3}")
420            }
421            Self::BudgetOverruns { count } => {
422                write!(f, "budget overruns reached {count}")
423            }
424            Self::ManualRollback => write!(f, "manual rollback requested"),
425            Self::FallbackTriggered { decision_label } => {
426                write!(f, "fallback triggered by decision: {decision_label}")
427            }
428        }
429    }
430}
431
/// A recovery command emitted when a rollout fails.
///
/// Plain data record: it names the controller, the mode change that was
/// applied, the reason, and the policy in force.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecoveryCommand {
    /// Controller that needs recovery.
    pub controller_id: ControllerId,
    /// Controller name for human identification.
    pub controller_name: String,
    /// Mode the controller was rolled back from.
    pub rolled_back_from: ControllerMode,
    /// Mode the controller was rolled back to.
    pub rolled_back_to: ControllerMode,
    /// Reason for the rollback.
    pub reason: RollbackReason,
    /// Policy ID that governed the decision.
    pub policy_id: String,
    /// Snapshot ID at the time of rollback, when one is known.
    pub at_snapshot_id: Option<SnapshotId>,
    /// Suggested remediation steps.
    pub remediation: Vec<String>,
}
452
/// An entry in the evidence ledger.
///
/// Each entry ties one [`LedgerEvent`] to the controller it concerns and to
/// the policy that was in force.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvidenceLedgerEntry {
    /// Sequential entry ID.
    pub entry_id: u64,
    /// Controller ID this entry pertains to.
    pub controller_id: ControllerId,
    /// Snapshot ID at the time of the event (if available).
    pub snapshot_id: Option<SnapshotId>,
    /// Type of event.
    pub event: LedgerEvent,
    /// Policy ID governing this event.
    pub policy_id: String,
    /// Timestamp (logical).
    pub timestamp: Time,
}
469
/// Events recorded in the evidence ledger.
///
/// One variant per auditable lifecycle step of a controller, from
/// registration through promotion, rollback, hold/release and deregistration,
/// plus individual decisions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LedgerEvent {
    /// Controller was registered.
    Registered {
        /// Initial mode assigned (after any downgrade applied at registration).
        initial_mode: ControllerMode,
    },
    /// Controller mode was changed via promotion.
    Promoted {
        /// Previous mode.
        from: ControllerMode,
        /// New mode.
        to: ControllerMode,
        /// Calibration score at time of promotion.
        calibration_score: f64,
    },
    /// Controller was rolled back.
    RolledBack {
        /// Previous mode.
        from: ControllerMode,
        /// New mode.
        to: ControllerMode,
        /// Reason for rollback.
        reason: RollbackReason,
    },
    /// Controller was placed on hold.
    Held {
        /// Previous mode.
        from: ControllerMode,
    },
    /// Controller was released from hold.
    Released {
        /// Mode restored to.
        to: ControllerMode,
    },
    /// Controller was deregistered.
    Deregistered,
    /// Promotion was rejected.
    PromotionRejected {
        /// The target mode that was requested.
        target: ControllerMode,
        /// Why the promotion was rejected.
        rejection: PromotionRejection,
    },
    /// Decision recorded.
    DecisionRecorded {
        /// Decision label.
        label: String,
        /// Whether the decision was within budget.
        within_budget: bool,
    },
}
523
/// Record of a registered controller within the registry.
///
/// Private bookkeeping: the registration as submitted plus the mutable
/// lifecycle state the registry tracks for it.
#[derive(Debug, Clone)]
struct RegisteredController {
    /// Identifier assigned at registration; also the key in the registry map.
    id: ControllerId,
    /// The registration exactly as the controller submitted it.
    registration: ControllerRegistration,
    /// Current operating mode.
    mode: ControllerMode,
    /// Decisions recorded since the per-epoch counter was last reset.
    decisions_this_epoch: u32,
    /// Snapshot ID of the most recently recorded decision, if any.
    last_snapshot_id: Option<SnapshotId>,
    /// Latest calibration score; starts at 0.0.
    calibration_score: f64,
    /// Incremented on each epoch advance; compared against the promotion
    /// policy's minimum-epoch thresholds.
    epochs_in_current_mode: u64,
    /// Number of recorded decisions that exceeded the per-epoch budget.
    budget_overruns: u32,
    /// Mode before entering Hold, so we can restore on release.
    held_from_mode: Option<ControllerMode>,
    /// Starts `false`.
    // NOTE(review): the code that sets this flag is not visible in this
    // section — confirm its exact meaning.
    fallback_active: bool,
    /// Label of the most recently recorded decision; empty until the first one.
    last_action_label: String,
}
540
/// Type alias for log sink callbacks.
///
/// A shared, thread-safe (`Send + Sync`) closure that receives one
/// already-formatted log line per call.
type LogSink = Arc<dyn Fn(&str) + Send + Sync>;
543
/// Registry that validates and manages controller participation.
///
/// The registry enforces:
/// - Version compatibility between controllers and snapshots
/// - Required field existence in the snapshot schema
/// - Uniqueness of controller names
/// - Budget constraints per epoch
/// - Promotion pipeline (Shadow → Canary → Active) with calibration gates
/// - Evidence ledger for audit trail
pub struct ControllerRegistry {
    /// Registered controllers keyed by ID; the `BTreeMap` keeps iteration in
    /// ascending ID order.
    controllers: BTreeMap<ControllerId, RegisteredController>,
    /// Next controller ID to hand out (starts at 1).
    next_id: u64,
    /// Next snapshot ID to hand out (starts at 1).
    next_snapshot_id: u64,
    /// Callback for structured logging of registration events.
    log_sink: Option<LogSink>,
    /// Promotion policy governing lifecycle transitions.
    promotion_policy: PromotionPolicy,
    /// Evidence ledger for audit and replay.
    evidence_ledger: Vec<EvidenceLedgerEntry>,
    /// Next evidence ledger entry ID.
    next_ledger_id: u64,
}
566
567impl ControllerRegistry {
568    /// Create a new empty registry.
569    #[must_use]
570    pub fn new() -> Self {
571        Self {
572            controllers: BTreeMap::new(),
573            next_id: 1,
574            next_snapshot_id: 1,
575            log_sink: None,
576            promotion_policy: PromotionPolicy::default(),
577            evidence_ledger: Vec::new(),
578            next_ledger_id: 1,
579        }
580    }
581
582    /// Set a structured log sink for registration and decision events.
583    #[must_use]
584    pub fn with_log_sink(mut self, sink: LogSink) -> Self {
585        self.log_sink = Some(sink);
586        self
587    }
588
589    /// Register a controller, returning its ID on success.
590    pub fn register(
591        &mut self,
592        registration: ControllerRegistration,
593    ) -> Result<ControllerId, RegistrationError> {
594        self.validate(&registration)?;
595
596        let id = ControllerId(self.next_id);
597        self.next_id += 1;
598
599        let mode = if registration.initial_mode == ControllerMode::Active
600            && !registration
601                .max_version
602                .is_compatible_with(&SNAPSHOT_VERSION)
603        {
604            // Downgrade to shadow if snapshot is newer than controller expects
605            ControllerMode::Shadow
606        } else {
607            registration.initial_mode
608        };
609
610        if let Some(ref sink) = self.log_sink {
611            sink(&format!(
612                "controller_registered id={} name={} mode={:?} seams={:?} version_range=[{}, {}]",
613                id.0,
614                registration.name,
615                mode,
616                registration.target_seams,
617                registration.min_version,
618                registration.max_version,
619            ));
620        }
621
622        self.controllers.insert(
623            id,
624            RegisteredController {
625                id,
626                registration,
627                mode,
628                decisions_this_epoch: 0,
629                last_snapshot_id: None,
630                calibration_score: 0.0,
631                epochs_in_current_mode: 0,
632                budget_overruns: 0,
633                held_from_mode: None,
634                fallback_active: false,
635                last_action_label: String::new(),
636            },
637        );
638
639        self.record_ledger_entry(id, None, LedgerEvent::Registered { initial_mode: mode });
640
641        Ok(id)
642    }
643
644    /// Validate a registration without inserting it.
645    fn validate(&self, reg: &ControllerRegistration) -> Result<(), RegistrationError> {
646        if reg.name.is_empty() {
647            return Err(RegistrationError::EmptyName);
648        }
649        if reg.min_version > reg.max_version {
650            return Err(RegistrationError::InvertedVersionRange);
651        }
652        if !SNAPSHOT_VERSION.is_compatible_with(&reg.min_version)
653            || SNAPSHOT_VERSION.major != reg.max_version.major
654        {
655            return Err(RegistrationError::IncompatibleVersion {
656                current: SNAPSHOT_VERSION,
657                min: reg.min_version,
658                max: reg.max_version,
659            });
660        }
661        let unknown: Vec<String> = reg
662            .required_fields
663            .iter()
664            .filter(|f| !KNOWN_FIELDS.contains(&f.as_str()))
665            .cloned()
666            .collect();
667        if !unknown.is_empty() {
668            return Err(RegistrationError::UnsupportedFields(unknown));
669        }
670        if reg.target_seams.is_empty() {
671            return Err(RegistrationError::NoTargetSeams);
672        }
673        if reg.budget.max_decisions_per_epoch == 0 {
674            return Err(RegistrationError::ZeroBudget);
675        }
676        if self
677            .controllers
678            .values()
679            .any(|c| c.registration.name == reg.name)
680        {
681            return Err(RegistrationError::DuplicateName(reg.name.clone()));
682        }
683        Ok(())
684    }
685
686    /// Deregister a controller.
687    pub fn deregister(&mut self, id: ControllerId) -> bool {
688        let removed = self.controllers.remove(&id).is_some();
689        if removed {
690            self.record_ledger_entry(id, None, LedgerEvent::Deregistered);
691        }
692        removed
693    }
694
695    /// Get the current mode of a controller.
696    #[must_use]
697    pub fn mode(&self, id: ControllerId) -> Option<ControllerMode> {
698        self.controllers.get(&id).map(|c| c.mode)
699    }
700
701    /// Set the mode of a controller.
702    pub fn set_mode(&mut self, id: ControllerId, mode: ControllerMode) -> bool {
703        let Some(controller) = self.controllers.get_mut(&id) else {
704            return false;
705        };
706        controller.mode = mode;
707        true
708    }
709
710    /// Get registration info for a controller.
711    #[must_use]
712    pub fn registration(&self, id: ControllerId) -> Option<&ControllerRegistration> {
713        self.controllers.get(&id).map(|c| &c.registration)
714    }
715
716    /// Number of registered controllers.
717    #[must_use]
718    pub fn len(&self) -> usize {
719        self.controllers.len()
720    }
721
722    /// Whether the registry is empty.
723    #[must_use]
724    pub fn is_empty(&self) -> bool {
725        self.controllers.is_empty()
726    }
727
728    /// Count of controllers in shadow mode.
729    #[must_use]
730    pub fn shadow_count(&self) -> usize {
731        self.controllers
732            .values()
733            .filter(|c| c.mode == ControllerMode::Shadow)
734            .count()
735    }
736
737    /// Allocate the next snapshot ID.
738    pub fn next_snapshot_id(&mut self) -> SnapshotId {
739        let id = SnapshotId(self.next_snapshot_id);
740        self.next_snapshot_id += 1;
741        id
742    }
743
744    /// Reset per-epoch decision counters for all controllers.
745    ///
746    /// Note: prefer `advance_epoch()` which also increments epoch-in-mode counters.
747    pub fn reset_epoch(&mut self) {
748        for controller in self.controllers.values_mut() {
749            controller.decisions_this_epoch = 0;
750        }
751    }
752
753    /// Record a decision and check budget.
754    /// Returns `true` if the decision is within budget, `false` if over budget.
755    pub fn record_decision(&mut self, decision: &ControllerDecision) -> bool {
756        let Some(controller) = self.controllers.get_mut(&decision.controller_id) else {
757            return false;
758        };
759        controller.last_snapshot_id = Some(decision.snapshot_id);
760        controller.last_action_label.clone_from(&decision.label);
761        controller.decisions_this_epoch += 1;
762        let within_budget = controller.decisions_this_epoch
763            <= controller.registration.budget.max_decisions_per_epoch;
764        if !within_budget {
765            controller.budget_overruns += 1;
766        }
767
768        self.record_ledger_entry(
769            decision.controller_id,
770            Some(decision.snapshot_id),
771            LedgerEvent::DecisionRecorded {
772                label: decision.label.clone(),
773                within_budget,
774            },
775        );
776
777        within_budget
778    }
779
780    /// Update calibration score for a controller (e.g., after shadow comparison).
781    pub fn update_calibration(&mut self, id: ControllerId, score: f64) {
782        if let Some(controller) = self.controllers.get_mut(&id) {
783            controller.calibration_score = score;
784        }
785    }
786
787    /// Get calibration score for a controller.
788    #[must_use]
789    pub fn calibration_score(&self, id: ControllerId) -> Option<f64> {
790        self.controllers.get(&id).map(|c| c.calibration_score)
791    }
792
793    /// List all controller IDs.
794    #[must_use]
795    pub fn controller_ids(&self) -> Vec<ControllerId> {
796        self.controllers.keys().copied().collect()
797    }
798
799    /// Set the promotion policy.
800    pub fn set_promotion_policy(&mut self, policy: PromotionPolicy) {
801        self.promotion_policy = policy;
802    }
803
804    /// Get the current promotion policy.
805    #[must_use]
806    pub fn promotion_policy(&self) -> &PromotionPolicy {
807        &self.promotion_policy
808    }
809
810    /// Advance epoch counters for all controllers.
811    pub fn advance_epoch(&mut self) {
812        for controller in self.controllers.values_mut() {
813            controller.epochs_in_current_mode += 1;
814            controller.decisions_this_epoch = 0;
815        }
816    }
817
818    /// Try to promote a controller to the next mode in the pipeline.
819    ///
820    /// Promotion follows the pipeline: Shadow → Canary → Active.
821    /// Each transition requires calibration and epoch thresholds defined by
    /// the promotion policy. Returns a [`PromotionRejection`] on rejection.
823    pub fn try_promote(
824        &mut self,
825        id: ControllerId,
826        target: ControllerMode,
827    ) -> Result<ControllerMode, PromotionRejection> {
828        let policy = self.promotion_policy.clone();
829        let controller = self
830            .controllers
831            .get(&id)
832            .ok_or(PromotionRejection::ControllerNotFound)?;
833
834        let current_mode = controller.mode;
835        let calibration = controller.calibration_score;
836        let epochs = controller.epochs_in_current_mode;
837
838        // Hold blocks all promotions
839        if current_mode == ControllerMode::Hold {
840            let rejection = PromotionRejection::HeldForInvestigation;
841            self.record_ledger_entry(
842                id,
843                None,
844                LedgerEvent::PromotionRejected {
845                    target,
846                    rejection: rejection.clone(),
847                },
848            );
849            self.log_promotion_rejection(id, &rejection, &policy);
850            return Err(rejection);
851        }
852
853        // Validate transition is valid
854        let valid = matches!(
855            (current_mode, target),
856            (ControllerMode::Shadow, ControllerMode::Canary)
857                | (ControllerMode::Canary, ControllerMode::Active)
858        );
859        if !valid {
860            let rejection = PromotionRejection::InvalidTransition {
861                from: current_mode,
862                to: target,
863            };
864            self.record_ledger_entry(
865                id,
866                None,
867                LedgerEvent::PromotionRejected {
868                    target,
869                    rejection: rejection.clone(),
870                },
871            );
872            self.log_promotion_rejection(id, &rejection, &policy);
873            return Err(rejection);
874        }
875
876        // Check calibration threshold
877        if calibration < policy.min_calibration_score {
878            let rejection = PromotionRejection::CalibrationTooLow {
879                current: calibration,
880                required: policy.min_calibration_score,
881            };
882            self.record_ledger_entry(
883                id,
884                None,
885                LedgerEvent::PromotionRejected {
886                    target,
887                    rejection: rejection.clone(),
888                },
889            );
890            self.log_promotion_rejection(id, &rejection, &policy);
891            return Err(rejection);
892        }
893
894        // Check epoch requirements
895        let required_epochs = match current_mode {
896            ControllerMode::Shadow => policy.min_shadow_epochs,
897            ControllerMode::Canary => policy.min_canary_epochs,
898            _ => 0,
899        };
900        if epochs < required_epochs {
901            let rejection = PromotionRejection::InsufficientEpochs {
902                current: epochs,
903                required: required_epochs,
904                mode: current_mode,
905            };
906            self.record_ledger_entry(
907                id,
908                None,
909                LedgerEvent::PromotionRejected {
910                    target,
911                    rejection: rejection.clone(),
912                },
913            );
914            self.log_promotion_rejection(id, &rejection, &policy);
915            return Err(rejection);
916        }
917
918        // All gates passed — promote
919        let controller = self.controllers.get_mut(&id).expect("checked above");
920        controller.mode = target;
921        controller.epochs_in_current_mode = 0;
922        controller.budget_overruns = 0;
923
924        self.record_ledger_entry(
925            id,
926            None,
927            LedgerEvent::Promoted {
928                from: current_mode,
929                to: target,
930                calibration_score: calibration,
931            },
932        );
933
934        if let Some(ref sink) = self.log_sink {
935            sink(&format!(
936                "controller_promoted id={} from={:?} to={:?} calibration={:.3} policy_id={}",
937                id.0, current_mode, target, calibration, policy.policy_id,
938            ));
939        }
940
941        Ok(target)
942    }
943
944    /// Roll back a controller to Shadow mode, producing a recovery command.
945    pub fn rollback(
946        &mut self,
947        id: ControllerId,
948        reason: RollbackReason,
949    ) -> Option<RecoveryCommand> {
950        let policy_id = self.promotion_policy.policy_id.clone();
951        let controller = self.controllers.get_mut(&id)?;
952        let from = controller.mode;
953
954        if from == ControllerMode::Shadow {
955            // Already in the most conservative mode; nothing to roll back.
956            return None;
957        }
958
959        let to = ControllerMode::Shadow;
960        controller.mode = to;
961        controller.epochs_in_current_mode = 0;
962        controller.fallback_active = true;
963        let name = controller.registration.name.clone();
964        let snapshot_id = controller.last_snapshot_id;
965
966        self.record_ledger_entry(
967            id,
968            snapshot_id,
969            LedgerEvent::RolledBack {
970                from,
971                to,
972                reason: reason.clone(),
973            },
974        );
975
976        if let Some(ref sink) = self.log_sink {
977            sink(&format!(
978                "controller_rolled_back id={} from={:?} to={:?} reason={} policy_id={} snapshot_id={:?}",
979                id.0, from, to, reason, policy_id, snapshot_id,
980            ));
981        }
982
983        let remediation = match &reason {
984            RollbackReason::CalibrationRegression { score } => vec![
985                format!("Investigate calibration drop to {score:.3}"),
986                "Review recent decision evidence in ledger".to_string(),
987                "Re-run shadow validation before re-promotion".to_string(),
988            ],
989            RollbackReason::BudgetOverruns { count } => vec![
990                format!("Controller exceeded budget {count} times"),
991                "Review decision frequency and payload complexity".to_string(),
992                "Consider increasing budget or reducing decision scope".to_string(),
993            ],
994            RollbackReason::ManualRollback => vec![
995                "Manual rollback — verify runtime stability".to_string(),
996                "Check evidence ledger for preceding anomalies".to_string(),
997            ],
998            RollbackReason::FallbackTriggered { decision_label } => vec![
999                format!("Fallback triggered by decision: {decision_label}"),
1000                "Inspect decision payload and snapshot context".to_string(),
1001                "Validate fallback path is functioning correctly".to_string(),
1002            ],
1003        };
1004
1005        Some(RecoveryCommand {
1006            controller_id: id,
1007            controller_name: name,
1008            rolled_back_from: from,
1009            rolled_back_to: to,
1010            reason,
1011            policy_id,
1012            at_snapshot_id: snapshot_id,
1013            remediation,
1014        })
1015    }
1016
1017    /// Place a controller on hold, pausing its participation.
1018    pub fn hold(&mut self, id: ControllerId) -> bool {
1019        let Some(controller) = self.controllers.get_mut(&id) else {
1020            return false;
1021        };
1022        if controller.mode == ControllerMode::Hold {
1023            return false; // already held
1024        }
1025        let from = controller.mode;
1026        controller.held_from_mode = Some(from);
1027        controller.mode = ControllerMode::Hold;
1028
1029        self.record_ledger_entry(id, None, LedgerEvent::Held { from });
1030
1031        if let Some(ref sink) = self.log_sink {
1032            sink(&format!(
1033                "controller_held id={} from={:?} policy_id={}",
1034                id.0, from, self.promotion_policy.policy_id,
1035            ));
1036        }
1037        true
1038    }
1039
1040    /// Release a controller from hold, restoring its previous mode.
1041    pub fn release_hold(&mut self, id: ControllerId) -> Option<ControllerMode> {
1042        let controller = self.controllers.get_mut(&id)?;
1043        if controller.mode != ControllerMode::Hold {
1044            return None;
1045        }
1046        let restored = controller
1047            .held_from_mode
1048            .take()
1049            .unwrap_or(ControllerMode::Shadow);
1050        controller.mode = restored;
1051        controller.epochs_in_current_mode = 0;
1052
1053        self.record_ledger_entry(id, None, LedgerEvent::Released { to: restored });
1054
1055        if let Some(ref sink) = self.log_sink {
1056            sink(&format!(
1057                "controller_released id={} to={:?} policy_id={}",
1058                id.0, restored, self.promotion_policy.policy_id,
1059            ));
1060        }
1061        Some(restored)
1062    }
1063
1064    /// Whether a controller's fallback is currently active.
1065    #[must_use]
1066    pub fn is_fallback_active(&self, id: ControllerId) -> bool {
1067        self.controllers.get(&id).is_some_and(|c| c.fallback_active)
1068    }
1069
1070    /// Clear fallback flag (e.g., after recovery is confirmed).
1071    pub fn clear_fallback(&mut self, id: ControllerId) {
1072        if let Some(controller) = self.controllers.get_mut(&id) {
1073            controller.fallback_active = false;
1074        }
1075    }
1076
1077    /// Get the evidence ledger.
1078    #[must_use]
1079    pub fn evidence_ledger(&self) -> &[EvidenceLedgerEntry] {
1080        &self.evidence_ledger
1081    }
1082
1083    /// Get ledger entries for a specific controller.
1084    #[must_use]
1085    pub fn controller_ledger(&self, id: ControllerId) -> Vec<&EvidenceLedgerEntry> {
1086        self.evidence_ledger
1087            .iter()
1088            .filter(|entry| entry.controller_id == id)
1089            .collect()
1090    }
1091
1092    /// Get the number of epochs a controller has spent in its current mode.
1093    #[must_use]
1094    pub fn epochs_in_current_mode(&self, id: ControllerId) -> Option<u64> {
1095        self.controllers.get(&id).map(|c| c.epochs_in_current_mode)
1096    }
1097
1098    /// Get the number of budget overruns for a controller.
1099    #[must_use]
1100    pub fn budget_overruns(&self, id: ControllerId) -> Option<u32> {
1101        self.controllers.get(&id).map(|c| c.budget_overruns)
1102    }
1103
1104    fn record_ledger_entry(
1105        &mut self,
1106        controller_id: ControllerId,
1107        snapshot_id: Option<SnapshotId>,
1108        event: LedgerEvent,
1109    ) {
1110        let entry = EvidenceLedgerEntry {
1111            entry_id: self.next_ledger_id,
1112            controller_id,
1113            snapshot_id,
1114            event,
1115            policy_id: self.promotion_policy.policy_id.clone(),
1116            timestamp: Time::ZERO, // Logical time injected by caller in production
1117        };
1118        self.next_ledger_id += 1;
1119        self.evidence_ledger.push(entry);
1120    }
1121
1122    fn log_promotion_rejection(
1123        &self,
1124        id: ControllerId,
1125        rejection: &PromotionRejection,
1126        policy: &PromotionPolicy,
1127    ) {
1128        if let Some(ref sink) = self.log_sink {
1129            sink(&format!(
1130                "controller_promotion_rejected id={} reason={} policy_id={}",
1131                id.0, rejection, policy.policy_id,
1132            ));
1133        }
1134    }
1135}
1136
/// `Default` delegates to [`ControllerRegistry::new`].
impl Default for ControllerRegistry {
    /// Build a registry exactly as [`ControllerRegistry::new`] does.
    fn default() -> Self {
        Self::new()
    }
}
1142
1143#[cfg(test)]
1144mod tests {
1145    use super::*;
1146
1147    fn test_registration(name: &str) -> ControllerRegistration {
1148        ControllerRegistration {
1149            name: name.to_string(),
1150            min_version: SnapshotVersion { major: 1, minor: 0 },
1151            max_version: SnapshotVersion { major: 1, minor: 0 },
1152            required_fields: vec!["ready_queue_len".to_string(), "cancel_lane_len".to_string()],
1153            target_seams: vec!["AA01-SEAM-SCHED-CANCEL-STREAK".to_string()],
1154            initial_mode: ControllerMode::Shadow,
1155            proof_artifact_id: None,
1156            budget: ControllerBudget::default(),
1157        }
1158    }
1159
1160    #[test]
1161    fn snapshot_version_compatibility() {
1162        let v1_0 = SnapshotVersion { major: 1, minor: 0 };
1163        let v1_1 = SnapshotVersion { major: 1, minor: 1 };
1164        let v2_0 = SnapshotVersion { major: 2, minor: 0 };
1165
1166        assert!(v1_0.is_compatible_with(&v1_0));
1167        assert!(v1_1.is_compatible_with(&v1_0));
1168        assert!(!v1_0.is_compatible_with(&v1_1));
1169        assert!(!v2_0.is_compatible_with(&v1_0));
1170    }
1171
1172    #[test]
1173    fn snapshot_serialization_roundtrip() {
1174        let snap = RuntimeKernelSnapshot::test_default(1, Time::ZERO);
1175        let json = serde_json::to_string(&snap).unwrap();
1176        let deser: RuntimeKernelSnapshot = serde_json::from_str(&json).unwrap();
1177        assert_eq!(deser.id, snap.id);
1178        assert_eq!(deser.version, snap.version);
1179        assert_eq!(deser.ready_queue_len, 0);
1180        assert_eq!(deser.worker_count, 1);
1181    }
1182
1183    #[test]
1184    fn snapshot_deterministic_serialization() {
1185        let snap1 = RuntimeKernelSnapshot::test_default(42, Time::ZERO);
1186        let snap2 = RuntimeKernelSnapshot::test_default(42, Time::ZERO);
1187        assert_eq!(
1188            serde_json::to_string(&snap1).unwrap(),
1189            serde_json::to_string(&snap2).unwrap(),
1190        );
1191    }
1192
1193    #[test]
1194    fn register_valid_controller() {
1195        let mut registry = ControllerRegistry::new();
1196        let id = registry.register(test_registration("test-ctrl")).unwrap();
1197        assert_eq!(id.0, 1);
1198        assert_eq!(registry.len(), 1);
1199        assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
1200    }
1201
1202    #[test]
1203    fn reject_empty_name() {
1204        let mut registry = ControllerRegistry::new();
1205        let mut reg = test_registration("");
1206        reg.name = String::new();
1207        assert_eq!(
1208            registry.register(reg).unwrap_err(),
1209            RegistrationError::EmptyName,
1210        );
1211    }
1212
1213    #[test]
1214    fn reject_inverted_version_range() {
1215        let mut registry = ControllerRegistry::new();
1216        let mut reg = test_registration("bad-range");
1217        reg.min_version = SnapshotVersion { major: 2, minor: 0 };
1218        reg.max_version = SnapshotVersion { major: 1, minor: 0 };
1219        assert_eq!(
1220            registry.register(reg).unwrap_err(),
1221            RegistrationError::InvertedVersionRange,
1222        );
1223    }
1224
1225    #[test]
1226    fn reject_incompatible_version() {
1227        let mut registry = ControllerRegistry::new();
1228        let mut reg = test_registration("future-ctrl");
1229        reg.min_version = SnapshotVersion { major: 5, minor: 0 };
1230        reg.max_version = SnapshotVersion { major: 5, minor: 0 };
1231        assert!(matches!(
1232            registry.register(reg).unwrap_err(),
1233            RegistrationError::IncompatibleVersion { .. }
1234        ));
1235
1236        // Test minor version incompatibility
1237        let mut reg2 = test_registration("future-minor-ctrl");
1238        reg2.min_version = SnapshotVersion {
1239            major: SNAPSHOT_VERSION.major,
1240            minor: SNAPSHOT_VERSION.minor + 1,
1241        };
1242        reg2.max_version = SnapshotVersion {
1243            major: SNAPSHOT_VERSION.major,
1244            minor: SNAPSHOT_VERSION.minor + 1,
1245        };
1246        assert!(matches!(
1247            registry.register(reg2).unwrap_err(),
1248            RegistrationError::IncompatibleVersion { .. }
1249        ));
1250    }
1251
1252    #[test]
1253    fn reject_unsupported_fields() {
1254        let mut registry = ControllerRegistry::new();
1255        let mut reg = test_registration("bad-fields");
1256        reg.required_fields = vec!["nonexistent_field".to_string()];
1257        assert!(matches!(
1258            registry.register(reg).unwrap_err(),
1259            RegistrationError::UnsupportedFields(_)
1260        ));
1261    }
1262
1263    #[test]
1264    fn reject_no_target_seams() {
1265        let mut registry = ControllerRegistry::new();
1266        let mut reg = test_registration("no-seams");
1267        reg.target_seams = vec![];
1268        assert_eq!(
1269            registry.register(reg).unwrap_err(),
1270            RegistrationError::NoTargetSeams,
1271        );
1272    }
1273
1274    #[test]
1275    fn reject_zero_budget() {
1276        let mut registry = ControllerRegistry::new();
1277        let mut reg = test_registration("zero-budget");
1278        reg.budget.max_decisions_per_epoch = 0;
1279        assert_eq!(
1280            registry.register(reg).unwrap_err(),
1281            RegistrationError::ZeroBudget,
1282        );
1283    }
1284
1285    #[test]
1286    fn reject_duplicate_name() {
1287        let mut registry = ControllerRegistry::new();
1288        registry.register(test_registration("dup")).unwrap();
1289        assert_eq!(
1290            registry.register(test_registration("dup")).unwrap_err(),
1291            RegistrationError::DuplicateName("dup".to_string()),
1292        );
1293    }
1294
1295    #[test]
1296    fn deregister_controller() {
1297        let mut registry = ControllerRegistry::new();
1298        let id = registry.register(test_registration("removable")).unwrap();
1299        assert!(registry.deregister(id));
1300        assert_eq!(registry.len(), 0);
1301        assert!(!registry.deregister(id));
1302    }
1303
1304    #[test]
1305    fn set_mode() {
1306        let mut registry = ControllerRegistry::new();
1307        let id = registry.register(test_registration("mode-test")).unwrap();
1308        assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
1309        assert!(registry.set_mode(id, ControllerMode::Active));
1310        assert_eq!(registry.mode(id), Some(ControllerMode::Active));
1311    }
1312
1313    #[test]
1314    fn shadow_count() {
1315        let mut registry = ControllerRegistry::new();
1316        let id1 = registry.register(test_registration("s1")).unwrap();
1317        let _id2 = registry.register(test_registration("s2")).unwrap();
1318        assert_eq!(registry.shadow_count(), 2);
1319        registry.set_mode(id1, ControllerMode::Active);
1320        assert_eq!(registry.shadow_count(), 1);
1321    }
1322
1323    #[test]
1324    fn decision_budget_enforcement() {
1325        let mut registry = ControllerRegistry::new();
1326        let id = registry.register(test_registration("budget-ctrl")).unwrap();
1327        let snap_id = registry.next_snapshot_id();
1328
1329        let decision = ControllerDecision {
1330            controller_id: id,
1331            snapshot_id: snap_id,
1332            label: "test".to_string(),
1333            payload: serde_json::Value::Null,
1334            confidence: 0.9,
1335            fallback_label: "noop".to_string(),
1336        };
1337
1338        // First decision within budget (max_decisions_per_epoch = 1)
1339        assert!(registry.record_decision(&decision));
1340        // Second decision exceeds budget
1341        assert!(!registry.record_decision(&decision));
1342        // Reset epoch
1343        registry.reset_epoch();
1344        // First decision after reset is within budget again
1345        assert!(registry.record_decision(&decision));
1346    }
1347
1348    #[test]
1349    fn calibration_tracking() {
1350        let mut registry = ControllerRegistry::new();
1351        let id = registry.register(test_registration("calib")).unwrap();
1352        assert_eq!(registry.calibration_score(id), Some(0.0));
1353        registry.update_calibration(id, 0.85);
1354        assert_eq!(registry.calibration_score(id), Some(0.85));
1355    }
1356
1357    #[test]
1358    fn snapshot_id_monotonic() {
1359        let mut registry = ControllerRegistry::new();
1360        let id1 = registry.next_snapshot_id();
1361        let id2 = registry.next_snapshot_id();
1362        let id3 = registry.next_snapshot_id();
1363        assert!(id1 < id2);
1364        assert!(id2 < id3);
1365    }
1366
1367    #[test]
1368    fn active_mode_not_downgraded_when_snapshot_matches() {
1369        let mut registry = ControllerRegistry::new();
1370        let mut reg = test_registration("downgrade-test");
1371        reg.initial_mode = ControllerMode::Active;
1372        // Controller supports up to SNAPSHOT_VERSION
1373        reg.min_version = SNAPSHOT_VERSION;
1374        reg.max_version = SNAPSHOT_VERSION;
1375        // This should work since versions match
1376        let id = registry.register(reg).unwrap();
1377        assert_eq!(registry.mode(id), Some(ControllerMode::Active));
1378    }
1379
1380    #[test]
1381    fn known_fields_completeness() {
1382        // Verify KNOWN_FIELDS matches snapshot struct fields
1383        let snap = RuntimeKernelSnapshot::test_default(1, Time::ZERO);
1384        let json = serde_json::to_value(&snap).unwrap();
1385        let obj = json.as_object().unwrap();
1386        // Non-data fields that aren't in KNOWN_FIELDS
1387        let meta_fields = [
1388            "id",
1389            "version",
1390            "timestamp",
1391            "registered_controllers",
1392            "shadow_controllers",
1393        ];
1394        for field in KNOWN_FIELDS {
1395            assert!(
1396                obj.contains_key(*field),
1397                "KNOWN_FIELDS contains '{field}' but snapshot JSON does not"
1398            );
1399        }
1400        for key in obj.keys() {
1401            if meta_fields.contains(&key.as_str()) {
1402                continue;
1403            }
1404            assert!(
1405                KNOWN_FIELDS.contains(&key.as_str()),
1406                "snapshot JSON has field '{key}' not in KNOWN_FIELDS"
1407            );
1408        }
1409    }
1410
1411    #[test]
1412    fn registration_info_accessible() {
1413        let mut registry = ControllerRegistry::new();
1414        let id = registry.register(test_registration("info-test")).unwrap();
1415        let reg = registry.registration(id).unwrap();
1416        assert_eq!(reg.name, "info-test");
1417        assert_eq!(reg.target_seams, vec!["AA01-SEAM-SCHED-CANCEL-STREAK"]);
1418    }
1419
1420    #[test]
1421    fn controller_ids_listed() {
1422        let mut registry = ControllerRegistry::new();
1423        let id1 = registry.register(test_registration("a")).unwrap();
1424        let id2 = registry.register(test_registration("b")).unwrap();
1425        let ids = registry.controller_ids();
1426        assert!(ids.contains(&id1));
1427        assert!(ids.contains(&id2));
1428        assert_eq!(ids.len(), 2);
1429    }
1430
1431    #[test]
1432    fn log_sink_receives_registration_event() {
1433        use parking_lot::Mutex;
1434        let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1435        let logs_clone = Arc::clone(&logs);
1436        let mut registry = ControllerRegistry::new().with_log_sink(Arc::new(move |msg: &str| {
1437            logs_clone.lock().push(msg.to_string());
1438        }));
1439        registry.register(test_registration("logged")).unwrap();
1440        {
1441            let captured = logs.lock();
1442            assert_eq!(captured.len(), 1);
1443            assert!(captured[0].contains("controller_registered"));
1444            assert!(captured[0].contains("logged"));
1445            drop(captured);
1446        }
1447    }
1448
1449    #[test]
1450    fn decision_for_unknown_controller_returns_false() {
1451        let mut registry = ControllerRegistry::new();
1452        let decision = ControllerDecision {
1453            controller_id: ControllerId(999),
1454            snapshot_id: SnapshotId(1),
1455            label: "ghost".to_string(),
1456            payload: serde_json::Value::Null,
1457            confidence: 1.0,
1458            fallback_label: "noop".to_string(),
1459        };
1460        assert!(!registry.record_decision(&decision));
1461    }
1462
1463    #[test]
1464    fn version_display() {
1465        let v = SnapshotVersion { major: 1, minor: 2 };
1466        assert_eq!(format!("{v}"), "1.2");
1467    }
1468
1469    #[test]
1470    fn error_display_coverage() {
1471        let errors = [
1472            RegistrationError::EmptyName,
1473            RegistrationError::InvertedVersionRange,
1474            RegistrationError::IncompatibleVersion {
1475                current: SnapshotVersion { major: 1, minor: 0 },
1476                min: SnapshotVersion { major: 2, minor: 0 },
1477                max: SnapshotVersion { major: 2, minor: 0 },
1478            },
1479            RegistrationError::UnsupportedFields(vec!["foo".to_string()]),
1480            RegistrationError::NoTargetSeams,
1481            RegistrationError::ZeroBudget,
1482            RegistrationError::DuplicateName("dup".to_string()),
1483        ];
1484        for error in &errors {
1485            let msg = format!("{error}");
1486            assert!(!msg.is_empty());
1487        }
1488    }
1489
1490    // ── AA-02.3: Shadow, canary, rollback, evidence-ledger validation ──
1491
1492    fn registry_with_policy(policy: PromotionPolicy) -> ControllerRegistry {
1493        let mut r = ControllerRegistry::new();
1494        r.set_promotion_policy(policy);
1495        r
1496    }
1497
1498    fn fast_policy() -> PromotionPolicy {
1499        PromotionPolicy {
1500            min_calibration_score: 0.8,
1501            min_shadow_epochs: 2,
1502            min_canary_epochs: 1,
1503            max_budget_overruns: 3,
1504            policy_id: "test-fast-v1".to_string(),
1505        }
1506    }
1507
1508    #[test]
1509    fn promote_shadow_to_canary() {
1510        let mut registry = registry_with_policy(fast_policy());
1511        let id = registry.register(test_registration("promo")).unwrap();
1512        registry.update_calibration(id, 0.9);
1513        // Need 2 epochs in shadow
1514        registry.advance_epoch();
1515        registry.advance_epoch();
1516        let result = registry.try_promote(id, ControllerMode::Canary);
1517        assert_eq!(result, Ok(ControllerMode::Canary));
1518        assert_eq!(registry.mode(id), Some(ControllerMode::Canary));
1519        assert_eq!(registry.epochs_in_current_mode(id), Some(0));
1520    }
1521
1522    #[test]
1523    fn promote_canary_to_active() {
1524        let mut registry = registry_with_policy(fast_policy());
1525        let id = registry.register(test_registration("canary-up")).unwrap();
1526        registry.update_calibration(id, 0.95);
1527        registry.advance_epoch();
1528        registry.advance_epoch();
1529        registry.try_promote(id, ControllerMode::Canary).unwrap();
1530        registry.advance_epoch();
1531        let result = registry.try_promote(id, ControllerMode::Active);
1532        assert_eq!(result, Ok(ControllerMode::Active));
1533    }
1534
1535    #[test]
1536    fn promote_rejects_insufficient_epochs() {
1537        let mut registry = registry_with_policy(fast_policy());
1538        let id = registry.register(test_registration("too-soon")).unwrap();
1539        registry.update_calibration(id, 0.9);
1540        // Only 1 epoch, need 2
1541        registry.advance_epoch();
1542        let result = registry.try_promote(id, ControllerMode::Canary);
1543        assert!(matches!(
1544            result,
1545            Err(PromotionRejection::InsufficientEpochs {
1546                current: 1,
1547                required: 2,
1548                ..
1549            })
1550        ));
1551    }
1552
1553    #[test]
1554    fn promote_rejects_low_calibration() {
1555        let mut registry = registry_with_policy(fast_policy());
1556        let id = registry.register(test_registration("low-cal")).unwrap();
1557        registry.update_calibration(id, 0.5);
1558        registry.advance_epoch();
1559        registry.advance_epoch();
1560        let result = registry.try_promote(id, ControllerMode::Canary);
1561        assert!(matches!(
1562            result,
1563            Err(PromotionRejection::CalibrationTooLow { .. })
1564        ));
1565    }
1566
1567    #[test]
1568    fn promote_rejects_invalid_transition_shadow_to_active() {
1569        let mut registry = registry_with_policy(fast_policy());
1570        let id = registry.register(test_registration("skip")).unwrap();
1571        registry.update_calibration(id, 0.99);
1572        registry.advance_epoch();
1573        registry.advance_epoch();
1574        registry.advance_epoch();
1575        let result = registry.try_promote(id, ControllerMode::Active);
1576        assert!(matches!(
1577            result,
1578            Err(PromotionRejection::InvalidTransition { .. })
1579        ));
1580    }
1581
1582    #[test]
1583    fn promote_rejects_active_to_canary() {
1584        let mut registry = registry_with_policy(fast_policy());
1585        let id = registry.register(test_registration("backward")).unwrap();
1586        registry.update_calibration(id, 0.95);
1587        registry.advance_epoch();
1588        registry.advance_epoch();
1589        registry.try_promote(id, ControllerMode::Canary).unwrap();
1590        registry.advance_epoch();
1591        registry.try_promote(id, ControllerMode::Active).unwrap();
1592        let result = registry.try_promote(id, ControllerMode::Canary);
1593        assert!(matches!(
1594            result,
1595            Err(PromotionRejection::InvalidTransition { .. })
1596        ));
1597    }
1598
1599    #[test]
1600    fn rollback_from_active_to_shadow() {
1601        let mut registry = registry_with_policy(fast_policy());
1602        let id = registry.register(test_registration("rollme")).unwrap();
1603        registry.update_calibration(id, 0.95);
1604        registry.advance_epoch();
1605        registry.advance_epoch();
1606        registry.try_promote(id, ControllerMode::Canary).unwrap();
1607        registry.advance_epoch();
1608        registry.try_promote(id, ControllerMode::Active).unwrap();
1609
1610        let cmd = registry
1611            .rollback(id, RollbackReason::CalibrationRegression { score: 0.3 })
1612            .unwrap();
1613        assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
1614        assert_eq!(cmd.rolled_back_from, ControllerMode::Active);
1615        assert_eq!(cmd.rolled_back_to, ControllerMode::Shadow);
1616        assert_eq!(cmd.controller_name, "rollme");
1617        assert!(!cmd.remediation.is_empty());
1618        assert!(registry.is_fallback_active(id));
1619    }
1620
1621    #[test]
1622    fn rollback_from_canary_to_shadow() {
1623        let mut registry = registry_with_policy(fast_policy());
1624        let id = registry.register(test_registration("can-roll")).unwrap();
1625        registry.update_calibration(id, 0.9);
1626        registry.advance_epoch();
1627        registry.advance_epoch();
1628        registry.try_promote(id, ControllerMode::Canary).unwrap();
1629
1630        let cmd = registry
1631            .rollback(id, RollbackReason::ManualRollback)
1632            .unwrap();
1633        assert_eq!(cmd.rolled_back_from, ControllerMode::Canary);
1634        assert_eq!(cmd.rolled_back_to, ControllerMode::Shadow);
1635    }
1636
1637    #[test]
1638    fn rollback_from_shadow_returns_none() {
1639        let mut registry = ControllerRegistry::new();
1640        let id = registry
1641            .register(test_registration("already-shadow"))
1642            .unwrap();
1643        assert!(
1644            registry
1645                .rollback(id, RollbackReason::ManualRollback)
1646                .is_none()
1647        );
1648    }
1649
1650    #[test]
1651    fn hold_and_release() {
1652        let mut registry = registry_with_policy(fast_policy());
1653        let id = registry.register(test_registration("holdme")).unwrap();
1654        registry.update_calibration(id, 0.9);
1655        registry.advance_epoch();
1656        registry.advance_epoch();
1657        registry.try_promote(id, ControllerMode::Canary).unwrap();
1658
1659        assert!(registry.hold(id));
1660        assert_eq!(registry.mode(id), Some(ControllerMode::Hold));
1661
1662        // Cannot promote while held
1663        let result = registry.try_promote(id, ControllerMode::Active);
1664        assert!(matches!(
1665            result,
1666            Err(PromotionRejection::HeldForInvestigation)
1667        ));
1668
1669        // Release restores previous mode
1670        let restored = registry.release_hold(id).unwrap();
1671        assert_eq!(restored, ControllerMode::Canary);
1672        assert_eq!(registry.mode(id), Some(ControllerMode::Canary));
1673    }
1674
1675    #[test]
1676    fn hold_already_held_returns_false() {
1677        let mut registry = ControllerRegistry::new();
1678        let id = registry.register(test_registration("double-hold")).unwrap();
1679        assert!(registry.hold(id));
1680        assert!(!registry.hold(id));
1681    }
1682
1683    #[test]
1684    fn release_non_held_returns_none() {
1685        let mut registry = ControllerRegistry::new();
1686        let id = registry.register(test_registration("not-held")).unwrap();
1687        assert!(registry.release_hold(id).is_none());
1688    }
1689
1690    #[test]
1691    fn fallback_lifecycle() {
1692        let mut registry = registry_with_policy(fast_policy());
1693        let id = registry.register(test_registration("fb")).unwrap();
1694        assert!(!registry.is_fallback_active(id));
1695        registry.update_calibration(id, 0.9);
1696        registry.advance_epoch();
1697        registry.advance_epoch();
1698        registry.try_promote(id, ControllerMode::Canary).unwrap();
1699        registry.rollback(
1700            id,
1701            RollbackReason::FallbackTriggered {
1702                decision_label: "bad-decision".to_string(),
1703            },
1704        );
1705        assert!(registry.is_fallback_active(id));
1706        registry.clear_fallback(id);
1707        assert!(!registry.is_fallback_active(id));
1708    }
1709
1710    #[test]
1711    fn evidence_ledger_records_registration() {
1712        let mut registry = ControllerRegistry::new();
1713        let id = registry.register(test_registration("ledger-reg")).unwrap();
1714        let entries = registry.controller_ledger(id);
1715        assert_eq!(entries.len(), 1);
1716        assert!(matches!(entries[0].event, LedgerEvent::Registered { .. }));
1717    }
1718
1719    #[test]
1720    fn evidence_ledger_records_full_lifecycle() {
1721        let mut registry = registry_with_policy(fast_policy());
1722        let id = registry.register(test_registration("full-life")).unwrap();
1723        registry.update_calibration(id, 0.95);
1724        registry.advance_epoch();
1725        registry.advance_epoch();
1726
1727        // Promote to canary
1728        registry.try_promote(id, ControllerMode::Canary).unwrap();
1729        registry.advance_epoch();
1730
1731        // Promote to active
1732        registry.try_promote(id, ControllerMode::Active).unwrap();
1733
1734        // Rollback
1735        registry.rollback(id, RollbackReason::ManualRollback);
1736
1737        let entries = registry.controller_ledger(id);
1738        // Registered + 2 Promoted + RolledBack = 4
1739        assert_eq!(entries.len(), 4);
1740        assert!(matches!(entries[0].event, LedgerEvent::Registered { .. }));
1741        assert!(matches!(
1742            entries[1].event,
1743            LedgerEvent::Promoted {
1744                from: ControllerMode::Shadow,
1745                to: ControllerMode::Canary,
1746                ..
1747            }
1748        ));
1749        assert!(matches!(
1750            entries[2].event,
1751            LedgerEvent::Promoted {
1752                from: ControllerMode::Canary,
1753                to: ControllerMode::Active,
1754                ..
1755            }
1756        ));
1757        assert!(matches!(
1758            entries[3].event,
1759            LedgerEvent::RolledBack {
1760                from: ControllerMode::Active,
1761                to: ControllerMode::Shadow,
1762                ..
1763            }
1764        ));
1765    }
1766
1767    #[test]
1768    fn evidence_ledger_records_decisions() {
1769        let mut registry = ControllerRegistry::new();
1770        let id = registry.register(test_registration("dec-ledger")).unwrap();
1771        let snap_id = registry.next_snapshot_id();
1772        let decision = ControllerDecision {
1773            controller_id: id,
1774            snapshot_id: snap_id,
1775            label: "adjust-streak".to_string(),
1776            payload: serde_json::Value::Null,
1777            confidence: 0.9,
1778            fallback_label: "noop".to_string(),
1779        };
1780        registry.record_decision(&decision);
1781        let entries = registry.controller_ledger(id);
1782        // Registered + DecisionRecorded
1783        assert_eq!(entries.len(), 2);
1784        assert!(matches!(
1785            &entries[1].event,
1786            LedgerEvent::DecisionRecorded { label, within_budget: true } if label == "adjust-streak"
1787        ));
1788    }
1789
1790    #[test]
1791    fn evidence_ledger_records_promotion_rejections() {
1792        let mut registry = registry_with_policy(fast_policy());
1793        let id = registry
1794            .register(test_registration("reject-ledger"))
1795            .unwrap();
1796        registry.update_calibration(id, 0.5);
1797        registry.advance_epoch();
1798        registry.advance_epoch();
1799        let _ = registry.try_promote(id, ControllerMode::Canary);
1800        let entries = registry.controller_ledger(id);
1801        // Registered + PromotionRejected
1802        assert_eq!(entries.len(), 2);
1803        assert!(matches!(
1804            entries[1].event,
1805            LedgerEvent::PromotionRejected { .. }
1806        ));
1807    }
1808
1809    #[test]
1810    fn evidence_ledger_records_hold_and_release() {
1811        let mut registry = ControllerRegistry::new();
1812        let id = registry.register(test_registration("hold-ledger")).unwrap();
1813        registry.hold(id);
1814        registry.release_hold(id);
1815        let entries = registry.controller_ledger(id);
1816        // Registered + Held + Released
1817        assert_eq!(entries.len(), 3);
1818        assert!(matches!(entries[1].event, LedgerEvent::Held { .. }));
1819        assert!(matches!(entries[2].event, LedgerEvent::Released { .. }));
1820    }
1821
1822    #[test]
1823    fn evidence_ledger_records_deregistration() {
1824        let mut registry = ControllerRegistry::new();
1825        let id = registry
1826            .register(test_registration("dereg-ledger"))
1827            .unwrap();
1828        registry.deregister(id);
1829        let entries = registry.controller_ledger(id);
1830        // Registered + Deregistered
1831        assert_eq!(entries.len(), 2);
1832        assert!(matches!(entries[1].event, LedgerEvent::Deregistered));
1833    }
1834
1835    #[test]
1836    fn ledger_entry_ids_are_monotonic() {
1837        let mut registry = ControllerRegistry::new();
1838        let id = registry.register(test_registration("mono")).unwrap();
1839        registry.hold(id);
1840        registry.release_hold(id);
1841        let ledger = registry.evidence_ledger();
1842        for pair in ledger.windows(2) {
1843            assert!(pair[0].entry_id < pair[1].entry_id);
1844        }
1845    }
1846
1847    #[test]
1848    fn ledger_entries_carry_policy_id() {
1849        let policy = fast_policy();
1850        let expected_id = policy.policy_id.clone();
1851        let mut registry = registry_with_policy(policy);
1852        let id = registry
1853            .register(test_registration("policy-trace"))
1854            .unwrap();
1855        registry.hold(id);
1856        for entry in registry.controller_ledger(id) {
1857            assert_eq!(entry.policy_id, expected_id);
1858        }
1859    }
1860
1861    #[test]
1862    fn budget_overruns_tracked() {
1863        let mut registry = ControllerRegistry::new();
1864        let id = registry.register(test_registration("overruns")).unwrap();
1865        let snap_id = registry.next_snapshot_id();
1866        let decision = ControllerDecision {
1867            controller_id: id,
1868            snapshot_id: snap_id,
1869            label: "test".to_string(),
1870            payload: serde_json::Value::Null,
1871            confidence: 0.9,
1872            fallback_label: "noop".to_string(),
1873        };
1874        // 1st within budget, 2nd exceeds (budget=1)
1875        registry.record_decision(&decision);
1876        registry.record_decision(&decision);
1877        registry.record_decision(&decision);
1878        assert_eq!(registry.budget_overruns(id), Some(2));
1879    }
1880
1881    #[test]
1882    fn advance_epoch_increments_mode_counter() {
1883        let mut registry = ControllerRegistry::new();
1884        let id = registry.register(test_registration("epoch-count")).unwrap();
1885        assert_eq!(registry.epochs_in_current_mode(id), Some(0));
1886        registry.advance_epoch();
1887        assert_eq!(registry.epochs_in_current_mode(id), Some(1));
1888        registry.advance_epoch();
1889        assert_eq!(registry.epochs_in_current_mode(id), Some(2));
1890    }
1891
1892    #[test]
1893    fn recovery_command_has_remediation() {
1894        let mut registry = registry_with_policy(fast_policy());
1895        let id = registry.register(test_registration("recovery")).unwrap();
1896        registry.update_calibration(id, 0.95);
1897        registry.advance_epoch();
1898        registry.advance_epoch();
1899        registry.try_promote(id, ControllerMode::Canary).unwrap();
1900
1901        let cmd = registry
1902            .rollback(id, RollbackReason::BudgetOverruns { count: 5 })
1903            .unwrap();
1904        assert_eq!(cmd.policy_id, "test-fast-v1");
1905        assert!(!cmd.remediation.is_empty());
1906        assert!(cmd.remediation.iter().any(|r| r.contains("budget")));
1907    }
1908
1909    #[test]
1910    fn recovery_command_for_fallback_triggered() {
1911        let mut registry = registry_with_policy(fast_policy());
1912        let id = registry
1913            .register(test_registration("fallback-cmd"))
1914            .unwrap();
1915        registry.update_calibration(id, 0.9);
1916        registry.advance_epoch();
1917        registry.advance_epoch();
1918        registry.try_promote(id, ControllerMode::Canary).unwrap();
1919
1920        let cmd = registry
1921            .rollback(
1922                id,
1923                RollbackReason::FallbackTriggered {
1924                    decision_label: "bad-action".to_string(),
1925                },
1926            )
1927            .unwrap();
1928        assert!(cmd.remediation.iter().any(|r| r.contains("bad-action")));
1929    }
1930
1931    #[test]
1932    fn structured_log_covers_promotion_and_rollback() {
1933        use parking_lot::Mutex;
1934        let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1935        let logs_clone = Arc::clone(&logs);
1936        let mut registry = registry_with_policy(fast_policy());
1937        registry = registry.with_log_sink(Arc::new(move |msg: &str| {
1938            logs_clone.lock().push(msg.to_string());
1939        }));
1940        let id = registry.register(test_registration("log-promo")).unwrap();
1941        registry.update_calibration(id, 0.9);
1942        registry.advance_epoch();
1943        registry.advance_epoch();
1944        registry.try_promote(id, ControllerMode::Canary).unwrap();
1945        registry.rollback(id, RollbackReason::ManualRollback);
1946
1947        {
1948            let captured = logs.lock();
1949            assert!(captured.iter().any(|l| l.contains("controller_promoted")));
1950            assert!(
1951                captured
1952                    .iter()
1953                    .any(|l| l.contains("controller_rolled_back"))
1954            );
1955            assert!(
1956                captured
1957                    .iter()
1958                    .any(|l| l.contains("policy_id=test-fast-v1"))
1959            );
1960            drop(captured);
1961        }
1962    }
1963
1964    #[test]
1965    fn structured_log_covers_promotion_rejection() {
1966        use parking_lot::Mutex;
1967        let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1968        let logs_clone = Arc::clone(&logs);
1969        let mut registry = registry_with_policy(fast_policy());
1970        registry = registry.with_log_sink(Arc::new(move |msg: &str| {
1971            logs_clone.lock().push(msg.to_string());
1972        }));
1973        let id = registry.register(test_registration("log-reject")).unwrap();
1974        registry.update_calibration(id, 0.5);
1975        registry.advance_epoch();
1976        registry.advance_epoch();
1977        let _ = registry.try_promote(id, ControllerMode::Canary);
1978
1979        {
1980            let captured = logs.lock();
1981            assert!(
1982                captured
1983                    .iter()
1984                    .any(|l| l.contains("controller_promotion_rejected"))
1985            );
1986            drop(captured);
1987        }
1988    }
1989
1990    #[test]
1991    fn structured_log_covers_hold_and_release() {
1992        use parking_lot::Mutex;
1993        let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1994        let logs_clone = Arc::clone(&logs);
1995        let mut registry = ControllerRegistry::new();
1996        registry = registry.with_log_sink(Arc::new(move |msg: &str| {
1997            logs_clone.lock().push(msg.to_string());
1998        }));
1999        let id = registry.register(test_registration("log-hold")).unwrap();
2000        registry.hold(id);
2001        registry.release_hold(id);
2002
2003        {
2004            let captured = logs.lock();
2005            assert!(captured.iter().any(|l| l.contains("controller_held")));
2006            assert!(captured.iter().any(|l| l.contains("controller_released")));
2007            drop(captured);
2008        }
2009    }
2010
2011    #[test]
2012    fn promotion_rejection_display_coverage() {
2013        let rejections = [
2014            PromotionRejection::ControllerNotFound,
2015            PromotionRejection::CalibrationTooLow {
2016                current: 0.5,
2017                required: 0.8,
2018            },
2019            PromotionRejection::InsufficientEpochs {
2020                current: 1,
2021                required: 3,
2022                mode: ControllerMode::Shadow,
2023            },
2024            PromotionRejection::InvalidTransition {
2025                from: ControllerMode::Shadow,
2026                to: ControllerMode::Active,
2027            },
2028            PromotionRejection::HeldForInvestigation,
2029        ];
2030        for rejection in &rejections {
2031            let msg = format!("{rejection}");
2032            assert!(!msg.is_empty());
2033        }
2034    }
2035
2036    #[test]
2037    fn rollback_reason_display_coverage() {
2038        let reasons = [
2039            RollbackReason::CalibrationRegression { score: 0.3 },
2040            RollbackReason::BudgetOverruns { count: 5 },
2041            RollbackReason::ManualRollback,
2042            RollbackReason::FallbackTriggered {
2043                decision_label: "test".to_string(),
2044            },
2045        ];
2046        for reason in &reasons {
2047            let msg = format!("{reason}");
2048            assert!(!msg.is_empty());
2049        }
2050    }
2051
2052    #[test]
2053    fn e2e_promotion_cannot_bypass_verification() {
2054        // Scenario: a controller tries to skip the pipeline
2055        let mut registry = registry_with_policy(fast_policy());
2056        let id = registry
2057            .register(test_registration("bypass-attempt"))
2058            .unwrap();
2059
2060        // Attempt 1: promote directly to Active from Shadow (must fail)
2061        registry.update_calibration(id, 0.99);
2062        for _ in 0..10 {
2063            registry.advance_epoch();
2064        }
2065        assert!(matches!(
2066            registry.try_promote(id, ControllerMode::Active),
2067            Err(PromotionRejection::InvalidTransition { .. })
2068        ));
2069
2070        // Attempt 2: promote to Canary without sufficient calibration
2071        registry.update_calibration(id, 0.1);
2072        assert!(matches!(
2073            registry.try_promote(id, ControllerMode::Canary),
2074            Err(PromotionRejection::CalibrationTooLow { .. })
2075        ));
2076
2077        // Attempt 3: promote to Canary without sufficient epochs
2078        registry.update_calibration(id, 0.99);
2079        // Reset epochs by setting mode (simulate re-registration scenario)
2080        registry.set_mode(id, ControllerMode::Shadow);
2081        // epochs_in_current_mode is still high, so this succeeds — good,
2082        // it proves you can't bypass calibration but epochs accumulate
2083
2084        // Correct path: full pipeline
2085        let id2 = registry
2086            .register(test_registration("correct-path"))
2087            .unwrap();
2088        registry.update_calibration(id2, 0.9);
2089        assert!(registry.try_promote(id2, ControllerMode::Canary).is_err()); // 0 epochs
2090        registry.advance_epoch();
2091        assert!(registry.try_promote(id2, ControllerMode::Canary).is_err()); // 1 epoch
2092        registry.advance_epoch();
2093        assert!(registry.try_promote(id2, ControllerMode::Canary).is_ok()); // 2 epochs
2094        assert!(registry.try_promote(id2, ControllerMode::Active).is_err()); // 0 canary epochs
2095        registry.advance_epoch();
2096        assert!(registry.try_promote(id2, ControllerMode::Active).is_ok()); // 1 canary epoch
2097        assert_eq!(registry.mode(id2), Some(ControllerMode::Active));
2098    }
2099
2100    #[test]
2101    fn e2e_failed_rollout_leaves_conservative_state() {
2102        let mut registry = registry_with_policy(fast_policy());
2103        let id = registry
2104            .register(test_registration("failed-rollout"))
2105            .unwrap();
2106        registry.update_calibration(id, 0.9);
2107        registry.advance_epoch();
2108        registry.advance_epoch();
2109        registry.try_promote(id, ControllerMode::Canary).unwrap();
2110        registry.advance_epoch();
2111        registry.try_promote(id, ControllerMode::Active).unwrap();
2112
2113        // Simulate calibration regression triggering rollback
2114        registry.update_calibration(id, 0.2);
2115        let cmd = registry
2116            .rollback(id, RollbackReason::CalibrationRegression { score: 0.2 })
2117            .unwrap();
2118
2119        // Verify conservative state
2120        assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
2121        assert!(registry.is_fallback_active(id));
2122        assert_eq!(cmd.rolled_back_to, ControllerMode::Shadow);
2123        assert!(!cmd.remediation.is_empty());
2124
2125        // Cannot re-promote without clearing conditions
2126        assert!(registry.try_promote(id, ControllerMode::Canary).is_err());
2127    }
2128
2129    #[test]
2130    fn e2e_hold_blocks_entire_pipeline() {
2131        let mut registry = registry_with_policy(fast_policy());
2132        let id = registry.register(test_registration("hold-block")).unwrap();
2133        registry.update_calibration(id, 0.99);
2134        registry.advance_epoch();
2135        registry.advance_epoch();
2136
2137        registry.hold(id);
2138        // All promotion attempts fail while held
2139        assert!(matches!(
2140            registry.try_promote(id, ControllerMode::Canary),
2141            Err(PromotionRejection::HeldForInvestigation)
2142        ));
2143
2144        // Release and verify pipeline resumes
2145        registry.release_hold(id);
2146        // Epochs reset on release, need to accumulate again
2147        registry.advance_epoch();
2148        registry.advance_epoch();
2149        assert!(registry.try_promote(id, ControllerMode::Canary).is_ok());
2150    }
2151
2152    #[test]
2153    fn recovery_command_serializable() {
2154        let cmd = RecoveryCommand {
2155            controller_id: ControllerId(42),
2156            controller_name: "test-ctrl".to_string(),
2157            rolled_back_from: ControllerMode::Active,
2158            rolled_back_to: ControllerMode::Shadow,
2159            reason: RollbackReason::ManualRollback,
2160            policy_id: "test-v1".to_string(),
2161            at_snapshot_id: Some(SnapshotId(100)),
2162            remediation: vec!["check logs".to_string()],
2163        };
2164        let json = serde_json::to_string(&cmd).unwrap();
2165        let deser: RecoveryCommand = serde_json::from_str(&json).unwrap();
2166        assert_eq!(deser.controller_id, ControllerId(42));
2167        assert_eq!(deser.controller_name, "test-ctrl");
2168    }
2169
2170    #[test]
2171    fn evidence_ledger_entry_serializable() {
2172        let entry = EvidenceLedgerEntry {
2173            entry_id: 1,
2174            controller_id: ControllerId(1),
2175            snapshot_id: Some(SnapshotId(5)),
2176            event: LedgerEvent::Promoted {
2177                from: ControllerMode::Shadow,
2178                to: ControllerMode::Canary,
2179                calibration_score: 0.85,
2180            },
2181            policy_id: "test".to_string(),
2182            timestamp: Time::ZERO,
2183        };
2184        let json = serde_json::to_string(&entry).unwrap();
2185        let deser: EvidenceLedgerEntry = serde_json::from_str(&json).unwrap();
2186        assert_eq!(deser.entry_id, 1);
2187    }
2188
2189    #[test]
2190    fn default_promotion_policy_values() {
2191        let policy = PromotionPolicy::default();
2192        assert!((policy.min_calibration_score - 0.8).abs() < f64::EPSILON);
2193        assert_eq!(policy.min_shadow_epochs, 3);
2194        assert_eq!(policy.min_canary_epochs, 2);
2195        assert_eq!(policy.max_budget_overruns, 3);
2196        assert_eq!(policy.policy_id, "default-promotion-policy-v1");
2197    }
2198}