1use crate::types::Time;
26use serde::{Deserialize, Serialize};
27use std::collections::BTreeMap;
28use std::sync::Arc;
29
/// Snapshot schema version used by this build.
///
/// `RuntimeKernelSnapshot::test_default` stamps snapshots with it, and
/// `ControllerRegistry` checks every registration's version range against it.
pub const SNAPSHOT_VERSION: SnapshotVersion = SnapshotVersion { major: 1, minor: 0 };
32
/// Two-part (`major.minor`) version of the snapshot schema.
///
/// The derived ordering compares `major` first and `minor` second, because
/// that is the field declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct SnapshotVersion {
    /// Major component; `is_compatible_with` requires an exact match on it.
    pub major: u32,
    /// Minor component; `is_compatible_with` accepts a receiver whose minor is
    /// greater than or equal to the other side's.
    pub minor: u32,
}
41
42impl std::fmt::Display for SnapshotVersion {
43 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44 write!(f, "{}.{}", self.major, self.minor)
45 }
46}
47
48impl SnapshotVersion {
49 #[must_use]
51 pub fn is_compatible_with(&self, other: &Self) -> bool {
52 self.major == other.major && self.minor >= other.minor
53 }
54}
55
/// Identifier of a single snapshot.
///
/// `ControllerRegistry::next_snapshot_id` hands these out in increasing order,
/// starting from 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct SnapshotId(pub u64);
59
/// Serializable record of runtime state handed to controllers.
///
/// NOTE(review): the code that fills a snapshot is not visible in this part of
/// the file, so the per-field descriptions below are read off the field names
/// and should be confirmed against the producer. Every field other than `id`,
/// `version` and `timestamp` is also listed in `KNOWN_FIELDS`, which is what a
/// controller registration may name in `required_fields`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuntimeKernelSnapshot {
    /// Identifier of this snapshot.
    pub id: SnapshotId,
    /// Schema version the snapshot was produced with.
    pub version: SnapshotVersion,
    /// Time associated with the snapshot.
    pub timestamp: Time,

    /// Presumably the number of entries in the ready queue.
    pub ready_queue_len: usize,
    /// Presumably the number of entries in the cancel lane.
    pub cancel_lane_len: usize,
    /// Presumably the number of entries in the finalize lane.
    pub finalize_lane_len: usize,
    /// Presumably the total task count.
    pub total_tasks: usize,
    /// Presumably the number of active regions.
    pub active_regions: usize,
    /// Presumably the current cancel streak.
    pub cancel_streak_current: usize,
    /// Presumably the configured cancel streak limit (`test_default` uses 16).
    pub cancel_streak_limit: usize,

    /// Presumably the number of outstanding obligations.
    pub outstanding_obligations: usize,
    /// Presumably a running count of leaked obligations.
    pub obligation_leak_count: u64,

    /// Presumably the number of pending I/O registrations.
    pub pending_io_registrations: usize,
    /// Presumably the number of active timers.
    pub active_timers: usize,

    /// Presumably the number of workers (`test_default` uses 1).
    pub worker_count: usize,
    /// Presumably the number of parked workers.
    pub workers_parked: usize,
    /// Presumably the number of active blocking threads.
    pub blocking_threads_active: usize,

    /// Presumably whether the governor is enabled.
    pub governor_enabled: bool,
    /// Presumably whether adaptive cancellation is enabled.
    pub adaptive_cancel_enabled: bool,
    /// Presumably the current adaptive epoch counter.
    pub adaptive_epoch: u64,

    /// Presumably the number of registered controllers — TODO confirm it
    /// mirrors `ControllerRegistry::len`.
    pub registered_controllers: usize,
    /// Presumably the number of controllers in shadow mode — TODO confirm it
    /// mirrors `ControllerRegistry::shadow_count`.
    pub shadow_controllers: usize,
}
124
125impl RuntimeKernelSnapshot {
126 #[cfg(any(test, feature = "test-internals"))]
128 #[must_use]
129 pub fn test_default(id: u64, now: Time) -> Self {
130 Self {
131 id: SnapshotId(id),
132 version: SNAPSHOT_VERSION,
133 timestamp: now,
134 ready_queue_len: 0,
135 cancel_lane_len: 0,
136 finalize_lane_len: 0,
137 total_tasks: 0,
138 active_regions: 0,
139 cancel_streak_current: 0,
140 cancel_streak_limit: 16,
141 outstanding_obligations: 0,
142 obligation_leak_count: 0,
143 pending_io_registrations: 0,
144 active_timers: 0,
145 worker_count: 1,
146 workers_parked: 0,
147 blocking_threads_active: 0,
148 governor_enabled: false,
149 adaptive_cancel_enabled: false,
150 adaptive_epoch: 0,
151 registered_controllers: 0,
152 shadow_controllers: 0,
153 }
154 }
155}
156
/// Lifecycle mode of a registered controller.
///
/// `ControllerRegistry::try_promote` only allows `Shadow -> Canary` and
/// `Canary -> Active`; `rollback` always lands in `Shadow`; `hold` and
/// `release_hold` move a controller into and out of `Hold`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ControllerMode {
    /// Lowest rung of the promotion ladder and the rollback target.
    Shadow,
    /// Intermediate rung between `Shadow` and `Active`.
    Canary,
    /// Top rung of the promotion ladder.
    Active,
    /// Suspended; promotion is rejected until the hold is released.
    Hold,
}
169
/// A decision emitted by a controller for one snapshot.
///
/// `ControllerRegistry::record_decision` reads `controller_id`, `snapshot_id`
/// and `label`; the remaining fields are not interpreted by the code visible
/// in this part of the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ControllerDecision {
    /// Controller that produced the decision.
    pub controller_id: ControllerId,
    /// Snapshot the decision refers to.
    pub snapshot_id: SnapshotId,
    /// Label of the decision; stored as the controller's last action label and
    /// copied into the ledger.
    pub label: String,
    /// Free-form JSON payload attached to the decision.
    pub payload: serde_json::Value,
    /// Confidence reported with the decision (range not enforced here).
    pub confidence: f64,
    /// Label of the fallback associated with this decision.
    pub fallback_label: String,
}
186
/// Identifier of a registered controller.
///
/// `ControllerRegistry::register` assigns these sequentially, starting from 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct ControllerId(pub u64);
190
/// Everything a controller declares about itself when registering.
///
/// `ControllerRegistry::register` validates the request before accepting it;
/// the per-field notes describe the checks visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ControllerRegistration {
    /// Controller name; must be non-empty and unique within the registry.
    pub name: String,
    /// Lowest snapshot version supported; must not exceed `max_version`, and
    /// `SNAPSHOT_VERSION` must be compatible with it.
    pub min_version: SnapshotVersion,
    /// Highest snapshot version supported; its major must equal the current
    /// snapshot major.
    pub max_version: SnapshotVersion,
    /// Snapshot fields the controller needs; each must appear in `KNOWN_FIELDS`.
    pub required_fields: Vec<String>,
    /// Seams the controller targets; must not be empty.
    pub target_seams: Vec<String>,
    /// Requested starting mode; `register` may downgrade `Active` to `Shadow`.
    pub initial_mode: ControllerMode,
    /// Optional proof artifact identifier (not inspected by the visible code).
    pub proof_artifact_id: Option<String>,
    /// Decision budget; `max_decisions_per_epoch` must be non-zero.
    pub budget: ControllerBudget,
}
211
/// Per-controller decision budget.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ControllerBudget {
    /// Decisions allowed per epoch; `record_decision` counts an overrun for
    /// every decision beyond this number.
    pub max_decisions_per_epoch: u32,
    /// Latency allowance, in microseconds per the field name. The code visible
    /// in this part of the file never checks it.
    pub max_decision_latency_us: u64,
}
220
221impl Default for ControllerBudget {
222 fn default() -> Self {
223 Self {
224 max_decisions_per_epoch: 1,
225 max_decision_latency_us: 100,
226 }
227 }
228}
229
/// Reasons `ControllerRegistry::register` can refuse a registration.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum RegistrationError {
    /// The controller name was empty.
    EmptyName,
    /// `min_version` was greater than `max_version`.
    InvertedVersionRange,
    /// The current snapshot version does not satisfy the declared range.
    IncompatibleVersion {
        /// Snapshot version of this build.
        current: SnapshotVersion,
        /// Declared minimum version.
        min: SnapshotVersion,
        /// Declared maximum version.
        max: SnapshotVersion,
    },
    /// Required fields that are not in `KNOWN_FIELDS`.
    UnsupportedFields(Vec<String>),
    /// No target seam was declared.
    NoTargetSeams,
    /// `max_decisions_per_epoch` was zero.
    ZeroBudget,
    /// Another controller already uses this name.
    DuplicateName(String),
}
255
256impl std::fmt::Display for RegistrationError {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 Self::EmptyName => write!(f, "controller name must not be empty"),
260 Self::InvertedVersionRange => write!(f, "min_version must be <= max_version"),
261 Self::IncompatibleVersion { current, min, max } => {
262 write!(
263 f,
264 "snapshot version {current} outside controller range [{min}, {max}]"
265 )
266 }
267 Self::UnsupportedFields(fields) => {
268 write!(f, "unsupported snapshot fields: {}", fields.join(", "))
269 }
270 Self::NoTargetSeams => write!(f, "controller must target at least one seam"),
271 Self::ZeroBudget => write!(f, "budget must allow at least one decision per epoch"),
272 Self::DuplicateName(name) => {
273 write!(f, "controller with name '{name}' already registered")
274 }
275 }
276 }
277}
278
// Marker impl: `RegistrationError` has no underlying source error, so the
// default `std::error::Error` methods are sufficient.
impl std::error::Error for RegistrationError {}
280
/// Snapshot field names a registration may list in `required_fields`.
///
/// `ControllerRegistry::validate` rejects any name that is not in this list.
/// The entries correspond to the `RuntimeKernelSnapshot` fields other than
/// `id`, `version` and `timestamp`.
const KNOWN_FIELDS: &[&str] = &[
    "ready_queue_len",
    "cancel_lane_len",
    "finalize_lane_len",
    "total_tasks",
    "active_regions",
    "cancel_streak_current",
    "cancel_streak_limit",
    "outstanding_obligations",
    "obligation_leak_count",
    "pending_io_registrations",
    "active_timers",
    "worker_count",
    "workers_parked",
    "blocking_threads_active",
    "governor_enabled",
    "adaptive_cancel_enabled",
    "adaptive_epoch",
    "registered_controllers",
    "shadow_controllers",
];
303
/// Thresholds that gate `ControllerRegistry::try_promote`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PromotionPolicy {
    /// Minimum calibration score a controller needs to be promoted.
    pub min_calibration_score: f64,
    /// Epochs a controller must spend in `Shadow` before moving to `Canary`.
    pub min_shadow_epochs: u64,
    /// Epochs a controller must spend in `Canary` before moving to `Active`.
    pub min_canary_epochs: u64,
    /// Budget overrun limit. The code visible in this part of the file does
    /// not read it — presumably enforced by a caller; TODO confirm.
    pub max_budget_overruns: u32,
    /// Identifier copied into ledger entries, log lines and recovery commands.
    pub policy_id: String,
}
318
319impl Default for PromotionPolicy {
320 fn default() -> Self {
321 Self {
322 min_calibration_score: 0.8,
323 min_shadow_epochs: 3,
324 min_canary_epochs: 2,
325 max_budget_overruns: 3,
326 policy_id: "default-promotion-policy-v1".to_string(),
327 }
328 }
329}
330
/// Reasons `ControllerRegistry::try_promote` can refuse a promotion.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PromotionRejection {
    /// No controller is registered under the given id.
    ControllerNotFound,
    /// The calibration score is below the policy threshold.
    CalibrationTooLow {
        /// Score the controller currently has.
        current: f64,
        /// Score the policy requires.
        required: f64,
    },
    /// The controller has not spent enough epochs in its current mode.
    InsufficientEpochs {
        /// Epochs spent in the current mode so far.
        current: u64,
        /// Epochs the policy requires for that mode.
        required: u64,
        /// Mode the controller is currently in.
        mode: ControllerMode,
    },
    /// The requested transition is not `Shadow -> Canary` or `Canary -> Active`.
    InvalidTransition {
        /// Mode the controller is currently in.
        from: ControllerMode,
        /// Mode that was requested.
        to: ControllerMode,
    },
    /// The controller is in `Hold` and must be released first.
    HeldForInvestigation,
}
362
363impl std::fmt::Display for PromotionRejection {
364 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
365 match self {
366 Self::ControllerNotFound => write!(f, "controller not found"),
367 Self::CalibrationTooLow { current, required } => {
368 write!(
369 f,
370 "calibration score {current:.3} below threshold {required:.3}"
371 )
372 }
373 Self::InsufficientEpochs {
374 current,
375 required,
376 mode,
377 } => {
378 write!(f, "only {current} epochs in {mode:?}, need {required}")
379 }
380 Self::InvalidTransition { from, to } => {
381 write!(f, "invalid transition from {from:?} to {to:?}")
382 }
383 Self::HeldForInvestigation => {
384 write!(
385 f,
386 "controller held for investigation; release before promoting"
387 )
388 }
389 }
390 }
391}
392
/// Why a controller was rolled back; supplied by the caller of
/// `ControllerRegistry::rollback`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RollbackReason {
    /// The calibration score regressed.
    CalibrationRegression {
        /// Score that triggered the rollback.
        score: f64,
    },
    /// The controller went over its decision budget.
    BudgetOverruns {
        /// Number of overruns reported.
        count: u32,
    },
    /// Rollback was requested manually.
    ManualRollback,
    /// A decision triggered the fallback path.
    FallbackTriggered {
        /// Label of the decision that triggered the fallback.
        decision_label: String,
    },
}
414
415impl std::fmt::Display for RollbackReason {
416 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
417 match self {
418 Self::CalibrationRegression { score } => {
419 write!(f, "calibration regressed to {score:.3}")
420 }
421 Self::BudgetOverruns { count } => {
422 write!(f, "budget overruns reached {count}")
423 }
424 Self::ManualRollback => write!(f, "manual rollback requested"),
425 Self::FallbackTriggered { decision_label } => {
426 write!(f, "fallback triggered by decision: {decision_label}")
427 }
428 }
429 }
430}
431
/// Description of a completed rollback, returned by
/// `ControllerRegistry::rollback`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecoveryCommand {
    /// Controller that was rolled back.
    pub controller_id: ControllerId,
    /// Registered name of that controller.
    pub controller_name: String,
    /// Mode the controller was in before the rollback.
    pub rolled_back_from: ControllerMode,
    /// Mode the controller is in after the rollback (`rollback` always uses
    /// `Shadow`).
    pub rolled_back_to: ControllerMode,
    /// Reason supplied by the caller.
    pub reason: RollbackReason,
    /// Id of the promotion policy in force at rollback time.
    pub policy_id: String,
    /// Snapshot of the controller's most recent recorded decision, if any.
    pub at_snapshot_id: Option<SnapshotId>,
    /// Suggested follow-up steps, chosen from the rollback reason.
    pub remediation: Vec<String>,
}
452
/// One record in the registry's append-only evidence ledger.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvidenceLedgerEntry {
    /// Sequential entry id, starting from 1.
    pub entry_id: u64,
    /// Controller the entry is about.
    pub controller_id: ControllerId,
    /// Snapshot associated with the event, when one is known.
    pub snapshot_id: Option<SnapshotId>,
    /// What happened.
    pub event: LedgerEvent,
    /// Id of the promotion policy in force when the entry was written.
    pub policy_id: String,
    /// Entry timestamp. The registry code in this file always writes
    /// `Time::ZERO` here.
    pub timestamp: Time,
}
469
/// Kinds of events stored in the evidence ledger.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LedgerEvent {
    /// A controller was registered.
    Registered {
        /// Mode the controller actually started in.
        initial_mode: ControllerMode,
    },
    /// A promotion succeeded.
    Promoted {
        /// Mode before the promotion.
        from: ControllerMode,
        /// Mode after the promotion.
        to: ControllerMode,
        /// Calibration score at promotion time.
        calibration_score: f64,
    },
    /// A controller was rolled back.
    RolledBack {
        /// Mode before the rollback.
        from: ControllerMode,
        /// Mode after the rollback.
        to: ControllerMode,
        /// Reason supplied by the caller.
        reason: RollbackReason,
    },
    /// A controller was placed on hold.
    Held {
        /// Mode the controller was in before the hold.
        from: ControllerMode,
    },
    /// A hold was released.
    Released {
        /// Mode the controller was restored to.
        to: ControllerMode,
    },
    /// A controller was removed from the registry.
    Deregistered,
    /// A promotion attempt was rejected.
    PromotionRejected {
        /// Mode that was requested.
        target: ControllerMode,
        /// Why the request was refused.
        rejection: PromotionRejection,
    },
    /// A decision was recorded.
    DecisionRecorded {
        /// Label of the decision.
        label: String,
        /// Whether the decision fit inside the per-epoch budget.
        within_budget: bool,
    },
}
523
/// Registry-internal state kept for each registered controller.
#[derive(Debug, Clone)]
struct RegisteredController {
    /// Id assigned at registration (same as the map key).
    id: ControllerId,
    /// Registration as submitted by the controller.
    registration: ControllerRegistration,
    /// Current lifecycle mode.
    mode: ControllerMode,
    /// Decisions recorded since the last `reset_epoch` / `advance_epoch`.
    decisions_this_epoch: u32,
    /// Snapshot of the most recently recorded decision.
    last_snapshot_id: Option<SnapshotId>,
    /// Latest score passed to `update_calibration`; starts at 0.0.
    calibration_score: f64,
    /// Epochs counted by `advance_epoch` since the mode last changed through
    /// promotion, rollback or hold release.
    epochs_in_current_mode: u64,
    /// Over-budget decisions counted by `record_decision`; zeroed on promotion.
    budget_overruns: u32,
    /// Mode remembered by `hold` so that `release_hold` can restore it.
    held_from_mode: Option<ControllerMode>,
    /// Set by `rollback`; cleared by `clear_fallback`.
    fallback_active: bool,
    /// Label of the most recently recorded decision.
    last_action_label: String,
}
540
/// Shared callback that receives each log line the registry emits.
type LogSink = Arc<dyn Fn(&str) + Send + Sync>;
543
/// Registry of controllers: validates registrations, tracks each controller's
/// mode and counters, applies the promotion policy, and appends every
/// lifecycle event to an evidence ledger.
pub struct ControllerRegistry {
    /// Registered controllers, keyed (and therefore ordered) by id.
    controllers: BTreeMap<ControllerId, RegisteredController>,
    /// Next controller id to hand out.
    next_id: u64,
    /// Next snapshot id to hand out.
    next_snapshot_id: u64,
    /// Optional sink for log lines; nothing is logged when `None`.
    log_sink: Option<LogSink>,
    /// Policy consulted by `try_promote`; its id is stamped on ledger entries.
    promotion_policy: PromotionPolicy,
    /// Ledger entries in the order they were recorded.
    evidence_ledger: Vec<EvidenceLedgerEntry>,
    /// Next ledger entry id to hand out.
    next_ledger_id: u64,
}
566
567impl ControllerRegistry {
568 #[must_use]
570 pub fn new() -> Self {
571 Self {
572 controllers: BTreeMap::new(),
573 next_id: 1,
574 next_snapshot_id: 1,
575 log_sink: None,
576 promotion_policy: PromotionPolicy::default(),
577 evidence_ledger: Vec::new(),
578 next_ledger_id: 1,
579 }
580 }
581
582 #[must_use]
584 pub fn with_log_sink(mut self, sink: LogSink) -> Self {
585 self.log_sink = Some(sink);
586 self
587 }
588
589 pub fn register(
591 &mut self,
592 registration: ControllerRegistration,
593 ) -> Result<ControllerId, RegistrationError> {
594 self.validate(®istration)?;
595
596 let id = ControllerId(self.next_id);
597 self.next_id += 1;
598
599 let mode = if registration.initial_mode == ControllerMode::Active
600 && !registration
601 .max_version
602 .is_compatible_with(&SNAPSHOT_VERSION)
603 {
604 ControllerMode::Shadow
606 } else {
607 registration.initial_mode
608 };
609
610 if let Some(ref sink) = self.log_sink {
611 sink(&format!(
612 "controller_registered id={} name={} mode={:?} seams={:?} version_range=[{}, {}]",
613 id.0,
614 registration.name,
615 mode,
616 registration.target_seams,
617 registration.min_version,
618 registration.max_version,
619 ));
620 }
621
622 self.controllers.insert(
623 id,
624 RegisteredController {
625 id,
626 registration,
627 mode,
628 decisions_this_epoch: 0,
629 last_snapshot_id: None,
630 calibration_score: 0.0,
631 epochs_in_current_mode: 0,
632 budget_overruns: 0,
633 held_from_mode: None,
634 fallback_active: false,
635 last_action_label: String::new(),
636 },
637 );
638
639 self.record_ledger_entry(id, None, LedgerEvent::Registered { initial_mode: mode });
640
641 Ok(id)
642 }
643
644 fn validate(&self, reg: &ControllerRegistration) -> Result<(), RegistrationError> {
646 if reg.name.is_empty() {
647 return Err(RegistrationError::EmptyName);
648 }
649 if reg.min_version > reg.max_version {
650 return Err(RegistrationError::InvertedVersionRange);
651 }
652 if !SNAPSHOT_VERSION.is_compatible_with(®.min_version)
653 || SNAPSHOT_VERSION.major != reg.max_version.major
654 {
655 return Err(RegistrationError::IncompatibleVersion {
656 current: SNAPSHOT_VERSION,
657 min: reg.min_version,
658 max: reg.max_version,
659 });
660 }
661 let unknown: Vec<String> = reg
662 .required_fields
663 .iter()
664 .filter(|f| !KNOWN_FIELDS.contains(&f.as_str()))
665 .cloned()
666 .collect();
667 if !unknown.is_empty() {
668 return Err(RegistrationError::UnsupportedFields(unknown));
669 }
670 if reg.target_seams.is_empty() {
671 return Err(RegistrationError::NoTargetSeams);
672 }
673 if reg.budget.max_decisions_per_epoch == 0 {
674 return Err(RegistrationError::ZeroBudget);
675 }
676 if self
677 .controllers
678 .values()
679 .any(|c| c.registration.name == reg.name)
680 {
681 return Err(RegistrationError::DuplicateName(reg.name.clone()));
682 }
683 Ok(())
684 }
685
686 pub fn deregister(&mut self, id: ControllerId) -> bool {
688 let removed = self.controllers.remove(&id).is_some();
689 if removed {
690 self.record_ledger_entry(id, None, LedgerEvent::Deregistered);
691 }
692 removed
693 }
694
695 #[must_use]
697 pub fn mode(&self, id: ControllerId) -> Option<ControllerMode> {
698 self.controllers.get(&id).map(|c| c.mode)
699 }
700
701 pub fn set_mode(&mut self, id: ControllerId, mode: ControllerMode) -> bool {
703 let Some(controller) = self.controllers.get_mut(&id) else {
704 return false;
705 };
706 controller.mode = mode;
707 true
708 }
709
710 #[must_use]
712 pub fn registration(&self, id: ControllerId) -> Option<&ControllerRegistration> {
713 self.controllers.get(&id).map(|c| &c.registration)
714 }
715
716 #[must_use]
718 pub fn len(&self) -> usize {
719 self.controllers.len()
720 }
721
722 #[must_use]
724 pub fn is_empty(&self) -> bool {
725 self.controllers.is_empty()
726 }
727
728 #[must_use]
730 pub fn shadow_count(&self) -> usize {
731 self.controllers
732 .values()
733 .filter(|c| c.mode == ControllerMode::Shadow)
734 .count()
735 }
736
737 pub fn next_snapshot_id(&mut self) -> SnapshotId {
739 let id = SnapshotId(self.next_snapshot_id);
740 self.next_snapshot_id += 1;
741 id
742 }
743
744 pub fn reset_epoch(&mut self) {
748 for controller in self.controllers.values_mut() {
749 controller.decisions_this_epoch = 0;
750 }
751 }
752
753 pub fn record_decision(&mut self, decision: &ControllerDecision) -> bool {
756 let Some(controller) = self.controllers.get_mut(&decision.controller_id) else {
757 return false;
758 };
759 controller.last_snapshot_id = Some(decision.snapshot_id);
760 controller.last_action_label.clone_from(&decision.label);
761 controller.decisions_this_epoch += 1;
762 let within_budget = controller.decisions_this_epoch
763 <= controller.registration.budget.max_decisions_per_epoch;
764 if !within_budget {
765 controller.budget_overruns += 1;
766 }
767
768 self.record_ledger_entry(
769 decision.controller_id,
770 Some(decision.snapshot_id),
771 LedgerEvent::DecisionRecorded {
772 label: decision.label.clone(),
773 within_budget,
774 },
775 );
776
777 within_budget
778 }
779
780 pub fn update_calibration(&mut self, id: ControllerId, score: f64) {
782 if let Some(controller) = self.controllers.get_mut(&id) {
783 controller.calibration_score = score;
784 }
785 }
786
787 #[must_use]
789 pub fn calibration_score(&self, id: ControllerId) -> Option<f64> {
790 self.controllers.get(&id).map(|c| c.calibration_score)
791 }
792
793 #[must_use]
795 pub fn controller_ids(&self) -> Vec<ControllerId> {
796 self.controllers.keys().copied().collect()
797 }
798
799 pub fn set_promotion_policy(&mut self, policy: PromotionPolicy) {
801 self.promotion_policy = policy;
802 }
803
804 #[must_use]
806 pub fn promotion_policy(&self) -> &PromotionPolicy {
807 &self.promotion_policy
808 }
809
810 pub fn advance_epoch(&mut self) {
812 for controller in self.controllers.values_mut() {
813 controller.epochs_in_current_mode += 1;
814 controller.decisions_this_epoch = 0;
815 }
816 }
817
818 pub fn try_promote(
824 &mut self,
825 id: ControllerId,
826 target: ControllerMode,
827 ) -> Result<ControllerMode, PromotionRejection> {
828 let policy = self.promotion_policy.clone();
829 let controller = self
830 .controllers
831 .get(&id)
832 .ok_or(PromotionRejection::ControllerNotFound)?;
833
834 let current_mode = controller.mode;
835 let calibration = controller.calibration_score;
836 let epochs = controller.epochs_in_current_mode;
837
838 if current_mode == ControllerMode::Hold {
840 let rejection = PromotionRejection::HeldForInvestigation;
841 self.record_ledger_entry(
842 id,
843 None,
844 LedgerEvent::PromotionRejected {
845 target,
846 rejection: rejection.clone(),
847 },
848 );
849 self.log_promotion_rejection(id, &rejection, &policy);
850 return Err(rejection);
851 }
852
853 let valid = matches!(
855 (current_mode, target),
856 (ControllerMode::Shadow, ControllerMode::Canary)
857 | (ControllerMode::Canary, ControllerMode::Active)
858 );
859 if !valid {
860 let rejection = PromotionRejection::InvalidTransition {
861 from: current_mode,
862 to: target,
863 };
864 self.record_ledger_entry(
865 id,
866 None,
867 LedgerEvent::PromotionRejected {
868 target,
869 rejection: rejection.clone(),
870 },
871 );
872 self.log_promotion_rejection(id, &rejection, &policy);
873 return Err(rejection);
874 }
875
876 if calibration < policy.min_calibration_score {
878 let rejection = PromotionRejection::CalibrationTooLow {
879 current: calibration,
880 required: policy.min_calibration_score,
881 };
882 self.record_ledger_entry(
883 id,
884 None,
885 LedgerEvent::PromotionRejected {
886 target,
887 rejection: rejection.clone(),
888 },
889 );
890 self.log_promotion_rejection(id, &rejection, &policy);
891 return Err(rejection);
892 }
893
894 let required_epochs = match current_mode {
896 ControllerMode::Shadow => policy.min_shadow_epochs,
897 ControllerMode::Canary => policy.min_canary_epochs,
898 _ => 0,
899 };
900 if epochs < required_epochs {
901 let rejection = PromotionRejection::InsufficientEpochs {
902 current: epochs,
903 required: required_epochs,
904 mode: current_mode,
905 };
906 self.record_ledger_entry(
907 id,
908 None,
909 LedgerEvent::PromotionRejected {
910 target,
911 rejection: rejection.clone(),
912 },
913 );
914 self.log_promotion_rejection(id, &rejection, &policy);
915 return Err(rejection);
916 }
917
918 let controller = self.controllers.get_mut(&id).expect("checked above");
920 controller.mode = target;
921 controller.epochs_in_current_mode = 0;
922 controller.budget_overruns = 0;
923
924 self.record_ledger_entry(
925 id,
926 None,
927 LedgerEvent::Promoted {
928 from: current_mode,
929 to: target,
930 calibration_score: calibration,
931 },
932 );
933
934 if let Some(ref sink) = self.log_sink {
935 sink(&format!(
936 "controller_promoted id={} from={:?} to={:?} calibration={:.3} policy_id={}",
937 id.0, current_mode, target, calibration, policy.policy_id,
938 ));
939 }
940
941 Ok(target)
942 }
943
944 pub fn rollback(
946 &mut self,
947 id: ControllerId,
948 reason: RollbackReason,
949 ) -> Option<RecoveryCommand> {
950 let policy_id = self.promotion_policy.policy_id.clone();
951 let controller = self.controllers.get_mut(&id)?;
952 let from = controller.mode;
953
954 if from == ControllerMode::Shadow {
955 return None;
957 }
958
959 let to = ControllerMode::Shadow;
960 controller.mode = to;
961 controller.epochs_in_current_mode = 0;
962 controller.fallback_active = true;
963 let name = controller.registration.name.clone();
964 let snapshot_id = controller.last_snapshot_id;
965
966 self.record_ledger_entry(
967 id,
968 snapshot_id,
969 LedgerEvent::RolledBack {
970 from,
971 to,
972 reason: reason.clone(),
973 },
974 );
975
976 if let Some(ref sink) = self.log_sink {
977 sink(&format!(
978 "controller_rolled_back id={} from={:?} to={:?} reason={} policy_id={} snapshot_id={:?}",
979 id.0, from, to, reason, policy_id, snapshot_id,
980 ));
981 }
982
983 let remediation = match &reason {
984 RollbackReason::CalibrationRegression { score } => vec![
985 format!("Investigate calibration drop to {score:.3}"),
986 "Review recent decision evidence in ledger".to_string(),
987 "Re-run shadow validation before re-promotion".to_string(),
988 ],
989 RollbackReason::BudgetOverruns { count } => vec![
990 format!("Controller exceeded budget {count} times"),
991 "Review decision frequency and payload complexity".to_string(),
992 "Consider increasing budget or reducing decision scope".to_string(),
993 ],
994 RollbackReason::ManualRollback => vec![
995 "Manual rollback — verify runtime stability".to_string(),
996 "Check evidence ledger for preceding anomalies".to_string(),
997 ],
998 RollbackReason::FallbackTriggered { decision_label } => vec![
999 format!("Fallback triggered by decision: {decision_label}"),
1000 "Inspect decision payload and snapshot context".to_string(),
1001 "Validate fallback path is functioning correctly".to_string(),
1002 ],
1003 };
1004
1005 Some(RecoveryCommand {
1006 controller_id: id,
1007 controller_name: name,
1008 rolled_back_from: from,
1009 rolled_back_to: to,
1010 reason,
1011 policy_id,
1012 at_snapshot_id: snapshot_id,
1013 remediation,
1014 })
1015 }
1016
1017 pub fn hold(&mut self, id: ControllerId) -> bool {
1019 let Some(controller) = self.controllers.get_mut(&id) else {
1020 return false;
1021 };
1022 if controller.mode == ControllerMode::Hold {
1023 return false; }
1025 let from = controller.mode;
1026 controller.held_from_mode = Some(from);
1027 controller.mode = ControllerMode::Hold;
1028
1029 self.record_ledger_entry(id, None, LedgerEvent::Held { from });
1030
1031 if let Some(ref sink) = self.log_sink {
1032 sink(&format!(
1033 "controller_held id={} from={:?} policy_id={}",
1034 id.0, from, self.promotion_policy.policy_id,
1035 ));
1036 }
1037 true
1038 }
1039
1040 pub fn release_hold(&mut self, id: ControllerId) -> Option<ControllerMode> {
1042 let controller = self.controllers.get_mut(&id)?;
1043 if controller.mode != ControllerMode::Hold {
1044 return None;
1045 }
1046 let restored = controller
1047 .held_from_mode
1048 .take()
1049 .unwrap_or(ControllerMode::Shadow);
1050 controller.mode = restored;
1051 controller.epochs_in_current_mode = 0;
1052
1053 self.record_ledger_entry(id, None, LedgerEvent::Released { to: restored });
1054
1055 if let Some(ref sink) = self.log_sink {
1056 sink(&format!(
1057 "controller_released id={} to={:?} policy_id={}",
1058 id.0, restored, self.promotion_policy.policy_id,
1059 ));
1060 }
1061 Some(restored)
1062 }
1063
1064 #[must_use]
1066 pub fn is_fallback_active(&self, id: ControllerId) -> bool {
1067 self.controllers.get(&id).is_some_and(|c| c.fallback_active)
1068 }
1069
1070 pub fn clear_fallback(&mut self, id: ControllerId) {
1072 if let Some(controller) = self.controllers.get_mut(&id) {
1073 controller.fallback_active = false;
1074 }
1075 }
1076
1077 #[must_use]
1079 pub fn evidence_ledger(&self) -> &[EvidenceLedgerEntry] {
1080 &self.evidence_ledger
1081 }
1082
1083 #[must_use]
1085 pub fn controller_ledger(&self, id: ControllerId) -> Vec<&EvidenceLedgerEntry> {
1086 self.evidence_ledger
1087 .iter()
1088 .filter(|entry| entry.controller_id == id)
1089 .collect()
1090 }
1091
1092 #[must_use]
1094 pub fn epochs_in_current_mode(&self, id: ControllerId) -> Option<u64> {
1095 self.controllers.get(&id).map(|c| c.epochs_in_current_mode)
1096 }
1097
1098 #[must_use]
1100 pub fn budget_overruns(&self, id: ControllerId) -> Option<u32> {
1101 self.controllers.get(&id).map(|c| c.budget_overruns)
1102 }
1103
1104 fn record_ledger_entry(
1105 &mut self,
1106 controller_id: ControllerId,
1107 snapshot_id: Option<SnapshotId>,
1108 event: LedgerEvent,
1109 ) {
1110 let entry = EvidenceLedgerEntry {
1111 entry_id: self.next_ledger_id,
1112 controller_id,
1113 snapshot_id,
1114 event,
1115 policy_id: self.promotion_policy.policy_id.clone(),
1116 timestamp: Time::ZERO, };
1118 self.next_ledger_id += 1;
1119 self.evidence_ledger.push(entry);
1120 }
1121
1122 fn log_promotion_rejection(
1123 &self,
1124 id: ControllerId,
1125 rejection: &PromotionRejection,
1126 policy: &PromotionPolicy,
1127 ) {
1128 if let Some(ref sink) = self.log_sink {
1129 sink(&format!(
1130 "controller_promotion_rejected id={} reason={} policy_id={}",
1131 id.0, rejection, policy.policy_id,
1132 ));
1133 }
1134 }
1135}
1136
1137impl Default for ControllerRegistry {
1138 fn default() -> Self {
1139 Self::new()
1140 }
1141}
1142
1143#[cfg(test)]
1144mod tests {
1145 use super::*;
1146
1147 fn test_registration(name: &str) -> ControllerRegistration {
1148 ControllerRegistration {
1149 name: name.to_string(),
1150 min_version: SnapshotVersion { major: 1, minor: 0 },
1151 max_version: SnapshotVersion { major: 1, minor: 0 },
1152 required_fields: vec!["ready_queue_len".to_string(), "cancel_lane_len".to_string()],
1153 target_seams: vec!["AA01-SEAM-SCHED-CANCEL-STREAK".to_string()],
1154 initial_mode: ControllerMode::Shadow,
1155 proof_artifact_id: None,
1156 budget: ControllerBudget::default(),
1157 }
1158 }
1159
1160 #[test]
1161 fn snapshot_version_compatibility() {
1162 let v1_0 = SnapshotVersion { major: 1, minor: 0 };
1163 let v1_1 = SnapshotVersion { major: 1, minor: 1 };
1164 let v2_0 = SnapshotVersion { major: 2, minor: 0 };
1165
1166 assert!(v1_0.is_compatible_with(&v1_0));
1167 assert!(v1_1.is_compatible_with(&v1_0));
1168 assert!(!v1_0.is_compatible_with(&v1_1));
1169 assert!(!v2_0.is_compatible_with(&v1_0));
1170 }
1171
1172 #[test]
1173 fn snapshot_serialization_roundtrip() {
1174 let snap = RuntimeKernelSnapshot::test_default(1, Time::ZERO);
1175 let json = serde_json::to_string(&snap).unwrap();
1176 let deser: RuntimeKernelSnapshot = serde_json::from_str(&json).unwrap();
1177 assert_eq!(deser.id, snap.id);
1178 assert_eq!(deser.version, snap.version);
1179 assert_eq!(deser.ready_queue_len, 0);
1180 assert_eq!(deser.worker_count, 1);
1181 }
1182
1183 #[test]
1184 fn snapshot_deterministic_serialization() {
1185 let snap1 = RuntimeKernelSnapshot::test_default(42, Time::ZERO);
1186 let snap2 = RuntimeKernelSnapshot::test_default(42, Time::ZERO);
1187 assert_eq!(
1188 serde_json::to_string(&snap1).unwrap(),
1189 serde_json::to_string(&snap2).unwrap(),
1190 );
1191 }
1192
1193 #[test]
1194 fn register_valid_controller() {
1195 let mut registry = ControllerRegistry::new();
1196 let id = registry.register(test_registration("test-ctrl")).unwrap();
1197 assert_eq!(id.0, 1);
1198 assert_eq!(registry.len(), 1);
1199 assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
1200 }
1201
1202 #[test]
1203 fn reject_empty_name() {
1204 let mut registry = ControllerRegistry::new();
1205 let mut reg = test_registration("");
1206 reg.name = String::new();
1207 assert_eq!(
1208 registry.register(reg).unwrap_err(),
1209 RegistrationError::EmptyName,
1210 );
1211 }
1212
1213 #[test]
1214 fn reject_inverted_version_range() {
1215 let mut registry = ControllerRegistry::new();
1216 let mut reg = test_registration("bad-range");
1217 reg.min_version = SnapshotVersion { major: 2, minor: 0 };
1218 reg.max_version = SnapshotVersion { major: 1, minor: 0 };
1219 assert_eq!(
1220 registry.register(reg).unwrap_err(),
1221 RegistrationError::InvertedVersionRange,
1222 );
1223 }
1224
1225 #[test]
1226 fn reject_incompatible_version() {
1227 let mut registry = ControllerRegistry::new();
1228 let mut reg = test_registration("future-ctrl");
1229 reg.min_version = SnapshotVersion { major: 5, minor: 0 };
1230 reg.max_version = SnapshotVersion { major: 5, minor: 0 };
1231 assert!(matches!(
1232 registry.register(reg).unwrap_err(),
1233 RegistrationError::IncompatibleVersion { .. }
1234 ));
1235
1236 let mut reg2 = test_registration("future-minor-ctrl");
1238 reg2.min_version = SnapshotVersion {
1239 major: SNAPSHOT_VERSION.major,
1240 minor: SNAPSHOT_VERSION.minor + 1,
1241 };
1242 reg2.max_version = SnapshotVersion {
1243 major: SNAPSHOT_VERSION.major,
1244 minor: SNAPSHOT_VERSION.minor + 1,
1245 };
1246 assert!(matches!(
1247 registry.register(reg2).unwrap_err(),
1248 RegistrationError::IncompatibleVersion { .. }
1249 ));
1250 }
1251
1252 #[test]
1253 fn reject_unsupported_fields() {
1254 let mut registry = ControllerRegistry::new();
1255 let mut reg = test_registration("bad-fields");
1256 reg.required_fields = vec!["nonexistent_field".to_string()];
1257 assert!(matches!(
1258 registry.register(reg).unwrap_err(),
1259 RegistrationError::UnsupportedFields(_)
1260 ));
1261 }
1262
1263 #[test]
1264 fn reject_no_target_seams() {
1265 let mut registry = ControllerRegistry::new();
1266 let mut reg = test_registration("no-seams");
1267 reg.target_seams = vec![];
1268 assert_eq!(
1269 registry.register(reg).unwrap_err(),
1270 RegistrationError::NoTargetSeams,
1271 );
1272 }
1273
1274 #[test]
1275 fn reject_zero_budget() {
1276 let mut registry = ControllerRegistry::new();
1277 let mut reg = test_registration("zero-budget");
1278 reg.budget.max_decisions_per_epoch = 0;
1279 assert_eq!(
1280 registry.register(reg).unwrap_err(),
1281 RegistrationError::ZeroBudget,
1282 );
1283 }
1284
1285 #[test]
1286 fn reject_duplicate_name() {
1287 let mut registry = ControllerRegistry::new();
1288 registry.register(test_registration("dup")).unwrap();
1289 assert_eq!(
1290 registry.register(test_registration("dup")).unwrap_err(),
1291 RegistrationError::DuplicateName("dup".to_string()),
1292 );
1293 }
1294
1295 #[test]
1296 fn deregister_controller() {
1297 let mut registry = ControllerRegistry::new();
1298 let id = registry.register(test_registration("removable")).unwrap();
1299 assert!(registry.deregister(id));
1300 assert_eq!(registry.len(), 0);
1301 assert!(!registry.deregister(id));
1302 }
1303
1304 #[test]
1305 fn set_mode() {
1306 let mut registry = ControllerRegistry::new();
1307 let id = registry.register(test_registration("mode-test")).unwrap();
1308 assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
1309 assert!(registry.set_mode(id, ControllerMode::Active));
1310 assert_eq!(registry.mode(id), Some(ControllerMode::Active));
1311 }
1312
1313 #[test]
1314 fn shadow_count() {
1315 let mut registry = ControllerRegistry::new();
1316 let id1 = registry.register(test_registration("s1")).unwrap();
1317 let _id2 = registry.register(test_registration("s2")).unwrap();
1318 assert_eq!(registry.shadow_count(), 2);
1319 registry.set_mode(id1, ControllerMode::Active);
1320 assert_eq!(registry.shadow_count(), 1);
1321 }
1322
1323 #[test]
1324 fn decision_budget_enforcement() {
1325 let mut registry = ControllerRegistry::new();
1326 let id = registry.register(test_registration("budget-ctrl")).unwrap();
1327 let snap_id = registry.next_snapshot_id();
1328
1329 let decision = ControllerDecision {
1330 controller_id: id,
1331 snapshot_id: snap_id,
1332 label: "test".to_string(),
1333 payload: serde_json::Value::Null,
1334 confidence: 0.9,
1335 fallback_label: "noop".to_string(),
1336 };
1337
1338 assert!(registry.record_decision(&decision));
1340 assert!(!registry.record_decision(&decision));
1342 registry.reset_epoch();
1344 assert!(registry.record_decision(&decision));
1346 }
1347
1348 #[test]
1349 fn calibration_tracking() {
1350 let mut registry = ControllerRegistry::new();
1351 let id = registry.register(test_registration("calib")).unwrap();
1352 assert_eq!(registry.calibration_score(id), Some(0.0));
1353 registry.update_calibration(id, 0.85);
1354 assert_eq!(registry.calibration_score(id), Some(0.85));
1355 }
1356
1357 #[test]
1358 fn snapshot_id_monotonic() {
1359 let mut registry = ControllerRegistry::new();
1360 let id1 = registry.next_snapshot_id();
1361 let id2 = registry.next_snapshot_id();
1362 let id3 = registry.next_snapshot_id();
1363 assert!(id1 < id2);
1364 assert!(id2 < id3);
1365 }
1366
1367 #[test]
1368 fn active_mode_not_downgraded_when_snapshot_matches() {
1369 let mut registry = ControllerRegistry::new();
1370 let mut reg = test_registration("downgrade-test");
1371 reg.initial_mode = ControllerMode::Active;
1372 reg.min_version = SNAPSHOT_VERSION;
1374 reg.max_version = SNAPSHOT_VERSION;
1375 let id = registry.register(reg).unwrap();
1377 assert_eq!(registry.mode(id), Some(ControllerMode::Active));
1378 }
1379
1380 #[test]
1381 fn known_fields_completeness() {
1382 let snap = RuntimeKernelSnapshot::test_default(1, Time::ZERO);
1384 let json = serde_json::to_value(&snap).unwrap();
1385 let obj = json.as_object().unwrap();
1386 let meta_fields = [
1388 "id",
1389 "version",
1390 "timestamp",
1391 "registered_controllers",
1392 "shadow_controllers",
1393 ];
1394 for field in KNOWN_FIELDS {
1395 assert!(
1396 obj.contains_key(*field),
1397 "KNOWN_FIELDS contains '{field}' but snapshot JSON does not"
1398 );
1399 }
1400 for key in obj.keys() {
1401 if meta_fields.contains(&key.as_str()) {
1402 continue;
1403 }
1404 assert!(
1405 KNOWN_FIELDS.contains(&key.as_str()),
1406 "snapshot JSON has field '{key}' not in KNOWN_FIELDS"
1407 );
1408 }
1409 }
1410
1411 #[test]
1412 fn registration_info_accessible() {
1413 let mut registry = ControllerRegistry::new();
1414 let id = registry.register(test_registration("info-test")).unwrap();
1415 let reg = registry.registration(id).unwrap();
1416 assert_eq!(reg.name, "info-test");
1417 assert_eq!(reg.target_seams, vec!["AA01-SEAM-SCHED-CANCEL-STREAK"]);
1418 }
1419
1420 #[test]
1421 fn controller_ids_listed() {
1422 let mut registry = ControllerRegistry::new();
1423 let id1 = registry.register(test_registration("a")).unwrap();
1424 let id2 = registry.register(test_registration("b")).unwrap();
1425 let ids = registry.controller_ids();
1426 assert!(ids.contains(&id1));
1427 assert!(ids.contains(&id2));
1428 assert_eq!(ids.len(), 2);
1429 }
1430
1431 #[test]
1432 fn log_sink_receives_registration_event() {
1433 use parking_lot::Mutex;
1434 let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1435 let logs_clone = Arc::clone(&logs);
1436 let mut registry = ControllerRegistry::new().with_log_sink(Arc::new(move |msg: &str| {
1437 logs_clone.lock().push(msg.to_string());
1438 }));
1439 registry.register(test_registration("logged")).unwrap();
1440 {
1441 let captured = logs.lock();
1442 assert_eq!(captured.len(), 1);
1443 assert!(captured[0].contains("controller_registered"));
1444 assert!(captured[0].contains("logged"));
1445 drop(captured);
1446 }
1447 }
1448
1449 #[test]
1450 fn decision_for_unknown_controller_returns_false() {
1451 let mut registry = ControllerRegistry::new();
1452 let decision = ControllerDecision {
1453 controller_id: ControllerId(999),
1454 snapshot_id: SnapshotId(1),
1455 label: "ghost".to_string(),
1456 payload: serde_json::Value::Null,
1457 confidence: 1.0,
1458 fallback_label: "noop".to_string(),
1459 };
1460 assert!(!registry.record_decision(&decision));
1461 }
1462
1463 #[test]
1464 fn version_display() {
1465 let v = SnapshotVersion { major: 1, minor: 2 };
1466 assert_eq!(format!("{v}"), "1.2");
1467 }
1468
1469 #[test]
1470 fn error_display_coverage() {
1471 let errors = [
1472 RegistrationError::EmptyName,
1473 RegistrationError::InvertedVersionRange,
1474 RegistrationError::IncompatibleVersion {
1475 current: SnapshotVersion { major: 1, minor: 0 },
1476 min: SnapshotVersion { major: 2, minor: 0 },
1477 max: SnapshotVersion { major: 2, minor: 0 },
1478 },
1479 RegistrationError::UnsupportedFields(vec!["foo".to_string()]),
1480 RegistrationError::NoTargetSeams,
1481 RegistrationError::ZeroBudget,
1482 RegistrationError::DuplicateName("dup".to_string()),
1483 ];
1484 for error in &errors {
1485 let msg = format!("{error}");
1486 assert!(!msg.is_empty());
1487 }
1488 }
1489
1490 fn registry_with_policy(policy: PromotionPolicy) -> ControllerRegistry {
1493 let mut r = ControllerRegistry::new();
1494 r.set_promotion_policy(policy);
1495 r
1496 }
1497
1498 fn fast_policy() -> PromotionPolicy {
1499 PromotionPolicy {
1500 min_calibration_score: 0.8,
1501 min_shadow_epochs: 2,
1502 min_canary_epochs: 1,
1503 max_budget_overruns: 3,
1504 policy_id: "test-fast-v1".to_string(),
1505 }
1506 }
1507
1508 #[test]
1509 fn promote_shadow_to_canary() {
1510 let mut registry = registry_with_policy(fast_policy());
1511 let id = registry.register(test_registration("promo")).unwrap();
1512 registry.update_calibration(id, 0.9);
1513 registry.advance_epoch();
1515 registry.advance_epoch();
1516 let result = registry.try_promote(id, ControllerMode::Canary);
1517 assert_eq!(result, Ok(ControllerMode::Canary));
1518 assert_eq!(registry.mode(id), Some(ControllerMode::Canary));
1519 assert_eq!(registry.epochs_in_current_mode(id), Some(0));
1520 }
1521
1522 #[test]
1523 fn promote_canary_to_active() {
1524 let mut registry = registry_with_policy(fast_policy());
1525 let id = registry.register(test_registration("canary-up")).unwrap();
1526 registry.update_calibration(id, 0.95);
1527 registry.advance_epoch();
1528 registry.advance_epoch();
1529 registry.try_promote(id, ControllerMode::Canary).unwrap();
1530 registry.advance_epoch();
1531 let result = registry.try_promote(id, ControllerMode::Active);
1532 assert_eq!(result, Ok(ControllerMode::Active));
1533 }
1534
1535 #[test]
1536 fn promote_rejects_insufficient_epochs() {
1537 let mut registry = registry_with_policy(fast_policy());
1538 let id = registry.register(test_registration("too-soon")).unwrap();
1539 registry.update_calibration(id, 0.9);
1540 registry.advance_epoch();
1542 let result = registry.try_promote(id, ControllerMode::Canary);
1543 assert!(matches!(
1544 result,
1545 Err(PromotionRejection::InsufficientEpochs {
1546 current: 1,
1547 required: 2,
1548 ..
1549 })
1550 ));
1551 }
1552
1553 #[test]
1554 fn promote_rejects_low_calibration() {
1555 let mut registry = registry_with_policy(fast_policy());
1556 let id = registry.register(test_registration("low-cal")).unwrap();
1557 registry.update_calibration(id, 0.5);
1558 registry.advance_epoch();
1559 registry.advance_epoch();
1560 let result = registry.try_promote(id, ControllerMode::Canary);
1561 assert!(matches!(
1562 result,
1563 Err(PromotionRejection::CalibrationTooLow { .. })
1564 ));
1565 }
1566
1567 #[test]
1568 fn promote_rejects_invalid_transition_shadow_to_active() {
1569 let mut registry = registry_with_policy(fast_policy());
1570 let id = registry.register(test_registration("skip")).unwrap();
1571 registry.update_calibration(id, 0.99);
1572 registry.advance_epoch();
1573 registry.advance_epoch();
1574 registry.advance_epoch();
1575 let result = registry.try_promote(id, ControllerMode::Active);
1576 assert!(matches!(
1577 result,
1578 Err(PromotionRejection::InvalidTransition { .. })
1579 ));
1580 }
1581
1582 #[test]
1583 fn promote_rejects_active_to_canary() {
1584 let mut registry = registry_with_policy(fast_policy());
1585 let id = registry.register(test_registration("backward")).unwrap();
1586 registry.update_calibration(id, 0.95);
1587 registry.advance_epoch();
1588 registry.advance_epoch();
1589 registry.try_promote(id, ControllerMode::Canary).unwrap();
1590 registry.advance_epoch();
1591 registry.try_promote(id, ControllerMode::Active).unwrap();
1592 let result = registry.try_promote(id, ControllerMode::Canary);
1593 assert!(matches!(
1594 result,
1595 Err(PromotionRejection::InvalidTransition { .. })
1596 ));
1597 }
1598
1599 #[test]
1600 fn rollback_from_active_to_shadow() {
1601 let mut registry = registry_with_policy(fast_policy());
1602 let id = registry.register(test_registration("rollme")).unwrap();
1603 registry.update_calibration(id, 0.95);
1604 registry.advance_epoch();
1605 registry.advance_epoch();
1606 registry.try_promote(id, ControllerMode::Canary).unwrap();
1607 registry.advance_epoch();
1608 registry.try_promote(id, ControllerMode::Active).unwrap();
1609
1610 let cmd = registry
1611 .rollback(id, RollbackReason::CalibrationRegression { score: 0.3 })
1612 .unwrap();
1613 assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
1614 assert_eq!(cmd.rolled_back_from, ControllerMode::Active);
1615 assert_eq!(cmd.rolled_back_to, ControllerMode::Shadow);
1616 assert_eq!(cmd.controller_name, "rollme");
1617 assert!(!cmd.remediation.is_empty());
1618 assert!(registry.is_fallback_active(id));
1619 }
1620
1621 #[test]
1622 fn rollback_from_canary_to_shadow() {
1623 let mut registry = registry_with_policy(fast_policy());
1624 let id = registry.register(test_registration("can-roll")).unwrap();
1625 registry.update_calibration(id, 0.9);
1626 registry.advance_epoch();
1627 registry.advance_epoch();
1628 registry.try_promote(id, ControllerMode::Canary).unwrap();
1629
1630 let cmd = registry
1631 .rollback(id, RollbackReason::ManualRollback)
1632 .unwrap();
1633 assert_eq!(cmd.rolled_back_from, ControllerMode::Canary);
1634 assert_eq!(cmd.rolled_back_to, ControllerMode::Shadow);
1635 }
1636
1637 #[test]
1638 fn rollback_from_shadow_returns_none() {
1639 let mut registry = ControllerRegistry::new();
1640 let id = registry
1641 .register(test_registration("already-shadow"))
1642 .unwrap();
1643 assert!(
1644 registry
1645 .rollback(id, RollbackReason::ManualRollback)
1646 .is_none()
1647 );
1648 }
1649
1650 #[test]
1651 fn hold_and_release() {
1652 let mut registry = registry_with_policy(fast_policy());
1653 let id = registry.register(test_registration("holdme")).unwrap();
1654 registry.update_calibration(id, 0.9);
1655 registry.advance_epoch();
1656 registry.advance_epoch();
1657 registry.try_promote(id, ControllerMode::Canary).unwrap();
1658
1659 assert!(registry.hold(id));
1660 assert_eq!(registry.mode(id), Some(ControllerMode::Hold));
1661
1662 let result = registry.try_promote(id, ControllerMode::Active);
1664 assert!(matches!(
1665 result,
1666 Err(PromotionRejection::HeldForInvestigation)
1667 ));
1668
1669 let restored = registry.release_hold(id).unwrap();
1671 assert_eq!(restored, ControllerMode::Canary);
1672 assert_eq!(registry.mode(id), Some(ControllerMode::Canary));
1673 }
1674
1675 #[test]
1676 fn hold_already_held_returns_false() {
1677 let mut registry = ControllerRegistry::new();
1678 let id = registry.register(test_registration("double-hold")).unwrap();
1679 assert!(registry.hold(id));
1680 assert!(!registry.hold(id));
1681 }
1682
1683 #[test]
1684 fn release_non_held_returns_none() {
1685 let mut registry = ControllerRegistry::new();
1686 let id = registry.register(test_registration("not-held")).unwrap();
1687 assert!(registry.release_hold(id).is_none());
1688 }
1689
1690 #[test]
1691 fn fallback_lifecycle() {
1692 let mut registry = registry_with_policy(fast_policy());
1693 let id = registry.register(test_registration("fb")).unwrap();
1694 assert!(!registry.is_fallback_active(id));
1695 registry.update_calibration(id, 0.9);
1696 registry.advance_epoch();
1697 registry.advance_epoch();
1698 registry.try_promote(id, ControllerMode::Canary).unwrap();
1699 registry.rollback(
1700 id,
1701 RollbackReason::FallbackTriggered {
1702 decision_label: "bad-decision".to_string(),
1703 },
1704 );
1705 assert!(registry.is_fallback_active(id));
1706 registry.clear_fallback(id);
1707 assert!(!registry.is_fallback_active(id));
1708 }
1709
1710 #[test]
1711 fn evidence_ledger_records_registration() {
1712 let mut registry = ControllerRegistry::new();
1713 let id = registry.register(test_registration("ledger-reg")).unwrap();
1714 let entries = registry.controller_ledger(id);
1715 assert_eq!(entries.len(), 1);
1716 assert!(matches!(entries[0].event, LedgerEvent::Registered { .. }));
1717 }
1718
1719 #[test]
1720 fn evidence_ledger_records_full_lifecycle() {
1721 let mut registry = registry_with_policy(fast_policy());
1722 let id = registry.register(test_registration("full-life")).unwrap();
1723 registry.update_calibration(id, 0.95);
1724 registry.advance_epoch();
1725 registry.advance_epoch();
1726
1727 registry.try_promote(id, ControllerMode::Canary).unwrap();
1729 registry.advance_epoch();
1730
1731 registry.try_promote(id, ControllerMode::Active).unwrap();
1733
1734 registry.rollback(id, RollbackReason::ManualRollback);
1736
1737 let entries = registry.controller_ledger(id);
1738 assert_eq!(entries.len(), 4);
1740 assert!(matches!(entries[0].event, LedgerEvent::Registered { .. }));
1741 assert!(matches!(
1742 entries[1].event,
1743 LedgerEvent::Promoted {
1744 from: ControllerMode::Shadow,
1745 to: ControllerMode::Canary,
1746 ..
1747 }
1748 ));
1749 assert!(matches!(
1750 entries[2].event,
1751 LedgerEvent::Promoted {
1752 from: ControllerMode::Canary,
1753 to: ControllerMode::Active,
1754 ..
1755 }
1756 ));
1757 assert!(matches!(
1758 entries[3].event,
1759 LedgerEvent::RolledBack {
1760 from: ControllerMode::Active,
1761 to: ControllerMode::Shadow,
1762 ..
1763 }
1764 ));
1765 }
1766
1767 #[test]
1768 fn evidence_ledger_records_decisions() {
1769 let mut registry = ControllerRegistry::new();
1770 let id = registry.register(test_registration("dec-ledger")).unwrap();
1771 let snap_id = registry.next_snapshot_id();
1772 let decision = ControllerDecision {
1773 controller_id: id,
1774 snapshot_id: snap_id,
1775 label: "adjust-streak".to_string(),
1776 payload: serde_json::Value::Null,
1777 confidence: 0.9,
1778 fallback_label: "noop".to_string(),
1779 };
1780 registry.record_decision(&decision);
1781 let entries = registry.controller_ledger(id);
1782 assert_eq!(entries.len(), 2);
1784 assert!(matches!(
1785 &entries[1].event,
1786 LedgerEvent::DecisionRecorded { label, within_budget: true } if label == "adjust-streak"
1787 ));
1788 }
1789
1790 #[test]
1791 fn evidence_ledger_records_promotion_rejections() {
1792 let mut registry = registry_with_policy(fast_policy());
1793 let id = registry
1794 .register(test_registration("reject-ledger"))
1795 .unwrap();
1796 registry.update_calibration(id, 0.5);
1797 registry.advance_epoch();
1798 registry.advance_epoch();
1799 let _ = registry.try_promote(id, ControllerMode::Canary);
1800 let entries = registry.controller_ledger(id);
1801 assert_eq!(entries.len(), 2);
1803 assert!(matches!(
1804 entries[1].event,
1805 LedgerEvent::PromotionRejected { .. }
1806 ));
1807 }
1808
1809 #[test]
1810 fn evidence_ledger_records_hold_and_release() {
1811 let mut registry = ControllerRegistry::new();
1812 let id = registry.register(test_registration("hold-ledger")).unwrap();
1813 registry.hold(id);
1814 registry.release_hold(id);
1815 let entries = registry.controller_ledger(id);
1816 assert_eq!(entries.len(), 3);
1818 assert!(matches!(entries[1].event, LedgerEvent::Held { .. }));
1819 assert!(matches!(entries[2].event, LedgerEvent::Released { .. }));
1820 }
1821
1822 #[test]
1823 fn evidence_ledger_records_deregistration() {
1824 let mut registry = ControllerRegistry::new();
1825 let id = registry
1826 .register(test_registration("dereg-ledger"))
1827 .unwrap();
1828 registry.deregister(id);
1829 let entries = registry.controller_ledger(id);
1830 assert_eq!(entries.len(), 2);
1832 assert!(matches!(entries[1].event, LedgerEvent::Deregistered));
1833 }
1834
1835 #[test]
1836 fn ledger_entry_ids_are_monotonic() {
1837 let mut registry = ControllerRegistry::new();
1838 let id = registry.register(test_registration("mono")).unwrap();
1839 registry.hold(id);
1840 registry.release_hold(id);
1841 let ledger = registry.evidence_ledger();
1842 for pair in ledger.windows(2) {
1843 assert!(pair[0].entry_id < pair[1].entry_id);
1844 }
1845 }
1846
1847 #[test]
1848 fn ledger_entries_carry_policy_id() {
1849 let policy = fast_policy();
1850 let expected_id = policy.policy_id.clone();
1851 let mut registry = registry_with_policy(policy);
1852 let id = registry
1853 .register(test_registration("policy-trace"))
1854 .unwrap();
1855 registry.hold(id);
1856 for entry in registry.controller_ledger(id) {
1857 assert_eq!(entry.policy_id, expected_id);
1858 }
1859 }
1860
1861 #[test]
1862 fn budget_overruns_tracked() {
1863 let mut registry = ControllerRegistry::new();
1864 let id = registry.register(test_registration("overruns")).unwrap();
1865 let snap_id = registry.next_snapshot_id();
1866 let decision = ControllerDecision {
1867 controller_id: id,
1868 snapshot_id: snap_id,
1869 label: "test".to_string(),
1870 payload: serde_json::Value::Null,
1871 confidence: 0.9,
1872 fallback_label: "noop".to_string(),
1873 };
1874 registry.record_decision(&decision);
1876 registry.record_decision(&decision);
1877 registry.record_decision(&decision);
1878 assert_eq!(registry.budget_overruns(id), Some(2));
1879 }
1880
1881 #[test]
1882 fn advance_epoch_increments_mode_counter() {
1883 let mut registry = ControllerRegistry::new();
1884 let id = registry.register(test_registration("epoch-count")).unwrap();
1885 assert_eq!(registry.epochs_in_current_mode(id), Some(0));
1886 registry.advance_epoch();
1887 assert_eq!(registry.epochs_in_current_mode(id), Some(1));
1888 registry.advance_epoch();
1889 assert_eq!(registry.epochs_in_current_mode(id), Some(2));
1890 }
1891
1892 #[test]
1893 fn recovery_command_has_remediation() {
1894 let mut registry = registry_with_policy(fast_policy());
1895 let id = registry.register(test_registration("recovery")).unwrap();
1896 registry.update_calibration(id, 0.95);
1897 registry.advance_epoch();
1898 registry.advance_epoch();
1899 registry.try_promote(id, ControllerMode::Canary).unwrap();
1900
1901 let cmd = registry
1902 .rollback(id, RollbackReason::BudgetOverruns { count: 5 })
1903 .unwrap();
1904 assert_eq!(cmd.policy_id, "test-fast-v1");
1905 assert!(!cmd.remediation.is_empty());
1906 assert!(cmd.remediation.iter().any(|r| r.contains("budget")));
1907 }
1908
1909 #[test]
1910 fn recovery_command_for_fallback_triggered() {
1911 let mut registry = registry_with_policy(fast_policy());
1912 let id = registry
1913 .register(test_registration("fallback-cmd"))
1914 .unwrap();
1915 registry.update_calibration(id, 0.9);
1916 registry.advance_epoch();
1917 registry.advance_epoch();
1918 registry.try_promote(id, ControllerMode::Canary).unwrap();
1919
1920 let cmd = registry
1921 .rollback(
1922 id,
1923 RollbackReason::FallbackTriggered {
1924 decision_label: "bad-action".to_string(),
1925 },
1926 )
1927 .unwrap();
1928 assert!(cmd.remediation.iter().any(|r| r.contains("bad-action")));
1929 }
1930
1931 #[test]
1932 fn structured_log_covers_promotion_and_rollback() {
1933 use parking_lot::Mutex;
1934 let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1935 let logs_clone = Arc::clone(&logs);
1936 let mut registry = registry_with_policy(fast_policy());
1937 registry = registry.with_log_sink(Arc::new(move |msg: &str| {
1938 logs_clone.lock().push(msg.to_string());
1939 }));
1940 let id = registry.register(test_registration("log-promo")).unwrap();
1941 registry.update_calibration(id, 0.9);
1942 registry.advance_epoch();
1943 registry.advance_epoch();
1944 registry.try_promote(id, ControllerMode::Canary).unwrap();
1945 registry.rollback(id, RollbackReason::ManualRollback);
1946
1947 {
1948 let captured = logs.lock();
1949 assert!(captured.iter().any(|l| l.contains("controller_promoted")));
1950 assert!(
1951 captured
1952 .iter()
1953 .any(|l| l.contains("controller_rolled_back"))
1954 );
1955 assert!(
1956 captured
1957 .iter()
1958 .any(|l| l.contains("policy_id=test-fast-v1"))
1959 );
1960 drop(captured);
1961 }
1962 }
1963
1964 #[test]
1965 fn structured_log_covers_promotion_rejection() {
1966 use parking_lot::Mutex;
1967 let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1968 let logs_clone = Arc::clone(&logs);
1969 let mut registry = registry_with_policy(fast_policy());
1970 registry = registry.with_log_sink(Arc::new(move |msg: &str| {
1971 logs_clone.lock().push(msg.to_string());
1972 }));
1973 let id = registry.register(test_registration("log-reject")).unwrap();
1974 registry.update_calibration(id, 0.5);
1975 registry.advance_epoch();
1976 registry.advance_epoch();
1977 let _ = registry.try_promote(id, ControllerMode::Canary);
1978
1979 {
1980 let captured = logs.lock();
1981 assert!(
1982 captured
1983 .iter()
1984 .any(|l| l.contains("controller_promotion_rejected"))
1985 );
1986 drop(captured);
1987 }
1988 }
1989
1990 #[test]
1991 fn structured_log_covers_hold_and_release() {
1992 use parking_lot::Mutex;
1993 let logs: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
1994 let logs_clone = Arc::clone(&logs);
1995 let mut registry = ControllerRegistry::new();
1996 registry = registry.with_log_sink(Arc::new(move |msg: &str| {
1997 logs_clone.lock().push(msg.to_string());
1998 }));
1999 let id = registry.register(test_registration("log-hold")).unwrap();
2000 registry.hold(id);
2001 registry.release_hold(id);
2002
2003 {
2004 let captured = logs.lock();
2005 assert!(captured.iter().any(|l| l.contains("controller_held")));
2006 assert!(captured.iter().any(|l| l.contains("controller_released")));
2007 drop(captured);
2008 }
2009 }
2010
2011 #[test]
2012 fn promotion_rejection_display_coverage() {
2013 let rejections = [
2014 PromotionRejection::ControllerNotFound,
2015 PromotionRejection::CalibrationTooLow {
2016 current: 0.5,
2017 required: 0.8,
2018 },
2019 PromotionRejection::InsufficientEpochs {
2020 current: 1,
2021 required: 3,
2022 mode: ControllerMode::Shadow,
2023 },
2024 PromotionRejection::InvalidTransition {
2025 from: ControllerMode::Shadow,
2026 to: ControllerMode::Active,
2027 },
2028 PromotionRejection::HeldForInvestigation,
2029 ];
2030 for rejection in &rejections {
2031 let msg = format!("{rejection}");
2032 assert!(!msg.is_empty());
2033 }
2034 }
2035
2036 #[test]
2037 fn rollback_reason_display_coverage() {
2038 let reasons = [
2039 RollbackReason::CalibrationRegression { score: 0.3 },
2040 RollbackReason::BudgetOverruns { count: 5 },
2041 RollbackReason::ManualRollback,
2042 RollbackReason::FallbackTriggered {
2043 decision_label: "test".to_string(),
2044 },
2045 ];
2046 for reason in &reasons {
2047 let msg = format!("{reason}");
2048 assert!(!msg.is_empty());
2049 }
2050 }
2051
2052 #[test]
2053 fn e2e_promotion_cannot_bypass_verification() {
2054 let mut registry = registry_with_policy(fast_policy());
2056 let id = registry
2057 .register(test_registration("bypass-attempt"))
2058 .unwrap();
2059
2060 registry.update_calibration(id, 0.99);
2062 for _ in 0..10 {
2063 registry.advance_epoch();
2064 }
2065 assert!(matches!(
2066 registry.try_promote(id, ControllerMode::Active),
2067 Err(PromotionRejection::InvalidTransition { .. })
2068 ));
2069
2070 registry.update_calibration(id, 0.1);
2072 assert!(matches!(
2073 registry.try_promote(id, ControllerMode::Canary),
2074 Err(PromotionRejection::CalibrationTooLow { .. })
2075 ));
2076
2077 registry.update_calibration(id, 0.99);
2079 registry.set_mode(id, ControllerMode::Shadow);
2081 let id2 = registry
2086 .register(test_registration("correct-path"))
2087 .unwrap();
2088 registry.update_calibration(id2, 0.9);
2089 assert!(registry.try_promote(id2, ControllerMode::Canary).is_err()); registry.advance_epoch();
2091 assert!(registry.try_promote(id2, ControllerMode::Canary).is_err()); registry.advance_epoch();
2093 assert!(registry.try_promote(id2, ControllerMode::Canary).is_ok()); assert!(registry.try_promote(id2, ControllerMode::Active).is_err()); registry.advance_epoch();
2096 assert!(registry.try_promote(id2, ControllerMode::Active).is_ok()); assert_eq!(registry.mode(id2), Some(ControllerMode::Active));
2098 }
2099
2100 #[test]
2101 fn e2e_failed_rollout_leaves_conservative_state() {
2102 let mut registry = registry_with_policy(fast_policy());
2103 let id = registry
2104 .register(test_registration("failed-rollout"))
2105 .unwrap();
2106 registry.update_calibration(id, 0.9);
2107 registry.advance_epoch();
2108 registry.advance_epoch();
2109 registry.try_promote(id, ControllerMode::Canary).unwrap();
2110 registry.advance_epoch();
2111 registry.try_promote(id, ControllerMode::Active).unwrap();
2112
2113 registry.update_calibration(id, 0.2);
2115 let cmd = registry
2116 .rollback(id, RollbackReason::CalibrationRegression { score: 0.2 })
2117 .unwrap();
2118
2119 assert_eq!(registry.mode(id), Some(ControllerMode::Shadow));
2121 assert!(registry.is_fallback_active(id));
2122 assert_eq!(cmd.rolled_back_to, ControllerMode::Shadow);
2123 assert!(!cmd.remediation.is_empty());
2124
2125 assert!(registry.try_promote(id, ControllerMode::Canary).is_err());
2127 }
2128
2129 #[test]
2130 fn e2e_hold_blocks_entire_pipeline() {
2131 let mut registry = registry_with_policy(fast_policy());
2132 let id = registry.register(test_registration("hold-block")).unwrap();
2133 registry.update_calibration(id, 0.99);
2134 registry.advance_epoch();
2135 registry.advance_epoch();
2136
2137 registry.hold(id);
2138 assert!(matches!(
2140 registry.try_promote(id, ControllerMode::Canary),
2141 Err(PromotionRejection::HeldForInvestigation)
2142 ));
2143
2144 registry.release_hold(id);
2146 registry.advance_epoch();
2148 registry.advance_epoch();
2149 assert!(registry.try_promote(id, ControllerMode::Canary).is_ok());
2150 }
2151
2152 #[test]
2153 fn recovery_command_serializable() {
2154 let cmd = RecoveryCommand {
2155 controller_id: ControllerId(42),
2156 controller_name: "test-ctrl".to_string(),
2157 rolled_back_from: ControllerMode::Active,
2158 rolled_back_to: ControllerMode::Shadow,
2159 reason: RollbackReason::ManualRollback,
2160 policy_id: "test-v1".to_string(),
2161 at_snapshot_id: Some(SnapshotId(100)),
2162 remediation: vec!["check logs".to_string()],
2163 };
2164 let json = serde_json::to_string(&cmd).unwrap();
2165 let deser: RecoveryCommand = serde_json::from_str(&json).unwrap();
2166 assert_eq!(deser.controller_id, ControllerId(42));
2167 assert_eq!(deser.controller_name, "test-ctrl");
2168 }
2169
2170 #[test]
2171 fn evidence_ledger_entry_serializable() {
2172 let entry = EvidenceLedgerEntry {
2173 entry_id: 1,
2174 controller_id: ControllerId(1),
2175 snapshot_id: Some(SnapshotId(5)),
2176 event: LedgerEvent::Promoted {
2177 from: ControllerMode::Shadow,
2178 to: ControllerMode::Canary,
2179 calibration_score: 0.85,
2180 },
2181 policy_id: "test".to_string(),
2182 timestamp: Time::ZERO,
2183 };
2184 let json = serde_json::to_string(&entry).unwrap();
2185 let deser: EvidenceLedgerEntry = serde_json::from_str(&json).unwrap();
2186 assert_eq!(deser.entry_id, 1);
2187 }
2188
2189 #[test]
2190 fn default_promotion_policy_values() {
2191 let policy = PromotionPolicy::default();
2192 assert!((policy.min_calibration_score - 0.8).abs() < f64::EPSILON);
2193 assert_eq!(policy.min_shadow_epochs, 3);
2194 assert_eq!(policy.min_canary_epochs, 2);
2195 assert_eq!(policy.max_budget_overruns, 3);
2196 assert_eq!(policy.policy_id, "default-promotion-policy-v1");
2197 }
2198}