entrenar/monitor/inference/safety_andon/
andon.rs1use super::emergency::EmergencyCondition;
4use super::sil::SafetyIntegrityLevel;
5use crate::monitor::andon::{Alert, AndonSystem};
6use crate::monitor::inference::path::DecisionPath;
7use crate::monitor::inference::trace::DecisionTrace;
8
9pub struct SafetyAndon {
17 andon: AndonSystem,
19 sil: SafetyIntegrityLevel,
21 pub(crate) min_confidence: f32,
23 pub(crate) max_latency_ns: u64,
25 low_confidence_count: usize,
27 pub(crate) low_confidence_threshold: usize,
29 pub(crate) alert_on_unknown: bool,
31}
32
33impl SafetyAndon {
34 pub fn new(sil: SafetyIntegrityLevel) -> Self {
36 Self {
37 andon: AndonSystem::new(),
38 min_confidence: sil.min_confidence(),
39 max_latency_ns: sil.max_latency_ns(),
40 sil,
41 low_confidence_count: 0,
42 low_confidence_threshold: 5,
43 alert_on_unknown: true,
44 }
45 }
46
47 pub fn with_min_confidence(mut self, threshold: f32) -> Self {
49 self.min_confidence = threshold;
50 self
51 }
52
53 pub fn with_max_latency_ns(mut self, max_ns: u64) -> Self {
55 self.max_latency_ns = max_ns;
56 self
57 }
58
59 pub fn with_low_confidence_threshold(mut self, threshold: usize) -> Self {
61 self.low_confidence_threshold = threshold;
62 self
63 }
64
65 pub fn without_unknown_alerts(mut self) -> Self {
67 self.alert_on_unknown = false;
68 self
69 }
70
71 pub fn check_trace<P: DecisionPath>(
73 &mut self,
74 trace: &DecisionTrace<P>,
75 latency_budget_ns: u64,
76 ) {
77 let confidence = trace.confidence();
78 let latency_ns = trace.latency_ns;
79
80 if trace.output.is_nan() || trace.output.is_infinite() {
82 self.trigger_emergency(EmergencyCondition::InvalidOutput);
83 return;
84 }
85
86 if confidence < self.min_confidence {
88 self.low_confidence_count += 1;
89
90 if self.low_confidence_count >= self.low_confidence_threshold {
91 self.trigger_emergency(EmergencyCondition::ConsecutiveLowConfidence {
92 count: self.low_confidence_count,
93 threshold: self.min_confidence,
94 });
95 } else {
96 self.andon.trigger(
97 Alert::warning(format!(
98 "Low confidence: {:.1}% (threshold: {:.1}%)",
99 confidence * 100.0,
100 self.min_confidence * 100.0
101 ))
102 .with_source("SafetyAndon")
103 .with_value(f64::from(confidence)),
104 );
105 }
106 } else {
107 self.low_confidence_count = 0;
108 }
109
110 let effective_budget = latency_budget_ns.min(self.max_latency_ns);
112 if latency_ns > effective_budget {
113 let latency_ms = latency_ns as f64 / 1_000_000.0;
114 let budget_ms = effective_budget as f64 / 1_000_000.0;
115
116 if latency_ns > self.max_latency_ns * 2 {
117 self.trigger_emergency(EmergencyCondition::DecisionTimeout {
119 max_ms: budget_ms as f32,
120 });
121 } else {
122 self.andon.trigger(
123 Alert::warning(format!(
124 "Latency exceeded: {latency_ms:.2}ms > {budget_ms:.2}ms budget"
125 ))
126 .with_source("SafetyAndon")
127 .with_value(latency_ms),
128 );
129 }
130 }
131 }
132
133 pub fn trigger_emergency(&mut self, condition: EmergencyCondition) {
135 let alert =
136 Alert::new(condition.alert_level(), condition.message()).with_source("SafetyAndon");
137 self.andon.trigger(alert);
138 }
139
140 pub fn should_stop(&self) -> bool {
142 self.andon.should_stop()
143 }
144
145 pub fn reset(&mut self) {
147 self.andon.reset();
148 self.low_confidence_count = 0;
149 }
150
151 pub fn history(&self) -> &[Alert] {
153 self.andon.history()
154 }
155
156 pub fn sil(&self) -> SafetyIntegrityLevel {
158 self.sil
159 }
160
161 pub fn andon(&self) -> &AndonSystem {
163 &self.andon
164 }
165}
166
167impl Default for SafetyAndon {
168 fn default() -> Self {
169 Self::new(SafetyIntegrityLevel::QM)
170 }
171}