dsfb_semiconductor/missingness.rs
1//! Missingness-aware grammar: invalidates drift computation when sensor data
2//! is absent for more than [`MAX_CONSECUTIVE_MISSING_RUNS`] consecutive runs.
3//!
4//! # Hardware Reality Check
//! Semiconductor sensors fail. A mass-flow controller (MFC) with a broken
//! transducer will produce a flat line of zeros, not NaN — and a naive DSFB
//! engine will interpret a sustained zero residual as "nominal" when the
//! truth is "unknown."
8//!
9//! More dangerously, imputed values (mean-fill) can accumulate into a
10//! spurious drift signal over a long outage window, causing the engine to
11//! escalate a phantom anomaly that exists only in the imputation model.
12//!
13//! # Policy
//! * If a feature's sensor is missing for `> MAX_CONSECUTIVE_MISSING_RUNS`
//!   consecutive runs, the drift value `d` is **invalidated**: its validity
//!   is reported as [`DriftValidity::Unknown`].
17//! * Grammar transitions from [`DriftValidity::Unknown`] features are
18//! **suppressed** — the feature is held at its last valid grammar state with
19//! a `suppressed_by_missingness` flag set to `true`.
20//! * The outage event is recorded verbatim in the traceability manifest,
21//! preserving the audit trail.
22
23use serde::{Deserialize, Serialize};
24use std::collections::HashMap;
25
/// Maximum number of consecutive missing runs tolerated before drift is
/// invalidated.
///
/// The comparison used in this module is strict (`>`): a streak of exactly
/// this many missing runs is still reported as [`DriftValidity::Valid`], and
/// the next consecutive missing run is the first one reported as
/// [`DriftValidity::Unknown`].
pub const MAX_CONSECUTIVE_MISSING_RUNS: usize = 3;
28
29// ─── Drift Validity ───────────────────────────────────────────────────────────
30
/// Marks whether the first-difference (drift) value for a run may be trusted.
///
/// Returned by [`FeatureMissingnessTracker::update`] for every run.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DriftValidity {
    /// The current streak of missing runs does not exceed
    /// [`MAX_CONSECUTIVE_MISSING_RUNS`].
    ///
    /// Note that this includes runs whose value was itself missing, as long
    /// as the streak is still within the permitted window.
    Valid,
    /// Drift invalidated due to missing data beyond the permitted window.
    Unknown,
}
39
40// ─── Feature Missingness Tracker ─────────────────────────────────────────────
41
/// Per-feature missingness tracker.
///
/// Accumulates consecutive missing runs and emits [`DriftValidity::Unknown`]
/// once the streak exceeds [`MAX_CONSECUTIVE_MISSING_RUNS`].
///
/// The `Default` value has an empty `feature_id`, zeroed counters and no
/// invalidation events.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FeatureMissingnessTracker {
    /// Feature identifier.
    pub feature_id: String,
    /// Current consecutive missing run count; `update` resets it to zero on
    /// any non-missing run.
    pub consecutive_missing: usize,
    /// Number of missing runs counted by `update`; a non-missing run does not
    /// reset it.
    pub total_missing: usize,
    /// Run indices for which `update` returned [`DriftValidity::Unknown`].
    ///
    /// One entry is pushed for every such run, so a long outage contributes
    /// one index per run beyond the threshold, not a single entry.
    pub invalidation_events: Vec<usize>,
}
57
58impl FeatureMissingnessTracker {
59 pub fn new(feature_id: impl Into<String>) -> Self {
60 Self {
61 feature_id: feature_id.into(),
62 ..Default::default()
63 }
64 }
65
66 /// Update the tracker for a single run.
67 ///
68 /// * `is_missing` — whether the sensor value for this run is absent.
69 /// * `run_index` — zero-based run counter (used in invalidation events).
70 ///
71 /// Returns the [`DriftValidity`] for this run.
72 pub fn update(&mut self, is_missing: bool, run_index: usize) -> DriftValidity {
73 if is_missing {
74 self.consecutive_missing += 1;
75 self.total_missing += 1;
76 } else {
77 self.consecutive_missing = 0;
78 }
79
80 if self.consecutive_missing > MAX_CONSECUTIVE_MISSING_RUNS {
81 self.invalidation_events.push(run_index);
82 DriftValidity::Unknown
83 } else {
84 DriftValidity::Valid
85 }
86 }
87
88 /// Returns `true` if drift is currently invalidated.
89 #[must_use]
90 pub fn is_invalidated(&self) -> bool {
91 self.consecutive_missing > MAX_CONSECUTIVE_MISSING_RUNS
92 }
93}
94
95// ─── Missingness-Aware Run Record ─────────────────────────────────────────────
96
/// The annotated record for a single run after missingness processing.
///
/// [`MissingnessAwareGrammar::process`] produces one record per input run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MissingnessAwareRecord {
    /// Zero-based position of the run within the processed sequence.
    pub run_index: usize,
    /// Identifier of the feature this record belongs to.
    pub feature_id: String,
    /// The observed or imputed value.
    pub value: f64,
    /// Whether the original observation was missing.
    pub is_missing: bool,
    /// Drift validity for this point.
    pub drift_validity: DriftValidity,
    /// Whether the grammar transition at this point is suppressed.
    ///
    /// `process` sets this to `true` exactly when `drift_validity` is
    /// [`DriftValidity::Unknown`].
    pub suppressed_by_missingness: bool,
}
111
112// ─── Missingness-Aware Grammar Filter ────────────────────────────────────────
113
/// Applies the missingness policy across all features in a run sequence.
///
/// Call [`MissingnessAwareGrammar::process`] once per feature with the
/// imputed-value slice and the corresponding `is_imputed` mask. Each call
/// starts that feature's consecutive-missing streak and invalidation-event
/// list from empty.
///
/// The returned [`MissingnessAwareRecord`] vector can be used to gate the
/// downstream grammar layer: any record with
/// `suppressed_by_missingness = true` should be held at the previous
/// grammar state rather than allowing a new transition.
#[derive(Debug, Default)]
pub struct MissingnessAwareGrammar {
    /// One tracker per feature id that has been passed to `process`.
    trackers: HashMap<String, FeatureMissingnessTracker>,
}
127
128impl MissingnessAwareGrammar {
129 pub fn new() -> Self {
130 Self::default()
131 }
132
133 /// Process a feature's run sequence and return annotated records.
134 ///
135 /// # Arguments
136 /// * `feature_id` — sensor identifier.
137 /// * `values` — slice of imputed sensor values (one per run).
138 /// * `is_imputed` — parallel slice indicating which values were imputed
139 /// (i.e., the original sensor reading was missing).
140 pub fn process(
141 &mut self,
142 feature_id: &str,
143 values: &[f64],
144 is_imputed: &[bool],
145 ) -> Vec<MissingnessAwareRecord> {
146 assert_eq!(
147 values.len(),
148 is_imputed.len(),
149 "values and is_imputed must have equal length"
150 );
151
152 let tracker = self
153 .trackers
154 .entry(feature_id.to_string())
155 .or_insert_with(|| FeatureMissingnessTracker::new(feature_id));
156
157 // Reset between calls — each call processes a fresh run sequence.
158 tracker.consecutive_missing = 0;
159 tracker.invalidation_events.clear();
160
161 values
162 .iter()
163 .zip(is_imputed.iter())
164 .enumerate()
165 .map(|(run_index, (&value, &is_missing))| {
166 let drift_validity = tracker.update(is_missing, run_index);
167 MissingnessAwareRecord {
168 run_index,
169 feature_id: feature_id.to_string(),
170 value,
171 is_missing,
172 drift_validity,
173 suppressed_by_missingness: drift_validity == DriftValidity::Unknown,
174 }
175 })
176 .collect()
177 }
178
179 /// Return all feature trackers for serialisation into the traceability
180 /// manifest.
181 pub fn trackers(&self) -> &HashMap<String, FeatureMissingnessTracker> {
182 &self.trackers
183 }
184
185 /// Return a summary for embedding in the run manifest JSON.
186 pub fn summary(&self) -> MissingSummary {
187 let total_features = self.trackers.len();
188 let features_with_invalidations = self
189 .trackers
190 .values()
191 .filter(|t| !t.invalidation_events.is_empty())
192 .count();
193 let total_invalidation_events: usize = self
194 .trackers
195 .values()
196 .map(|t| t.invalidation_events.len())
197 .sum();
198 let total_missing_observations: usize =
199 self.trackers.values().map(|t| t.total_missing).sum();
200
201 MissingSummary {
202 total_features,
203 features_with_invalidations,
204 total_invalidation_events,
205 total_missing_observations,
206 max_consecutive_missing_threshold: MAX_CONSECUTIVE_MISSING_RUNS,
207 }
208 }
209}
210
211// ─── Summary ────────────────────────────────────────────────────────────────
212
/// Compact summary of missingness across all features — emitted in the
/// run manifest JSON for audit.
///
/// Built by [`MissingnessAwareGrammar::summary`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MissingSummary {
    /// Number of feature trackers held by the grammar.
    pub total_features: usize,
    /// Number of features whose tracker holds at least one invalidation event.
    pub features_with_invalidations: usize,
    /// Sum of the invalidation-event counts of all trackers.
    pub total_invalidation_events: usize,
    /// Sum of the `total_missing` counters of all trackers.
    pub total_missing_observations: usize,
    /// Copy of [`MAX_CONSECUTIVE_MISSING_RUNS`] in effect when the summary was built.
    pub max_consecutive_missing_threshold: usize,
}
223
224// ─── Unit tests ───────────────────────────────────────────────────────────────
225
#[cfg(test)]
mod tests {
    use super::*;

    /// A streak of exactly `MAX_CONSECUTIVE_MISSING_RUNS` missing runs is
    /// still reported as valid and records no invalidation event.
    #[test]
    fn tracker_valid_below_threshold() {
        let mut t = FeatureMissingnessTracker::new("S001");
        assert_eq!(t.update(true, 0), DriftValidity::Valid);
        assert_eq!(t.update(true, 1), DriftValidity::Valid);
        assert_eq!(t.update(true, 2), DriftValidity::Valid);
        // exactly at threshold: still Valid
        assert_eq!(t.consecutive_missing, 3);
        assert!(!t.is_invalidated());
        assert!(t.invalidation_events.is_empty());
    }

    /// The first missing run beyond the threshold is the one that flips the
    /// tracker to `Unknown`, and its run index is recorded.
    #[test]
    fn tracker_invalidates_after_threshold() {
        let mut t = FeatureMissingnessTracker::new("S002");
        for i in 0..MAX_CONSECUTIVE_MISSING_RUNS {
            assert_eq!(t.update(true, i), DriftValidity::Valid);
        }
        // 4th consecutive missing (run index 3) → Unknown
        assert_eq!(
            t.update(true, MAX_CONSECUTIVE_MISSING_RUNS),
            DriftValidity::Unknown
        );
        assert!(t.is_invalidated());
        assert_eq!(t.invalidation_events, vec![MAX_CONSECUTIVE_MISSING_RUNS]);
    }

    /// A present observation ends the streak regardless of how long the
    /// outage was, without touching the running total of missing runs.
    #[test]
    fn tracker_resets_on_valid_observation() {
        let mut t = FeatureMissingnessTracker::new("S003");
        for i in 0..10 {
            t.update(true, i);
        }
        // Valid observation resets streak
        assert_eq!(t.update(false, 10), DriftValidity::Valid);
        assert_eq!(t.consecutive_missing, 0);
        assert_eq!(t.total_missing, 10);
        assert!(!t.is_invalidated());
    }

    /// Records are suppressed only for runs beyond the threshold and recover
    /// as soon as a present observation arrives.
    #[test]
    fn grammar_filter_suppresses_after_threshold() {
        let mut grammar = MissingnessAwareGrammar::new();
        let values: Vec<f64> = vec![0.0; 8];
        // First 4 are missing, rest are present
        let is_imputed = vec![true, true, true, true, false, false, false, false];
        let records = grammar.process("S001", &values, &is_imputed);
        assert_eq!(records.len(), 8);

        // Runs 0-2: consecutive missing ≤ 3 → Valid
        assert_eq!(records[0].drift_validity, DriftValidity::Valid);
        assert_eq!(records[1].drift_validity, DriftValidity::Valid);
        assert_eq!(records[2].drift_validity, DriftValidity::Valid);
        assert!(!records[2].suppressed_by_missingness);
        // Run 3: 4th consecutive missing → Unknown
        assert_eq!(records[3].drift_validity, DriftValidity::Unknown);
        assert!(records[3].suppressed_by_missingness);
        // After valid observation, back to Valid
        assert_eq!(records[4].drift_validity, DriftValidity::Valid);
        assert!(!records[4].suppressed_by_missingness);
    }

    /// The summary counts features, invalidation events and missing
    /// observations across all processed features.
    #[test]
    fn summary_counts_invalidated_features() {
        let mut grammar = MissingnessAwareGrammar::new();
        // Feature with >3 consecutive missing
        let values = vec![0.0; 5];
        let all_missing = vec![true; 5];
        grammar.process("S_BAD", &values, &all_missing);

        // Feature with no missingness
        let none_missing = vec![false; 5];
        grammar.process("S_GOOD", &values, &none_missing);

        let summary = grammar.summary();
        assert_eq!(summary.total_features, 2);
        assert_eq!(summary.features_with_invalidations, 1);
        // Five consecutive missing runs: runs 3 and 4 exceed the threshold.
        assert_eq!(summary.total_invalidation_events, 2);
        assert_eq!(summary.total_missing_observations, 5);
        assert_eq!(
            summary.max_consecutive_missing_threshold,
            MAX_CONSECUTIVE_MISSING_RUNS
        );
    }
}