Skip to main content

dsfb_semiconductor/
missingness.rs

1//! Missingness-aware grammar: invalidates drift computation when sensor data
2//! is absent for more than [`MAX_CONSECUTIVE_MISSING_RUNS`] consecutive runs.
3//!
4//! # Hardware Reality Check
5//! Semiconductor sensors fail.  An MFC with a broken transducer will produce
6//! a flat line of zeros, not NaN — and a naive DSFB engine will interpret
7//! a sustained zero residual as "nominal" when the truth is "unknown."
8//!
9//! More dangerously, imputed values (mean-fill) can accumulate into a
10//! spurious drift signal over a long outage window, causing the engine to
11//! escalate a phantom anomaly that exists only in the imputation model.
12//!
13//! # Policy
14//! * If a feature's sensor is missing for `> MAX_CONSECUTIVE_MISSING_RUNS`
15//!   consecutive runs, the drift value `d` is **invalidated** and set to
16//!   [`DriftValidity::Unknown`].
17//! * Grammar transitions from [`DriftValidity::Unknown`] features are
18//!   **suppressed** — the feature is held at its last valid grammar state with
19//!   a `suppressed_by_missingness` flag set to `true`.
20//! * The outage event is recorded verbatim in the traceability manifest,
21//!   preserving the audit trail.
22
23use serde::{Deserialize, Serialize};
24use std::collections::HashMap;
25
/// Maximum number of consecutive missing runs before drift is invalidated.
///
/// The comparison against this limit is strict: a streak of exactly this many
/// missing runs is still reported as valid, and invalidation begins with the
/// next consecutive missing run.
pub const MAX_CONSECUTIVE_MISSING_RUNS: usize = 3;
28
29// ─── Drift Validity ───────────────────────────────────────────────────────────
30
/// Marks whether the first-difference (drift) value for a run is valid.
///
/// One value is produced per run by [`FeatureMissingnessTracker::update`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DriftValidity {
    /// Drift is trusted: the current streak of missing runs does not exceed
    /// [`MAX_CONSECUTIVE_MISSING_RUNS`] (this includes runs that are
    /// themselves missing but still inside the permitted window).
    Valid,
    /// Drift invalidated due to missing data beyond the permitted window.
    Unknown,
}
39
40// ─── Feature Missingness Tracker ─────────────────────────────────────────────
41
/// Per-feature missingness tracker.
///
/// Accumulates consecutive missing runs and emits [`DriftValidity::Unknown`]
/// once the streak exceeds [`MAX_CONSECUTIVE_MISSING_RUNS`].
///
/// The `Default` value is an empty tracker with an empty `feature_id`; use
/// [`FeatureMissingnessTracker::new`] to attach an identifier.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FeatureMissingnessTracker {
    /// Feature identifier.
    pub feature_id: String,
    /// Current consecutive missing run count; reset to zero by any
    /// non-missing run.
    pub consecutive_missing: usize,
    /// Number of missing runs counted by `update` (not reset by a
    /// non-missing run).
    pub total_missing: usize,
    /// Run indices at which `update` reported [`DriftValidity::Unknown`].
    /// Every run past the threshold adds an entry, not only the first run of
    /// an outage.
    pub invalidation_events: Vec<usize>,
}
57
58impl FeatureMissingnessTracker {
59    pub fn new(feature_id: impl Into<String>) -> Self {
60        Self {
61            feature_id: feature_id.into(),
62            ..Default::default()
63        }
64    }
65
66    /// Update the tracker for a single run.
67    ///
68    /// * `is_missing` — whether the sensor value for this run is absent.
69    /// * `run_index` — zero-based run counter (used in invalidation events).
70    ///
71    /// Returns the [`DriftValidity`] for this run.
72    pub fn update(&mut self, is_missing: bool, run_index: usize) -> DriftValidity {
73        if is_missing {
74            self.consecutive_missing += 1;
75            self.total_missing += 1;
76        } else {
77            self.consecutive_missing = 0;
78        }
79
80        if self.consecutive_missing > MAX_CONSECUTIVE_MISSING_RUNS {
81            self.invalidation_events.push(run_index);
82            DriftValidity::Unknown
83        } else {
84            DriftValidity::Valid
85        }
86    }
87
88    /// Returns `true` if drift is currently invalidated.
89    #[must_use]
90    pub fn is_invalidated(&self) -> bool {
91        self.consecutive_missing > MAX_CONSECUTIVE_MISSING_RUNS
92    }
93}
94
95// ─── Missingness-Aware Run Record ─────────────────────────────────────────────
96
/// The annotated record for a single run after missingness processing.
///
/// Produced by [`MissingnessAwareGrammar::process`], one per input run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MissingnessAwareRecord {
    /// Zero-based position of this run within the processed sequence.
    pub run_index: usize,
    /// Identifier of the feature (sensor) this record belongs to.
    pub feature_id: String,
    /// The observed or imputed value.
    pub value: f64,
    /// Whether the original observation was missing.
    pub is_missing: bool,
    /// Drift validity for this point.
    pub drift_validity: DriftValidity,
    /// Whether the grammar transition at this point is suppressed.
    /// As populated by `process`, this is `true` exactly when
    /// `drift_validity` is [`DriftValidity::Unknown`].
    pub suppressed_by_missingness: bool,
}
111
112// ─── Missingness-Aware Grammar Filter ────────────────────────────────────────
113
/// Applies the missingness policy across all features in a run sequence.
///
/// Call [`MissingnessAwareGrammar::process`] once per feature with the
/// raw imputed-value vector and the corresponding `is_imputed` mask.
///
/// The returned [`MissingnessAwareRecord`] vector can be used to gate the
/// downstream grammar layer: any record with
/// `suppressed_by_missingness = true` should be held at the previous
/// grammar state rather than allowing a new transition.
#[derive(Debug, Default)]
pub struct MissingnessAwareGrammar {
    /// One tracker per feature, keyed by feature identifier; entries are
    /// created the first time a feature is processed.
    trackers: HashMap<String, FeatureMissingnessTracker>,
}
127
128impl MissingnessAwareGrammar {
129    pub fn new() -> Self {
130        Self::default()
131    }
132
133    /// Process a feature's run sequence and return annotated records.
134    ///
135    /// # Arguments
136    /// * `feature_id` — sensor identifier.
137    /// * `values` — slice of imputed sensor values (one per run).
138    /// * `is_imputed` — parallel slice indicating which values were imputed
139    ///   (i.e., the original sensor reading was missing).
140    pub fn process(
141        &mut self,
142        feature_id: &str,
143        values: &[f64],
144        is_imputed: &[bool],
145    ) -> Vec<MissingnessAwareRecord> {
146        assert_eq!(
147            values.len(),
148            is_imputed.len(),
149            "values and is_imputed must have equal length"
150        );
151
152        let tracker = self
153            .trackers
154            .entry(feature_id.to_string())
155            .or_insert_with(|| FeatureMissingnessTracker::new(feature_id));
156
157        // Reset between calls — each call processes a fresh run sequence.
158        tracker.consecutive_missing = 0;
159        tracker.invalidation_events.clear();
160
161        values
162            .iter()
163            .zip(is_imputed.iter())
164            .enumerate()
165            .map(|(run_index, (&value, &is_missing))| {
166                let drift_validity = tracker.update(is_missing, run_index);
167                MissingnessAwareRecord {
168                    run_index,
169                    feature_id: feature_id.to_string(),
170                    value,
171                    is_missing,
172                    drift_validity,
173                    suppressed_by_missingness: drift_validity == DriftValidity::Unknown,
174                }
175            })
176            .collect()
177    }
178
179    /// Return all feature trackers for serialisation into the traceability
180    /// manifest.
181    pub fn trackers(&self) -> &HashMap<String, FeatureMissingnessTracker> {
182        &self.trackers
183    }
184
185    /// Return a summary for embedding in the run manifest JSON.
186    pub fn summary(&self) -> MissingSummary {
187        let total_features = self.trackers.len();
188        let features_with_invalidations = self
189            .trackers
190            .values()
191            .filter(|t| !t.invalidation_events.is_empty())
192            .count();
193        let total_invalidation_events: usize = self
194            .trackers
195            .values()
196            .map(|t| t.invalidation_events.len())
197            .sum();
198        let total_missing_observations: usize =
199            self.trackers.values().map(|t| t.total_missing).sum();
200
201        MissingSummary {
202            total_features,
203            features_with_invalidations,
204            total_invalidation_events,
205            total_missing_observations,
206            max_consecutive_missing_threshold: MAX_CONSECUTIVE_MISSING_RUNS,
207        }
208    }
209}
210
211// ─── Summary ────────────────────────────────────────────────────────────────
212
213/// Compact summary of missingness across all features — emitted in the
214/// run manifest JSON for audit.
215#[derive(Debug, Clone, Serialize, Deserialize)]
216pub struct MissingSummary {
217    pub total_features: usize,
218    pub features_with_invalidations: usize,
219    pub total_invalidation_events: usize,
220    pub total_missing_observations: usize,
221    pub max_consecutive_missing_threshold: usize,
222}
223
224// ─── Unit tests ───────────────────────────────────────────────────────────────
225
#[cfg(test)]
mod tests {
    use super::*;

    /// A streak of exactly `MAX_CONSECUTIVE_MISSING_RUNS` stays valid.
    #[test]
    fn tracker_valid_below_threshold() {
        let mut t = FeatureMissingnessTracker::new("S001");
        assert_eq!(t.update(true, 0), DriftValidity::Valid);
        assert_eq!(t.update(true, 1), DriftValidity::Valid);
        assert_eq!(t.update(true, 2), DriftValidity::Valid);
        // exactly at threshold: still Valid
        assert_eq!(t.consecutive_missing, 3);
        assert!(!t.is_invalidated());
        assert!(t.invalidation_events.is_empty());
    }

    /// The first run past the threshold is the one that flips to Unknown.
    #[test]
    fn tracker_invalidates_after_threshold() {
        let mut t = FeatureMissingnessTracker::new("S002");
        // Runs 0..3 fill the permitted window and must all stay Valid, so
        // the assertion below pins the exact boundary.
        for i in 0..MAX_CONSECUTIVE_MISSING_RUNS {
            assert_eq!(t.update(true, i), DriftValidity::Valid);
        }
        // 4th consecutive missing → Unknown
        assert_eq!(
            t.update(true, MAX_CONSECUTIVE_MISSING_RUNS),
            DriftValidity::Unknown
        );
        assert!(t.is_invalidated());
        assert_eq!(t.invalidation_events, vec![MAX_CONSECUTIVE_MISSING_RUNS]);
    }

    /// A present observation clears the streak even after a long outage.
    #[test]
    fn tracker_resets_on_valid_observation() {
        let mut t = FeatureMissingnessTracker::new("S003");
        for i in 0..10 {
            t.update(true, i);
        }
        // Valid observation resets streak
        assert_eq!(t.update(false, 10), DriftValidity::Valid);
        assert_eq!(t.consecutive_missing, 0);
        assert!(!t.is_invalidated());
        // The running total is not affected by the reset of the streak.
        assert_eq!(t.total_missing, 10);
    }

    /// `process` marks only the runs past the threshold as suppressed.
    #[test]
    fn grammar_filter_suppresses_after_threshold() {
        let mut grammar = MissingnessAwareGrammar::new();
        let values: Vec<f64> = vec![0.0; 8];
        // First 4 are missing, rest are present
        let is_imputed = vec![true, true, true, true, false, false, false, false];
        let records = grammar.process("S001", &values, &is_imputed);

        assert_eq!(records.len(), values.len());
        // Runs 0-2: consecutive missing ≤ 3 → Valid
        assert_eq!(records[0].drift_validity, DriftValidity::Valid);
        assert_eq!(records[1].drift_validity, DriftValidity::Valid);
        assert_eq!(records[2].drift_validity, DriftValidity::Valid);
        assert!(!records[2].suppressed_by_missingness);
        // Run 3: 4th consecutive missing → Unknown
        assert_eq!(records[3].drift_validity, DriftValidity::Unknown);
        assert!(records[3].suppressed_by_missingness);
        // After valid observation, back to Valid
        assert_eq!(records[4].drift_validity, DriftValidity::Valid);
        assert!(!records[4].suppressed_by_missingness);
    }

    /// The summary aggregates invalidations and missing counts per feature.
    #[test]
    fn summary_counts_invalidated_features() {
        let mut grammar = MissingnessAwareGrammar::new();
        // Feature with >3 consecutive missing
        let values = vec![0.0; 5];
        let all_missing = vec![true; 5];
        grammar.process("S_BAD", &values, &all_missing);

        // Feature with no missingness
        let none_missing = vec![false; 5];
        grammar.process("S_GOOD", &values, &none_missing);

        let summary = grammar.summary();
        assert_eq!(summary.total_features, 2);
        assert_eq!(summary.features_with_invalidations, 1);
        // S_BAD: runs 3 and 4 are past the threshold.
        assert_eq!(summary.total_invalidation_events, 2);
        assert_eq!(summary.total_missing_observations, 5);
        assert_eq!(
            summary.max_consecutive_missing_threshold,
            MAX_CONSECUTIVE_MISSING_RUNS
        );
    }
}