Skip to main content

dsfb_semiconductor/
multivariate_observer.rs

1//! Multivariate observer — ingests PCA/FDC residual streams and provides
2//! structural interpretation via the `StructuralPCA` module.
3//!
4//! # Design: Monitoring the Monitor
5//! Existing FDC systems reduce the multivariate residual space to two scalar
6//! statistics:
7//!
8//! * **Hotelling's T²** — squared Mahalanobis distance in the principal
9//!   component subspace; detects changes in the *modelled* variation.
10//! * **Q-Statistic (SPE)** — sum of squared residuals in the *complement*
11//!   subspace; detects changes in the *unmodelled* variation.
12//!
13//! Neither statistic explains *which* process variables are responsible
14//! for the excursion, nor *how* the residual vector is oriented in
15//! physical space.
16//!
17//! The [`StructuralPCA`] module provides the "why" to the PCA "what":
18//! it decomposes the PCA residual vector into its principal loading
19//! directions, identifies the dominant physical dimensions, and maps the
20//! result to a DSFB grammar state and semiotic label.
21//!
22//! # Observer-Only Pattern
23//! **No upstream controller state is modified.**  The multivariate observer
24//! is a read-only side-channel that consumes statistics already produced by
25//! the FDC system.  If a measurement is unavailable the observer degrades
26//! gracefully by returning [`StructuralVerdict::Unavailable`].
27
28use serde::{Deserialize, Serialize};
29
30// ─── PCA Observation ─────────────────────────────────────────────────────────
31
/// A single multivariate process observation expressed in terms of the PCA
/// residual statistics already computed by the upstream FDC system.
///
/// All fields are *received from* the FDC system; the DSFB observer never
/// writes back to it.  Every `Option` field models telemetry the upstream
/// system may fail to expose for a given run.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PcaObservation {
    /// Zero-indexed run / lot identifier.  Copied unchanged into the
    /// resulting [`StructuralInterpretation`].
    pub run_index: usize,
    /// Hotelling's T² statistic for this run
    /// (Mahalanobis distance² in the PC subspace).
    /// `None` makes [`StructuralPCA::interpret`] return
    /// [`StructuralVerdict::Unavailable`].
    pub t2: Option<f64>,
    /// Q-Statistic (Squared Prediction Error / SPE) for this run.
    /// `None` makes [`StructuralPCA::interpret`] return
    /// [`StructuralVerdict::Unavailable`].
    pub q_stat: Option<f64>,
    /// Number of principal components retained by the upstream FDC model.
    /// Informational: no code visible in this file reads it.
    pub n_components: usize,
    /// Loadings of the first principal component, one entry per sensor.
    /// Must have length == number of sensors selected for PCA.
    /// `None` when the FDC model does not expose loadings.
    /// Ranked by absolute value to name the sensors behind a
    /// [`StructuralVerdict::ModelledShift`].
    pub pc1_loading: Option<Vec<f64>>,
    /// Raw normalised residual vector (one entry per sensor).
    /// `None` when the FDC system does not expose individual residuals.
    /// Ranked by absolute value to name the sensors behind a
    /// [`StructuralVerdict::JointExcursion`].
    pub residual_vector: Option<Vec<f64>>,
    /// Sensor labels corresponding to entries in `pc1_loading` and
    /// `residual_vector`.  Labels are paired with values by position; the
    /// pairing stops at the shorter of the two sequences.
    pub sensor_labels: Vec<String>,
}
59
60// ─── Structural Verdict ──────────────────────────────────────────────────────
61
/// The structural interpretation the DSFB engine infers from the
/// PCA/FDC statistics.
///
/// The four alarm combinations of T² and Q map onto the first four
/// variants; `Unavailable` covers observations that lack either statistic.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StructuralVerdict {
    /// Both T² and Q are within their control limits; process is nominal.
    Nominal,
    /// T² is elevated but Q is within limits: the process has shifted
    /// *along* a known principal direction (modelled variation).  The
    /// sensors that dominate that direction are listed, strongest first,
    /// in the `dominant_sensors` field (empty when no ranking data was
    /// available).
    ModelledShift { dominant_sensors: Vec<String> },
    /// Q is elevated but T² is within limits: the process has moved
    /// *orthogonal* to the known principal directions (unmodelled variation).
    /// A new failure mode may be emerging.
    UnmodelledExcursion,
    /// Both T² and Q are elevated: a large, multi-dimensional excursion.
    /// This is the highest-severity verdict.  `dominant_sensors` lists the
    /// strongest contributors first (empty when no ranking data was
    /// available).
    JointExcursion { dominant_sensors: Vec<String> },
    /// Required statistics are missing; the observer cannot issue a verdict.
    Unavailable,
}
82
83impl StructuralVerdict {
84    /// Maps the verdict to the DSFB grammar state string that would be
85    /// emitted in the traceability manifest.
86    pub fn grammar_state(&self) -> &'static str {
87        match self {
88            Self::Nominal => "Admissible",
89            Self::ModelledShift { .. } => "SustainedDrift",
90            Self::UnmodelledExcursion => "TransientViolation",
91            Self::JointExcursion { .. } => "PersistentViolation",
92            Self::Unavailable => "Unavailable",
93        }
94    }
95
96    /// Recommended operator action.
97    pub fn action(&self) -> &'static str {
98        match self {
99            Self::Nominal => "Monitor",
100            Self::ModelledShift { .. } => "Review",
101            Self::UnmodelledExcursion => "Review — investigate new failure mode",
102            Self::JointExcursion { .. } => "Escalate",
103            Self::Unavailable => "Check FDC telemetry",
104        }
105    }
106}
107
108// ─── Structural PCA ──────────────────────────────────────────────────────────
109
/// Structural PCA module — provides the "why" to the PCA "what".
///
/// Given a [`PcaObservation`] from the existing FDC system, this module:
///
/// 1. Classifies the excursion into four structural categories (Nominal /
///    ModelledShift / UnmodelledExcursion / JointExcursion).
/// 2. When loadings and residuals are available, identifies the dominant
///    physical sensors responsible for the deviation.
/// 3. Emits a [`StructuralInterpretation`] that can be serialised into the
///    run manifest and the traceability audit trail.
///
/// # Thresholds
/// The control limits for T² and Q are obtained from the FDC model's healthy
/// phase statistics.  They must be provided at construction time; the DSFB
/// engine never re-computes these limits from raw data.  A statistic raises
/// an alarm only when it is strictly greater than its limit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructuralPCA {
    /// Upper control limit for T² (typically the 99th percentile of the
    /// chi-squared distribution with `n_components` degrees of freedom).
    pub t2_ucl: f64,
    /// Upper control limit for Q / SPE.
    pub q_ucl: f64,
    /// Number of dominant sensors to report in the structural interpretation.
    /// This is an upper bound: fewer labels are returned when fewer sensors
    /// are available.
    pub top_k_sensors: usize,
}
135
136impl Default for StructuralPCA {
137    fn default() -> Self {
138        Self {
139            t2_ucl: 9.21,                 // chi²(2, 0.99)
140            q_ucl: 3.0,                   // 3-sigma rule on Q
141            top_k_sensors: 5,
142        }
143    }
144}
145
146impl StructuralPCA {
147    /// Interpret a single [`PcaObservation`] and return the structural verdict.
148    pub fn interpret(&self, obs: &PcaObservation) -> StructuralInterpretation {
149        let t2_alarm = obs.t2.map(|v| v > self.t2_ucl);
150        let q_alarm = obs.q_stat.map(|v| v > self.q_ucl);
151
152        let verdict = match (t2_alarm, q_alarm) {
153            (None, _) | (_, None) => StructuralVerdict::Unavailable,
154            (Some(false), Some(false)) => StructuralVerdict::Nominal,
155            (Some(true), Some(false)) => {
156                let dominant = self.dominant_sensors(obs, true);
157                StructuralVerdict::ModelledShift {
158                    dominant_sensors: dominant,
159                }
160            }
161            (Some(false), Some(true)) => StructuralVerdict::UnmodelledExcursion,
162            (Some(true), Some(true)) => {
163                let dominant = self.dominant_sensors(obs, false);
164                StructuralVerdict::JointExcursion {
165                    dominant_sensors: dominant,
166                }
167            }
168        };
169
170        StructuralInterpretation {
171            run_index: obs.run_index,
172            t2: obs.t2,
173            t2_ucl: self.t2_ucl,
174            q_stat: obs.q_stat,
175            q_ucl: self.q_ucl,
176            verdict: verdict.clone(),
177            grammar_state: verdict.grammar_state().to_string(),
178            action: verdict.action().to_string(),
179            integration_mode: "read_only_side_channel".into(),
180        }
181    }
182
183    /// Returns the labels of the `top_k` sensors contributing most to the
184    /// PCA residual vector.  Falls back to loading contribution when no
185    /// residual vector is available.
186    fn dominant_sensors(&self, obs: &PcaObservation, use_loadings: bool) -> Vec<String> {
187        let scores: Option<Vec<f64>> = if use_loadings {
188            obs.pc1_loading
189                .as_ref()
190                .map(|l| l.iter().map(|v| v.abs()).collect())
191        } else {
192            obs.residual_vector
193                .as_ref()
194                .map(|r| r.iter().map(|v| v.abs()).collect())
195        };
196
197        let Some(mut scored) = scores.map(|scores| {
198            obs.sensor_labels
199                .iter()
200                .zip(scores.iter())
201                .map(|(label, &score)| (label.clone(), score))
202                .collect::<Vec<_>>()
203        }) else {
204            return Vec::new();
205        };
206
207        scored.sort_by(|a, b| b.1.total_cmp(&a.1));
208        scored
209            .into_iter()
210            .take(self.top_k_sensors)
211            .map(|(label, _)| label)
212            .collect()
213    }
214}
215
216// ─── Structural Interpretation Record ────────────────────────────────────────
217
/// The full structural interpretation record emitted by [`StructuralPCA::interpret`].
///
/// This struct is directly serialisable to JSON and included verbatim in
/// the `dsfb_run_manifest.json` audit trail.  It is self-describing: the
/// statistics, the limits they were judged against, and the resulting
/// verdict travel together.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructuralInterpretation {
    /// Run / lot identifier copied from the source observation.
    pub run_index: usize,
    /// Hotelling's T² as received from the observation (`None` if absent).
    pub t2: Option<f64>,
    /// T² upper control limit in force when the verdict was issued.
    pub t2_ucl: f64,
    /// Q-Statistic / SPE as received from the observation (`None` if absent).
    pub q_stat: Option<f64>,
    /// Q upper control limit in force when the verdict was issued.
    pub q_ucl: f64,
    /// Structural classification of the observation.
    pub verdict: StructuralVerdict,
    /// Grammar state string derived from `verdict` via
    /// [`StructuralVerdict::grammar_state`].
    pub grammar_state: String,
    /// Recommended operator action derived from `verdict` via
    /// [`StructuralVerdict::action`].
    pub action: String,
    /// Always `"read_only_side_channel"` — confirms the Observer-Only pattern.
    pub integration_mode: String,
}
235
236// ─── Multivariate Observer ────────────────────────────────────────────────────
237
238/// High-level observer that ingests PCA/FDC statistics from an upstream
239/// monitoring system and produces DSFB structural interpretations.
240///
241/// # Thread Safety
242/// The observer accumulates a history of observations in a [`Vec`].  If
243/// concurrent ingestion is required, wrap in an `Arc<Mutex<...>>`.
244#[derive(Debug, Default)]
245pub struct MultivariateObserver {
246    pub structural_pca: StructuralPCA,
247    history: Vec<StructuralInterpretation>,
248}
249
250impl MultivariateObserver {
251    /// Construct with a custom [`StructuralPCA`] configuration.
252    pub fn with_config(structural_pca: StructuralPCA) -> Self {
253        Self {
254            structural_pca,
255            history: Vec::new(),
256        }
257    }
258
259    /// Ingest a PCA observation and store the structural interpretation.
260    ///
261    /// This method is the only entry point for external data.  It never
262    /// modifies any upstream controller state.
263    pub fn ingest(&mut self, obs: &PcaObservation) -> &StructuralInterpretation {
264        let interpretation = self.structural_pca.interpret(obs);
265        self.history.push(interpretation);
266        self.history.last().unwrap()
267    }
268
269    /// Return all stored structural interpretations.
270    pub fn interpretations(&self) -> &[StructuralInterpretation] {
271        &self.history
272    }
273
274    /// Count observations where the verdict is a specific variant.
275    pub fn count_verdicts(&self, verdict: &StructuralVerdict) -> usize {
276        self.history
277            .iter()
278            .filter(|i| std::mem::discriminant(&i.verdict) == std::mem::discriminant(verdict))
279            .count()
280    }
281}
282
283// ─── Unit tests ───────────────────────────────────────────────────────────────
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Four-sensor fixture with both statistics present; callers choose the
    /// run index and the T² / Q values.
    fn observation(run_index: usize, t2: f64, q_stat: f64) -> PcaObservation {
        let sensor_labels = ["S001", "S002", "S003", "S004"]
            .iter()
            .map(|label| label.to_string())
            .collect();

        PcaObservation {
            run_index,
            t2: Some(t2),
            q_stat: Some(q_stat),
            n_components: 3,
            pc1_loading: Some(vec![0.8, 0.5, 0.1, 0.05]),
            residual_vector: Some(vec![1.2, 0.3, 0.1, 0.05]),
            sensor_labels,
        }
    }

    /// Runs the fixture through a default-configured [`StructuralPCA`].
    fn interpret_with_defaults(run_index: usize, t2: f64, q_stat: f64) -> StructuralInterpretation {
        StructuralPCA::default().interpret(&observation(run_index, t2, q_stat))
    }

    #[test]
    fn nominal_verdict_when_both_within_limits() {
        let result = interpret_with_defaults(0, 5.0, 1.5);
        assert_eq!(result.verdict, StructuralVerdict::Nominal);
        assert_eq!(result.grammar_state, "Admissible");
    }

    #[test]
    fn modelled_shift_when_t2_alarm() {
        let result = interpret_with_defaults(1, 15.0, 1.5);
        let is_modelled_shift = matches!(result.verdict, StructuralVerdict::ModelledShift { .. });
        assert!(
            is_modelled_shift,
            "expected ModelledShift, got {:?}",
            result.verdict
        );
        assert_eq!(result.grammar_state, "SustainedDrift");
    }

    #[test]
    fn unmodelled_excursion_when_q_alarm() {
        let result = interpret_with_defaults(2, 5.0, 8.0);
        assert_eq!(result.verdict, StructuralVerdict::UnmodelledExcursion);
        assert_eq!(result.grammar_state, "TransientViolation");
    }

    #[test]
    fn joint_excursion_when_both_alarm() {
        let result = interpret_with_defaults(3, 15.0, 8.0);
        let is_joint = matches!(result.verdict, StructuralVerdict::JointExcursion { .. });
        assert!(is_joint);
        assert_eq!(result.grammar_state, "PersistentViolation");
    }

    #[test]
    fn unavailable_when_t2_missing() {
        let without_t2 = PcaObservation {
            t2: None,
            ..observation(4, 0.0, 1.5)
        };
        let result = StructuralPCA::default().interpret(&without_t2);
        assert_eq!(result.verdict, StructuralVerdict::Unavailable);
    }

    #[test]
    fn dominant_sensors_returns_top_k() {
        let top_two = StructuralPCA {
            top_k_sensors: 2,
            ..Default::default()
        };
        let result = top_two.interpret(&observation(5, 15.0, 1.5));

        let StructuralVerdict::ModelledShift { dominant_sensors } = result.verdict else {
            panic!("expected ModelledShift");
        };
        assert_eq!(dominant_sensors.len(), 2);
        assert_eq!(dominant_sensors[0], "S001"); // highest loading 0.8
    }

    #[test]
    fn observer_accumulates_history() {
        let mut observer = MultivariateObserver::default();
        (0..5).for_each(|run| {
            observer.ingest(&observation(run, 5.0, 1.5));
        });

        assert_eq!(observer.interpretations().len(), 5);
        let nominal_count = observer.count_verdicts(&StructuralVerdict::Nominal);
        assert_eq!(nominal_count, 5);
    }

    #[test]
    fn integration_mode_is_read_only() {
        let result = interpret_with_defaults(0, 5.0, 1.5);
        assert_eq!(result.integration_mode, "read_only_side_channel");
    }
}
391}