Skip to main content

dsfb_semiconductor/
baselines.rs

1use crate::config::PipelineConfig;
2use crate::nominal::NominalModel;
3use crate::preprocessing::PreparedDataset;
4use crate::residual::ResidualSet;
5use serde::Serialize;
6
7#[derive(Debug, Clone, Serialize)]
8pub struct EwmaFeatureTrace {
9    pub feature_index: usize,
10    pub feature_name: String,
11    pub ewma: Vec<f64>,
12    pub healthy_mean: f64,
13    pub healthy_std: f64,
14    pub threshold: f64,
15    pub alarm: Vec<bool>,
16}
17
18#[derive(Debug, Clone, Serialize)]
19pub struct CusumFeatureTrace {
20    pub feature_index: usize,
21    pub feature_name: String,
22    pub cusum: Vec<f64>,
23    pub healthy_mean: f64,
24    pub healthy_std: f64,
25    pub kappa: f64,
26    pub alarm_threshold: f64,
27    pub alarm: Vec<bool>,
28}
29
30#[derive(Debug, Clone, Serialize)]
31pub struct RunEnergyTrace {
32    pub energy: Vec<f64>,
33    pub healthy_mean: f64,
34    pub healthy_std: f64,
35    pub threshold: f64,
36    pub analyzable_feature_count: usize,
37    pub alarm: Vec<bool>,
38}
39
40#[derive(Debug, Clone, Serialize)]
41pub struct PcaFdcTrace {
42    pub t2: Vec<f64>,
43    pub spe: Vec<f64>,
44    pub t2_healthy_mean: f64,
45    pub t2_healthy_std: f64,
46    pub spe_healthy_mean: f64,
47    pub spe_healthy_std: f64,
48    pub t2_threshold: f64,
49    pub spe_threshold: f64,
50    pub analyzable_feature_count: usize,
51    pub healthy_observation_count: usize,
52    pub retained_components: usize,
53    pub explained_variance_fraction: f64,
54    pub target_variance_explained: f64,
55    pub alarm: Vec<bool>,
56}
57
58#[derive(Debug, Clone, Serialize)]
59pub struct BaselineSet {
60    pub ewma: Vec<EwmaFeatureTrace>,
61    pub cusum: Vec<CusumFeatureTrace>,
62    pub run_energy: RunEnergyTrace,
63    pub pca_fdc: PcaFdcTrace,
64}
65
66pub fn compute_baselines(
67    dataset: &PreparedDataset,
68    nominal: &NominalModel,
69    residuals: &ResidualSet,
70    config: &PipelineConfig,
71) -> BaselineSet {
72    let ewma = residuals
73        .traces
74        .iter()
75        .zip(&nominal.features)
76        .map(|(trace, feature)| {
77            let ewma = ewma_series(&trace.norms, config.ewma_alpha);
78            let healthy_ewma = dataset
79                .healthy_pass_indices
80                .iter()
81                .filter_map(|&idx| ewma.get(idx).copied())
82                .collect::<Vec<_>>();
83            let healthy_mean = mean(&healthy_ewma).unwrap_or(0.0);
84            let healthy_std = sample_std(&healthy_ewma, healthy_mean).unwrap_or(0.0);
85            let threshold = if feature.analyzable {
86                healthy_mean + config.ewma_sigma_multiplier * healthy_std.max(config.epsilon)
87            } else {
88                0.0
89            };
90            let alarm = ewma
91                .iter()
92                .map(|value| feature.analyzable && *value > threshold)
93                .collect::<Vec<_>>();
94
95            EwmaFeatureTrace {
96                feature_index: trace.feature_index,
97                feature_name: trace.feature_name.clone(),
98                ewma,
99                healthy_mean,
100                healthy_std,
101                threshold,
102                alarm,
103            }
104        })
105        .collect::<Vec<_>>();
106
107    let cusum = residuals
108        .traces
109        .iter()
110        .zip(&nominal.features)
111        .map(|(trace, feature)| {
112            let healthy_norms = dataset
113                .healthy_pass_indices
114                .iter()
115                .filter_map(|&idx| trace.norms.get(idx).copied())
116                .collect::<Vec<_>>();
117            let healthy_mean = mean(&healthy_norms).unwrap_or(0.0);
118            let healthy_std = sample_std(&healthy_norms, healthy_mean).unwrap_or(0.0);
119            let sigma = healthy_std.max(config.epsilon);
120            let kappa = if feature.analyzable {
121                config.cusum_kappa_sigma_multiplier * sigma
122            } else {
123                0.0
124            };
125            let alarm_threshold = if feature.analyzable {
126                config.cusum_alarm_sigma_multiplier * sigma
127            } else {
128                0.0
129            };
130            let cusum = positive_cusum_series(&trace.norms, healthy_mean, kappa);
131            let alarm = cusum
132                .iter()
133                .map(|value| feature.analyzable && *value > alarm_threshold)
134                .collect::<Vec<_>>();
135
136            CusumFeatureTrace {
137                feature_index: trace.feature_index,
138                feature_name: trace.feature_name.clone(),
139                cusum,
140                healthy_mean,
141                healthy_std,
142                kappa,
143                alarm_threshold,
144                alarm,
145            }
146        })
147        .collect::<Vec<_>>();
148
149    let analyzable_feature_indices = nominal
150        .features
151        .iter()
152        .filter(|feature| feature.analyzable)
153        .map(|feature| feature.feature_index)
154        .collect::<Vec<_>>();
155    let run_energy_series = (0..dataset.labels.len())
156        .map(|run_index| {
157            if analyzable_feature_indices.is_empty() {
158                return 0.0;
159            }
160            analyzable_feature_indices
161                .iter()
162                .map(|&feature_index| {
163                    let sigma = nominal.features[feature_index]
164                        .healthy_std
165                        .max(config.epsilon);
166                    let residual = residuals.traces[feature_index].residuals[run_index];
167                    let z = residual / sigma;
168                    z * z
169                })
170                .sum::<f64>()
171                / analyzable_feature_indices.len() as f64
172        })
173        .collect::<Vec<_>>();
174    let healthy_run_energy = dataset
175        .healthy_pass_indices
176        .iter()
177        .filter_map(|&idx| run_energy_series.get(idx).copied())
178        .collect::<Vec<_>>();
179    let run_energy_healthy_mean = mean(&healthy_run_energy).unwrap_or(0.0);
180    let run_energy_healthy_std =
181        sample_std(&healthy_run_energy, run_energy_healthy_mean).unwrap_or(0.0);
182    let run_energy_threshold = run_energy_healthy_mean
183        + config.run_energy_sigma_multiplier * run_energy_healthy_std.max(config.epsilon);
184    let run_energy_alarm = run_energy_series
185        .iter()
186        .map(|value| !analyzable_feature_indices.is_empty() && *value > run_energy_threshold)
187        .collect::<Vec<_>>();
188
189    let pca_fdc = compute_pca_fdc(
190        dataset,
191        nominal,
192        residuals,
193        config,
194        &analyzable_feature_indices,
195    );
196
197    BaselineSet {
198        ewma,
199        cusum,
200        run_energy: RunEnergyTrace {
201            energy: run_energy_series,
202            healthy_mean: run_energy_healthy_mean,
203            healthy_std: run_energy_healthy_std,
204            threshold: run_energy_threshold,
205            analyzable_feature_count: analyzable_feature_indices.len(),
206            alarm: run_energy_alarm,
207        },
208        pca_fdc,
209    }
210}
211
212pub fn ewma_series(values: &[f64], alpha: f64) -> Vec<f64> {
213    if values.is_empty() {
214        return Vec::new();
215    }
216    let mut out = Vec::with_capacity(values.len());
217    let mut state = values[0];
218    out.push(state);
219    for value in &values[1..] {
220        state = alpha * *value + (1.0 - alpha) * state;
221        out.push(state);
222    }
223    out
224}
225
226pub fn positive_cusum_series(values: &[f64], target_mean: f64, kappa: f64) -> Vec<f64> {
227    let mut out = Vec::with_capacity(values.len());
228    let mut state = 0.0;
229    for value in values {
230        state = (state + (*value - target_mean - kappa)).max(0.0);
231        out.push(state);
232    }
233    out
234}
235
236fn mean(values: &[f64]) -> Option<f64> {
237    (!values.is_empty()).then(|| values.iter().sum::<f64>() / values.len() as f64)
238}
239
240fn sample_std(values: &[f64], mean: f64) -> Option<f64> {
241    if values.len() < 2 {
242        return None;
243    }
244    let variance = values
245        .iter()
246        .map(|value| {
247            let centered = *value - mean;
248            centered * centered
249        })
250        .sum::<f64>()
251        / (values.len() as f64 - 1.0);
252    Some(variance.sqrt())
253}
254
255fn compute_pca_fdc(
256    dataset: &PreparedDataset,
257    nominal: &NominalModel,
258    residuals: &ResidualSet,
259    config: &PipelineConfig,
260    analyzable_feature_indices: &[usize],
261) -> PcaFdcTrace {
262    let run_count = dataset.labels.len();
263    let healthy_observation_count = dataset.healthy_pass_indices.len();
264    if analyzable_feature_indices.is_empty() || healthy_observation_count < 2 {
265        return PcaFdcTrace {
266            t2: vec![0.0; run_count],
267            spe: vec![0.0; run_count],
268            t2_healthy_mean: 0.0,
269            t2_healthy_std: 0.0,
270            spe_healthy_mean: 0.0,
271            spe_healthy_std: 0.0,
272            t2_threshold: 0.0,
273            spe_threshold: 0.0,
274            analyzable_feature_count: analyzable_feature_indices.len(),
275            healthy_observation_count,
276            retained_components: 0,
277            explained_variance_fraction: 0.0,
278            target_variance_explained: config.pca_variance_explained,
279            alarm: vec![false; run_count],
280        };
281    }
282
283    let healthy_standardized = dataset
284        .healthy_pass_indices
285        .iter()
286        .map(|&run_index| {
287            analyzable_feature_indices
288                .iter()
289                .map(|&feature_index| {
290                    standardized_residual(nominal, residuals, feature_index, run_index, config)
291                })
292                .collect::<Vec<_>>()
293        })
294        .collect::<Vec<_>>();
295    let column_means = column_means(&healthy_standardized);
296    let centered_healthy = healthy_standardized
297        .iter()
298        .map(|row| {
299            row.iter()
300                .zip(&column_means)
301                .map(|(value, mean)| *value - *mean)
302                .collect::<Vec<_>>()
303        })
304        .collect::<Vec<_>>();
305
306    let gram = gram_matrix(&centered_healthy);
307    let (eigenvalues, eigenvectors) =
308        jacobi_eigen_symmetric(&gram, 64 * gram.len().max(1).pow(2), 1.0e-10);
309    let mut components = eigenvalues
310        .iter()
311        .copied()
312        .zip(eigenvectors)
313        .filter(|(eigenvalue, _)| *eigenvalue > config.epsilon)
314        .collect::<Vec<_>>();
315    components
316        .sort_by(|(lhs, _), (rhs, _)| rhs.partial_cmp(lhs).unwrap_or(std::cmp::Ordering::Equal));
317
318    let total_variance = components.iter().map(|(value, _)| *value).sum::<f64>();
319    let mut retained = Vec::new();
320    let mut cumulative_variance = 0.0;
321    if total_variance > config.epsilon {
322        for (eigenvalue, sample_eigenvector) in components {
323            cumulative_variance += eigenvalue;
324            let loading = sample_to_feature_loading(
325                &centered_healthy,
326                &sample_eigenvector,
327                eigenvalue,
328                config.epsilon,
329            );
330            retained.push((eigenvalue, loading));
331            if cumulative_variance / total_variance >= config.pca_variance_explained {
332                break;
333            }
334        }
335    }
336
337    let explained_variance_fraction = if total_variance > config.epsilon {
338        retained.iter().map(|(value, _)| *value).sum::<f64>() / total_variance
339    } else {
340        0.0
341    };
342
343    let mut t2 = Vec::with_capacity(run_count);
344    let mut spe = Vec::with_capacity(run_count);
345    for run_index in 0..run_count {
346        let centered = analyzable_feature_indices
347            .iter()
348            .enumerate()
349            .map(|(local_index, &feature_index)| {
350                standardized_residual(nominal, residuals, feature_index, run_index, config)
351                    - column_means[local_index]
352            })
353            .collect::<Vec<_>>();
354        let (t2_value, spe_value) = pca_scores(&centered, &retained, config.epsilon);
355        t2.push(t2_value);
356        spe.push(spe_value);
357    }
358
359    let healthy_t2 = dataset
360        .healthy_pass_indices
361        .iter()
362        .filter_map(|&run_index| t2.get(run_index).copied())
363        .collect::<Vec<_>>();
364    let healthy_spe = dataset
365        .healthy_pass_indices
366        .iter()
367        .filter_map(|&run_index| spe.get(run_index).copied())
368        .collect::<Vec<_>>();
369    let t2_healthy_mean = mean(&healthy_t2).unwrap_or(0.0);
370    let t2_healthy_std = sample_std(&healthy_t2, t2_healthy_mean).unwrap_or(0.0);
371    let spe_healthy_mean = mean(&healthy_spe).unwrap_or(0.0);
372    let spe_healthy_std = sample_std(&healthy_spe, spe_healthy_mean).unwrap_or(0.0);
373    let t2_threshold =
374        t2_healthy_mean + config.pca_t2_sigma_multiplier * t2_healthy_std.max(config.epsilon);
375    let spe_threshold =
376        spe_healthy_mean + config.pca_spe_sigma_multiplier * spe_healthy_std.max(config.epsilon);
377    let alarm = (0..run_count)
378        .map(|run_index| t2[run_index] > t2_threshold || spe[run_index] > spe_threshold)
379        .collect::<Vec<_>>();
380
381    PcaFdcTrace {
382        t2,
383        spe,
384        t2_healthy_mean,
385        t2_healthy_std,
386        spe_healthy_mean,
387        spe_healthy_std,
388        t2_threshold,
389        spe_threshold,
390        analyzable_feature_count: analyzable_feature_indices.len(),
391        healthy_observation_count,
392        retained_components: retained.len(),
393        explained_variance_fraction,
394        target_variance_explained: config.pca_variance_explained,
395        alarm,
396    }
397}
398
399fn standardized_residual(
400    nominal: &NominalModel,
401    residuals: &ResidualSet,
402    feature_index: usize,
403    run_index: usize,
404    config: &PipelineConfig,
405) -> f64 {
406    let sigma = nominal.features[feature_index]
407        .healthy_std
408        .max(config.epsilon);
409    residuals.traces[feature_index].residuals[run_index] / sigma
410}
411
412fn column_means(matrix: &[Vec<f64>]) -> Vec<f64> {
413    if matrix.is_empty() {
414        return Vec::new();
415    }
416    let width = matrix[0].len();
417    let mut means = vec![0.0; width];
418    for row in matrix {
419        for (index, value) in row.iter().enumerate() {
420            means[index] += *value;
421        }
422    }
423    for mean in &mut means {
424        *mean /= matrix.len() as f64;
425    }
426    means
427}
428
429fn gram_matrix(matrix: &[Vec<f64>]) -> Vec<Vec<f64>> {
430    let n = matrix.len();
431    let mut gram = vec![vec![0.0; n]; n];
432    for row_index in 0..n {
433        for col_index in row_index..n {
434            let value = dot(&matrix[row_index], &matrix[col_index]) / (n as f64 - 1.0);
435            gram[row_index][col_index] = value;
436            gram[col_index][row_index] = value;
437        }
438    }
439    gram
440}
441
442fn jacobi_eigen_symmetric(
443    matrix: &[Vec<f64>],
444    max_iterations: usize,
445    tolerance: f64,
446) -> (Vec<f64>, Vec<Vec<f64>>) {
447    let n = matrix.len();
448    if n == 0 {
449        return (Vec::new(), Vec::new());
450    }
451    let mut a = matrix.to_vec();
452    let mut v = vec![vec![0.0; n]; n];
453    for index in 0..n {
454        v[index][index] = 1.0;
455    }
456
457    for _ in 0..max_iterations {
458        let mut p = 0usize;
459        let mut q = 0usize;
460        let mut max_off_diagonal = 0.0_f64;
461        for row in 0..n {
462            for col in (row + 1)..n {
463                let magnitude = a[row][col].abs();
464                if magnitude > max_off_diagonal {
465                    max_off_diagonal = magnitude;
466                    p = row;
467                    q = col;
468                }
469            }
470        }
471        if max_off_diagonal <= tolerance {
472            break;
473        }
474
475        let theta = 0.5 * (2.0 * a[p][q]).atan2(a[q][q] - a[p][p]);
476        let cos_theta = theta.cos();
477        let sin_theta = theta.sin();
478
479        let app = a[p][p];
480        let aqq = a[q][q];
481        let apq = a[p][q];
482        a[p][p] = cos_theta * cos_theta * app - 2.0 * sin_theta * cos_theta * apq
483            + sin_theta * sin_theta * aqq;
484        a[q][q] = sin_theta * sin_theta * app
485            + 2.0 * sin_theta * cos_theta * apq
486            + cos_theta * cos_theta * aqq;
487        a[p][q] = 0.0;
488        a[q][p] = 0.0;
489
490        for k in 0..n {
491            if k == p || k == q {
492                continue;
493            }
494            let akp = a[k][p];
495            let akq = a[k][q];
496            a[k][p] = cos_theta * akp - sin_theta * akq;
497            a[p][k] = a[k][p];
498            a[k][q] = sin_theta * akp + cos_theta * akq;
499            a[q][k] = a[k][q];
500        }
501
502        for row in &mut v {
503            let vip = row[p];
504            let viq = row[q];
505            row[p] = cos_theta * vip - sin_theta * viq;
506            row[q] = sin_theta * vip + cos_theta * viq;
507        }
508    }
509
510    let eigenvalues = (0..n).map(|index| a[index][index]).collect::<Vec<_>>();
511    let eigenvectors = (0..n)
512        .map(|column| v.iter().map(|row| row[column]).collect::<Vec<_>>())
513        .collect::<Vec<_>>();
514    (eigenvalues, eigenvectors)
515}
516
517fn sample_to_feature_loading(
518    centered_healthy: &[Vec<f64>],
519    sample_eigenvector: &[f64],
520    eigenvalue: f64,
521    epsilon: f64,
522) -> Vec<f64> {
523    let singular = (eigenvalue * (centered_healthy.len() as f64 - 1.0))
524        .max(epsilon)
525        .sqrt();
526    let feature_count = centered_healthy.first().map(|row| row.len()).unwrap_or(0);
527    let mut loading = vec![0.0; feature_count];
528    for (sample_index, row) in centered_healthy.iter().enumerate() {
529        let weight = sample_eigenvector[sample_index];
530        for (feature_index, value) in row.iter().enumerate() {
531            loading[feature_index] += value * weight;
532        }
533    }
534    for value in &mut loading {
535        *value /= singular;
536    }
537    let norm = l2_norm(&loading).max(epsilon);
538    for value in &mut loading {
539        *value /= norm;
540    }
541    loading
542}
543
544fn pca_scores(
545    centered: &[f64],
546    retained_components: &[(f64, Vec<f64>)],
547    epsilon: f64,
548) -> (f64, f64) {
549    if retained_components.is_empty() {
550        return (0.0, squared_norm(centered));
551    }
552    let mut reconstructed = vec![0.0; centered.len()];
553    let mut t2 = 0.0;
554    for (eigenvalue, loading) in retained_components {
555        let score = dot(centered, loading);
556        t2 += score * score / eigenvalue.max(epsilon);
557        for (index, value) in loading.iter().enumerate() {
558            reconstructed[index] += score * value;
559        }
560    }
561    let mut residual = vec![0.0; centered.len()];
562    for (index, value) in centered.iter().enumerate() {
563        residual[index] = *value - reconstructed[index];
564    }
565    (t2, squared_norm(&residual))
566}
567
568fn dot(lhs: &[f64], rhs: &[f64]) -> f64 {
569    lhs.iter().zip(rhs).map(|(left, right)| left * right).sum()
570}
571
572fn squared_norm(values: &[f64]) -> f64 {
573    values.iter().map(|value| value * value).sum()
574}
575
576fn l2_norm(values: &[f64]) -> f64 {
577    squared_norm(values).sqrt()
578}
579
580#[cfg(test)]
581mod tests {
582    use super::*;
583
584    #[test]
585    fn ewma_series_matches_recursive_definition() {
586        let ewma = ewma_series(&[1.0, 3.0, 5.0], 0.5);
587        assert_eq!(ewma, vec![1.0, 2.0, 3.5]);
588    }
589
590    #[test]
591    fn positive_cusum_accumulates_only_above_target_plus_kappa() {
592        let cusum = positive_cusum_series(&[1.0, 2.0, 4.0, 3.0, 1.5], 1.0, 0.5);
593        assert_eq!(cusum, vec![0.0, 0.5, 3.0, 4.5, 4.5]);
594    }
595
596    #[test]
597    fn jacobi_eigen_symmetric_recovers_simple_diagonalization() {
598        let matrix = vec![vec![3.0, 1.0], vec![1.0, 3.0]];
599        let (mut eigenvalues, _eigenvectors) = jacobi_eigen_symmetric(&matrix, 64, 1.0e-12);
600        eigenvalues.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());
601        assert!((eigenvalues[0] - 2.0).abs() < 1.0e-6);
602        assert!((eigenvalues[1] - 4.0).abs() < 1.0e-6);
603    }
604
605    #[test]
606    fn pca_scores_are_finite_without_retained_components() {
607        let (t2, spe) = pca_scores(&[1.0, -2.0], &[], 1.0e-9);
608        assert_eq!(t2, 0.0);
609        assert_eq!(spe, 5.0);
610    }
611}