1use crate::config::PipelineConfig;
2use crate::nominal::NominalModel;
3use crate::preprocessing::PreparedDataset;
4use crate::residual::ResidualSet;
5use serde::Serialize;
6
7#[derive(Debug, Clone, Serialize)]
8pub struct EwmaFeatureTrace {
9 pub feature_index: usize,
10 pub feature_name: String,
11 pub ewma: Vec<f64>,
12 pub healthy_mean: f64,
13 pub healthy_std: f64,
14 pub threshold: f64,
15 pub alarm: Vec<bool>,
16}
17
18#[derive(Debug, Clone, Serialize)]
19pub struct CusumFeatureTrace {
20 pub feature_index: usize,
21 pub feature_name: String,
22 pub cusum: Vec<f64>,
23 pub healthy_mean: f64,
24 pub healthy_std: f64,
25 pub kappa: f64,
26 pub alarm_threshold: f64,
27 pub alarm: Vec<bool>,
28}
29
30#[derive(Debug, Clone, Serialize)]
31pub struct RunEnergyTrace {
32 pub energy: Vec<f64>,
33 pub healthy_mean: f64,
34 pub healthy_std: f64,
35 pub threshold: f64,
36 pub analyzable_feature_count: usize,
37 pub alarm: Vec<bool>,
38}
39
40#[derive(Debug, Clone, Serialize)]
41pub struct PcaFdcTrace {
42 pub t2: Vec<f64>,
43 pub spe: Vec<f64>,
44 pub t2_healthy_mean: f64,
45 pub t2_healthy_std: f64,
46 pub spe_healthy_mean: f64,
47 pub spe_healthy_std: f64,
48 pub t2_threshold: f64,
49 pub spe_threshold: f64,
50 pub analyzable_feature_count: usize,
51 pub healthy_observation_count: usize,
52 pub retained_components: usize,
53 pub explained_variance_fraction: f64,
54 pub target_variance_explained: f64,
55 pub alarm: Vec<bool>,
56}
57
58#[derive(Debug, Clone, Serialize)]
59pub struct BaselineSet {
60 pub ewma: Vec<EwmaFeatureTrace>,
61 pub cusum: Vec<CusumFeatureTrace>,
62 pub run_energy: RunEnergyTrace,
63 pub pca_fdc: PcaFdcTrace,
64}
65
66pub fn compute_baselines(
67 dataset: &PreparedDataset,
68 nominal: &NominalModel,
69 residuals: &ResidualSet,
70 config: &PipelineConfig,
71) -> BaselineSet {
72 let ewma = residuals
73 .traces
74 .iter()
75 .zip(&nominal.features)
76 .map(|(trace, feature)| {
77 let ewma = ewma_series(&trace.norms, config.ewma_alpha);
78 let healthy_ewma = dataset
79 .healthy_pass_indices
80 .iter()
81 .filter_map(|&idx| ewma.get(idx).copied())
82 .collect::<Vec<_>>();
83 let healthy_mean = mean(&healthy_ewma).unwrap_or(0.0);
84 let healthy_std = sample_std(&healthy_ewma, healthy_mean).unwrap_or(0.0);
85 let threshold = if feature.analyzable {
86 healthy_mean + config.ewma_sigma_multiplier * healthy_std.max(config.epsilon)
87 } else {
88 0.0
89 };
90 let alarm = ewma
91 .iter()
92 .map(|value| feature.analyzable && *value > threshold)
93 .collect::<Vec<_>>();
94
95 EwmaFeatureTrace {
96 feature_index: trace.feature_index,
97 feature_name: trace.feature_name.clone(),
98 ewma,
99 healthy_mean,
100 healthy_std,
101 threshold,
102 alarm,
103 }
104 })
105 .collect::<Vec<_>>();
106
107 let cusum = residuals
108 .traces
109 .iter()
110 .zip(&nominal.features)
111 .map(|(trace, feature)| {
112 let healthy_norms = dataset
113 .healthy_pass_indices
114 .iter()
115 .filter_map(|&idx| trace.norms.get(idx).copied())
116 .collect::<Vec<_>>();
117 let healthy_mean = mean(&healthy_norms).unwrap_or(0.0);
118 let healthy_std = sample_std(&healthy_norms, healthy_mean).unwrap_or(0.0);
119 let sigma = healthy_std.max(config.epsilon);
120 let kappa = if feature.analyzable {
121 config.cusum_kappa_sigma_multiplier * sigma
122 } else {
123 0.0
124 };
125 let alarm_threshold = if feature.analyzable {
126 config.cusum_alarm_sigma_multiplier * sigma
127 } else {
128 0.0
129 };
130 let cusum = positive_cusum_series(&trace.norms, healthy_mean, kappa);
131 let alarm = cusum
132 .iter()
133 .map(|value| feature.analyzable && *value > alarm_threshold)
134 .collect::<Vec<_>>();
135
136 CusumFeatureTrace {
137 feature_index: trace.feature_index,
138 feature_name: trace.feature_name.clone(),
139 cusum,
140 healthy_mean,
141 healthy_std,
142 kappa,
143 alarm_threshold,
144 alarm,
145 }
146 })
147 .collect::<Vec<_>>();
148
149 let analyzable_feature_indices = nominal
150 .features
151 .iter()
152 .filter(|feature| feature.analyzable)
153 .map(|feature| feature.feature_index)
154 .collect::<Vec<_>>();
155 let run_energy_series = (0..dataset.labels.len())
156 .map(|run_index| {
157 if analyzable_feature_indices.is_empty() {
158 return 0.0;
159 }
160 analyzable_feature_indices
161 .iter()
162 .map(|&feature_index| {
163 let sigma = nominal.features[feature_index]
164 .healthy_std
165 .max(config.epsilon);
166 let residual = residuals.traces[feature_index].residuals[run_index];
167 let z = residual / sigma;
168 z * z
169 })
170 .sum::<f64>()
171 / analyzable_feature_indices.len() as f64
172 })
173 .collect::<Vec<_>>();
174 let healthy_run_energy = dataset
175 .healthy_pass_indices
176 .iter()
177 .filter_map(|&idx| run_energy_series.get(idx).copied())
178 .collect::<Vec<_>>();
179 let run_energy_healthy_mean = mean(&healthy_run_energy).unwrap_or(0.0);
180 let run_energy_healthy_std =
181 sample_std(&healthy_run_energy, run_energy_healthy_mean).unwrap_or(0.0);
182 let run_energy_threshold = run_energy_healthy_mean
183 + config.run_energy_sigma_multiplier * run_energy_healthy_std.max(config.epsilon);
184 let run_energy_alarm = run_energy_series
185 .iter()
186 .map(|value| !analyzable_feature_indices.is_empty() && *value > run_energy_threshold)
187 .collect::<Vec<_>>();
188
189 let pca_fdc = compute_pca_fdc(
190 dataset,
191 nominal,
192 residuals,
193 config,
194 &analyzable_feature_indices,
195 );
196
197 BaselineSet {
198 ewma,
199 cusum,
200 run_energy: RunEnergyTrace {
201 energy: run_energy_series,
202 healthy_mean: run_energy_healthy_mean,
203 healthy_std: run_energy_healthy_std,
204 threshold: run_energy_threshold,
205 analyzable_feature_count: analyzable_feature_indices.len(),
206 alarm: run_energy_alarm,
207 },
208 pca_fdc,
209 }
210}
211
212pub fn ewma_series(values: &[f64], alpha: f64) -> Vec<f64> {
213 if values.is_empty() {
214 return Vec::new();
215 }
216 let mut out = Vec::with_capacity(values.len());
217 let mut state = values[0];
218 out.push(state);
219 for value in &values[1..] {
220 state = alpha * *value + (1.0 - alpha) * state;
221 out.push(state);
222 }
223 out
224}
225
226pub fn positive_cusum_series(values: &[f64], target_mean: f64, kappa: f64) -> Vec<f64> {
227 let mut out = Vec::with_capacity(values.len());
228 let mut state = 0.0;
229 for value in values {
230 state = (state + (*value - target_mean - kappa)).max(0.0);
231 out.push(state);
232 }
233 out
234}
235
236fn mean(values: &[f64]) -> Option<f64> {
237 (!values.is_empty()).then(|| values.iter().sum::<f64>() / values.len() as f64)
238}
239
240fn sample_std(values: &[f64], mean: f64) -> Option<f64> {
241 if values.len() < 2 {
242 return None;
243 }
244 let variance = values
245 .iter()
246 .map(|value| {
247 let centered = *value - mean;
248 centered * centered
249 })
250 .sum::<f64>()
251 / (values.len() as f64 - 1.0);
252 Some(variance.sqrt())
253}
254
255fn compute_pca_fdc(
256 dataset: &PreparedDataset,
257 nominal: &NominalModel,
258 residuals: &ResidualSet,
259 config: &PipelineConfig,
260 analyzable_feature_indices: &[usize],
261) -> PcaFdcTrace {
262 let run_count = dataset.labels.len();
263 let healthy_observation_count = dataset.healthy_pass_indices.len();
264 if analyzable_feature_indices.is_empty() || healthy_observation_count < 2 {
265 return PcaFdcTrace {
266 t2: vec![0.0; run_count],
267 spe: vec![0.0; run_count],
268 t2_healthy_mean: 0.0,
269 t2_healthy_std: 0.0,
270 spe_healthy_mean: 0.0,
271 spe_healthy_std: 0.0,
272 t2_threshold: 0.0,
273 spe_threshold: 0.0,
274 analyzable_feature_count: analyzable_feature_indices.len(),
275 healthy_observation_count,
276 retained_components: 0,
277 explained_variance_fraction: 0.0,
278 target_variance_explained: config.pca_variance_explained,
279 alarm: vec![false; run_count],
280 };
281 }
282
283 let healthy_standardized = dataset
284 .healthy_pass_indices
285 .iter()
286 .map(|&run_index| {
287 analyzable_feature_indices
288 .iter()
289 .map(|&feature_index| {
290 standardized_residual(nominal, residuals, feature_index, run_index, config)
291 })
292 .collect::<Vec<_>>()
293 })
294 .collect::<Vec<_>>();
295 let column_means = column_means(&healthy_standardized);
296 let centered_healthy = healthy_standardized
297 .iter()
298 .map(|row| {
299 row.iter()
300 .zip(&column_means)
301 .map(|(value, mean)| *value - *mean)
302 .collect::<Vec<_>>()
303 })
304 .collect::<Vec<_>>();
305
306 let gram = gram_matrix(¢ered_healthy);
307 let (eigenvalues, eigenvectors) =
308 jacobi_eigen_symmetric(&gram, 64 * gram.len().max(1).pow(2), 1.0e-10);
309 let mut components = eigenvalues
310 .iter()
311 .copied()
312 .zip(eigenvectors)
313 .filter(|(eigenvalue, _)| *eigenvalue > config.epsilon)
314 .collect::<Vec<_>>();
315 components
316 .sort_by(|(lhs, _), (rhs, _)| rhs.partial_cmp(lhs).unwrap_or(std::cmp::Ordering::Equal));
317
318 let total_variance = components.iter().map(|(value, _)| *value).sum::<f64>();
319 let mut retained = Vec::new();
320 let mut cumulative_variance = 0.0;
321 if total_variance > config.epsilon {
322 for (eigenvalue, sample_eigenvector) in components {
323 cumulative_variance += eigenvalue;
324 let loading = sample_to_feature_loading(
325 ¢ered_healthy,
326 &sample_eigenvector,
327 eigenvalue,
328 config.epsilon,
329 );
330 retained.push((eigenvalue, loading));
331 if cumulative_variance / total_variance >= config.pca_variance_explained {
332 break;
333 }
334 }
335 }
336
337 let explained_variance_fraction = if total_variance > config.epsilon {
338 retained.iter().map(|(value, _)| *value).sum::<f64>() / total_variance
339 } else {
340 0.0
341 };
342
343 let mut t2 = Vec::with_capacity(run_count);
344 let mut spe = Vec::with_capacity(run_count);
345 for run_index in 0..run_count {
346 let centered = analyzable_feature_indices
347 .iter()
348 .enumerate()
349 .map(|(local_index, &feature_index)| {
350 standardized_residual(nominal, residuals, feature_index, run_index, config)
351 - column_means[local_index]
352 })
353 .collect::<Vec<_>>();
354 let (t2_value, spe_value) = pca_scores(¢ered, &retained, config.epsilon);
355 t2.push(t2_value);
356 spe.push(spe_value);
357 }
358
359 let healthy_t2 = dataset
360 .healthy_pass_indices
361 .iter()
362 .filter_map(|&run_index| t2.get(run_index).copied())
363 .collect::<Vec<_>>();
364 let healthy_spe = dataset
365 .healthy_pass_indices
366 .iter()
367 .filter_map(|&run_index| spe.get(run_index).copied())
368 .collect::<Vec<_>>();
369 let t2_healthy_mean = mean(&healthy_t2).unwrap_or(0.0);
370 let t2_healthy_std = sample_std(&healthy_t2, t2_healthy_mean).unwrap_or(0.0);
371 let spe_healthy_mean = mean(&healthy_spe).unwrap_or(0.0);
372 let spe_healthy_std = sample_std(&healthy_spe, spe_healthy_mean).unwrap_or(0.0);
373 let t2_threshold =
374 t2_healthy_mean + config.pca_t2_sigma_multiplier * t2_healthy_std.max(config.epsilon);
375 let spe_threshold =
376 spe_healthy_mean + config.pca_spe_sigma_multiplier * spe_healthy_std.max(config.epsilon);
377 let alarm = (0..run_count)
378 .map(|run_index| t2[run_index] > t2_threshold || spe[run_index] > spe_threshold)
379 .collect::<Vec<_>>();
380
381 PcaFdcTrace {
382 t2,
383 spe,
384 t2_healthy_mean,
385 t2_healthy_std,
386 spe_healthy_mean,
387 spe_healthy_std,
388 t2_threshold,
389 spe_threshold,
390 analyzable_feature_count: analyzable_feature_indices.len(),
391 healthy_observation_count,
392 retained_components: retained.len(),
393 explained_variance_fraction,
394 target_variance_explained: config.pca_variance_explained,
395 alarm,
396 }
397}
398
399fn standardized_residual(
400 nominal: &NominalModel,
401 residuals: &ResidualSet,
402 feature_index: usize,
403 run_index: usize,
404 config: &PipelineConfig,
405) -> f64 {
406 let sigma = nominal.features[feature_index]
407 .healthy_std
408 .max(config.epsilon);
409 residuals.traces[feature_index].residuals[run_index] / sigma
410}
411
412fn column_means(matrix: &[Vec<f64>]) -> Vec<f64> {
413 if matrix.is_empty() {
414 return Vec::new();
415 }
416 let width = matrix[0].len();
417 let mut means = vec![0.0; width];
418 for row in matrix {
419 for (index, value) in row.iter().enumerate() {
420 means[index] += *value;
421 }
422 }
423 for mean in &mut means {
424 *mean /= matrix.len() as f64;
425 }
426 means
427}
428
429fn gram_matrix(matrix: &[Vec<f64>]) -> Vec<Vec<f64>> {
430 let n = matrix.len();
431 let mut gram = vec![vec![0.0; n]; n];
432 for row_index in 0..n {
433 for col_index in row_index..n {
434 let value = dot(&matrix[row_index], &matrix[col_index]) / (n as f64 - 1.0);
435 gram[row_index][col_index] = value;
436 gram[col_index][row_index] = value;
437 }
438 }
439 gram
440}
441
442fn jacobi_eigen_symmetric(
443 matrix: &[Vec<f64>],
444 max_iterations: usize,
445 tolerance: f64,
446) -> (Vec<f64>, Vec<Vec<f64>>) {
447 let n = matrix.len();
448 if n == 0 {
449 return (Vec::new(), Vec::new());
450 }
451 let mut a = matrix.to_vec();
452 let mut v = vec![vec![0.0; n]; n];
453 for index in 0..n {
454 v[index][index] = 1.0;
455 }
456
457 for _ in 0..max_iterations {
458 let mut p = 0usize;
459 let mut q = 0usize;
460 let mut max_off_diagonal = 0.0_f64;
461 for row in 0..n {
462 for col in (row + 1)..n {
463 let magnitude = a[row][col].abs();
464 if magnitude > max_off_diagonal {
465 max_off_diagonal = magnitude;
466 p = row;
467 q = col;
468 }
469 }
470 }
471 if max_off_diagonal <= tolerance {
472 break;
473 }
474
475 let theta = 0.5 * (2.0 * a[p][q]).atan2(a[q][q] - a[p][p]);
476 let cos_theta = theta.cos();
477 let sin_theta = theta.sin();
478
479 let app = a[p][p];
480 let aqq = a[q][q];
481 let apq = a[p][q];
482 a[p][p] = cos_theta * cos_theta * app - 2.0 * sin_theta * cos_theta * apq
483 + sin_theta * sin_theta * aqq;
484 a[q][q] = sin_theta * sin_theta * app
485 + 2.0 * sin_theta * cos_theta * apq
486 + cos_theta * cos_theta * aqq;
487 a[p][q] = 0.0;
488 a[q][p] = 0.0;
489
490 for k in 0..n {
491 if k == p || k == q {
492 continue;
493 }
494 let akp = a[k][p];
495 let akq = a[k][q];
496 a[k][p] = cos_theta * akp - sin_theta * akq;
497 a[p][k] = a[k][p];
498 a[k][q] = sin_theta * akp + cos_theta * akq;
499 a[q][k] = a[k][q];
500 }
501
502 for row in &mut v {
503 let vip = row[p];
504 let viq = row[q];
505 row[p] = cos_theta * vip - sin_theta * viq;
506 row[q] = sin_theta * vip + cos_theta * viq;
507 }
508 }
509
510 let eigenvalues = (0..n).map(|index| a[index][index]).collect::<Vec<_>>();
511 let eigenvectors = (0..n)
512 .map(|column| v.iter().map(|row| row[column]).collect::<Vec<_>>())
513 .collect::<Vec<_>>();
514 (eigenvalues, eigenvectors)
515}
516
517fn sample_to_feature_loading(
518 centered_healthy: &[Vec<f64>],
519 sample_eigenvector: &[f64],
520 eigenvalue: f64,
521 epsilon: f64,
522) -> Vec<f64> {
523 let singular = (eigenvalue * (centered_healthy.len() as f64 - 1.0))
524 .max(epsilon)
525 .sqrt();
526 let feature_count = centered_healthy.first().map(|row| row.len()).unwrap_or(0);
527 let mut loading = vec![0.0; feature_count];
528 for (sample_index, row) in centered_healthy.iter().enumerate() {
529 let weight = sample_eigenvector[sample_index];
530 for (feature_index, value) in row.iter().enumerate() {
531 loading[feature_index] += value * weight;
532 }
533 }
534 for value in &mut loading {
535 *value /= singular;
536 }
537 let norm = l2_norm(&loading).max(epsilon);
538 for value in &mut loading {
539 *value /= norm;
540 }
541 loading
542}
543
544fn pca_scores(
545 centered: &[f64],
546 retained_components: &[(f64, Vec<f64>)],
547 epsilon: f64,
548) -> (f64, f64) {
549 if retained_components.is_empty() {
550 return (0.0, squared_norm(centered));
551 }
552 let mut reconstructed = vec![0.0; centered.len()];
553 let mut t2 = 0.0;
554 for (eigenvalue, loading) in retained_components {
555 let score = dot(centered, loading);
556 t2 += score * score / eigenvalue.max(epsilon);
557 for (index, value) in loading.iter().enumerate() {
558 reconstructed[index] += score * value;
559 }
560 }
561 let mut residual = vec![0.0; centered.len()];
562 for (index, value) in centered.iter().enumerate() {
563 residual[index] = *value - reconstructed[index];
564 }
565 (t2, squared_norm(&residual))
566}
567
568fn dot(lhs: &[f64], rhs: &[f64]) -> f64 {
569 lhs.iter().zip(rhs).map(|(left, right)| left * right).sum()
570}
571
572fn squared_norm(values: &[f64]) -> f64 {
573 values.iter().map(|value| value * value).sum()
574}
575
576fn l2_norm(values: &[f64]) -> f64 {
577 squared_norm(values).sqrt()
578}
579
580#[cfg(test)]
581mod tests {
582 use super::*;
583
584 #[test]
585 fn ewma_series_matches_recursive_definition() {
586 let ewma = ewma_series(&[1.0, 3.0, 5.0], 0.5);
587 assert_eq!(ewma, vec![1.0, 2.0, 3.5]);
588 }
589
590 #[test]
591 fn positive_cusum_accumulates_only_above_target_plus_kappa() {
592 let cusum = positive_cusum_series(&[1.0, 2.0, 4.0, 3.0, 1.5], 1.0, 0.5);
593 assert_eq!(cusum, vec![0.0, 0.5, 3.0, 4.5, 4.5]);
594 }
595
596 #[test]
597 fn jacobi_eigen_symmetric_recovers_simple_diagonalization() {
598 let matrix = vec![vec![3.0, 1.0], vec![1.0, 3.0]];
599 let (mut eigenvalues, _eigenvectors) = jacobi_eigen_symmetric(&matrix, 64, 1.0e-12);
600 eigenvalues.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap());
601 assert!((eigenvalues[0] - 2.0).abs() < 1.0e-6);
602 assert!((eigenvalues[1] - 4.0).abs() < 1.0e-6);
603 }
604
605 #[test]
606 fn pca_scores_are_finite_without_retained_components() {
607 let (t2, spe) = pca_scores(&[1.0, -2.0], &[], 1.0e-9);
608 assert_eq!(t2, 0.0);
609 assert_eq!(spe, 5.0);
610 }
611}