1use crate::depth::band::{modified_band_1d, modified_epigraph_index_1d};
7use crate::error::FdarError;
8use crate::iter_maybe_parallel;
9use crate::matrix::FdMatrix;
10use crate::streaming_depth::{SortedReferenceState, StreamingDepth, StreamingFraimanMuniz};
11use rand::prelude::*;
12use rand_distr::StandardNormal;
13#[cfg(feature = "parallel")]
14use rayon::iter::ParallelIterator;
15
16fn compute_trimmed_stats(data: &FdMatrix, depths: &[f64], n_keep: usize) -> (Vec<f64>, Vec<f64>) {
20 let m = data.ncols();
21
22 let mut depth_idx: Vec<(usize, f64)> =
23 depths.iter().enumerate().map(|(i, &d)| (i, d)).collect();
24 if n_keep < depth_idx.len() {
26 depth_idx.select_nth_unstable_by(n_keep - 1, |a, b| {
27 b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
28 });
29 }
30 let keep_idx: Vec<usize> = depth_idx[..n_keep].iter().map(|(i, _)| *i).collect();
31
32 let results: Vec<(f64, f64)> = iter_maybe_parallel!(0..m)
33 .map(|j| {
34 let mut mean_j = 0.0;
35 for &i in &keep_idx {
36 mean_j += data[(i, j)];
37 }
38 mean_j /= n_keep as f64;
39
40 let mut var_j = 0.0;
41 for &i in &keep_idx {
42 let diff = data[(i, j)] - mean_j;
43 var_j += diff * diff;
44 }
45 var_j /= n_keep as f64;
46 var_j = var_j.max(1e-10);
47
48 (mean_j, var_j)
49 })
50 .collect();
51
52 let trimmed_mean: Vec<f64> = results.iter().map(|&(m, _)| m).collect();
53 let trimmed_var: Vec<f64> = results.iter().map(|&(_, v)| v).collect();
54
55 (trimmed_mean, trimmed_var)
56}
57
58fn normalized_distance(
60 data: &FdMatrix,
61 i: usize,
62 trimmed_mean: &[f64],
63 trimmed_var: &[f64],
64) -> f64 {
65 let m = data.ncols();
66 let mut dist = 0.0;
67 for j in 0..m {
68 let diff = data[(i, j)] - trimmed_mean[j];
69 dist += diff * diff / trimmed_var[j];
70 }
71 (dist / m as f64).sqrt()
72}
73
74#[must_use = "expensive computation whose result should not be discarded"]
87pub fn outliers_threshold_lrt(
88 data: &FdMatrix,
89 nb: usize,
90 smo: f64,
91 trim: f64,
92 seed: u64,
93 percentile: f64,
94) -> f64 {
95 outliers_threshold_lrt_with_dist(data, nb, smo, trim, seed, percentile).0
96}
97
98#[must_use = "expensive computation whose result should not be discarded"]
116pub fn outliers_threshold_lrt_with_dist(
117 data: &FdMatrix,
118 nb: usize,
119 smo: f64,
120 trim: f64,
121 seed: u64,
122 percentile: f64,
123) -> (f64, Vec<f64>) {
124 let n = data.nrows();
125 let m = data.ncols();
126
127 if n < 3 || m == 0 {
128 return (0.0, vec![]);
129 }
130
131 let n_keep = ((1.0 - trim) * n as f64).ceil().max(1.0) as usize;
132 let n_keep = n_keep.min(n);
133
134 let col_vars: Vec<f64> = iter_maybe_parallel!(0..m)
136 .map(|j| {
137 let mut sum = 0.0;
138 let mut sum_sq = 0.0;
139 for i in 0..n {
140 let val = data[(i, j)];
141 sum += val;
142 sum_sq += val * val;
143 }
144 let mean = sum / n as f64;
145 let var = sum_sq / n as f64 - mean * mean;
147 var.max(0.0).sqrt()
148 })
149 .collect();
150
151 let max_dists: Vec<f64> = iter_maybe_parallel!(0..nb)
153 .map(|b| {
154 let mut rng = StdRng::seed_from_u64(seed.wrapping_add(b as u64));
155
156 let indices: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
158 let noise_vals: Vec<f64> = (0..n * m)
160 .map(|_| rng.sample::<f64, _>(StandardNormal))
161 .collect();
162 let mut boot_data = FdMatrix::zeros(n, m);
163 for j in 0..m {
165 let smo_var = smo * col_vars[j];
166 for (new_i, &old_i) in indices.iter().enumerate() {
167 let noise = noise_vals[new_i * m + j] * smo_var;
168 boot_data[(new_i, j)] = data[(old_i, j)] + noise;
169 }
170 }
171
172 let state = SortedReferenceState::from_reference(&boot_data);
174 let streaming_fm = StreamingFraimanMuniz::new(state, true);
175 let depths = streaming_fm.depth_batch(&boot_data);
176 let (trimmed_mean, trimmed_var) = compute_trimmed_stats(&boot_data, &depths, n_keep);
177
178 (0..n)
180 .map(|i| normalized_distance(&boot_data, i, &trimmed_mean, &trimmed_var))
181 .fold(0.0_f64, f64::max)
182 })
183 .collect();
184
185 let mut sorted_dists = max_dists;
187 crate::helpers::sort_nan_safe(&mut sorted_dists);
188 let idx =
189 crate::utility::f64_to_usize_clamped(nb as f64 * percentile).min(nb.saturating_sub(1));
190 let threshold = sorted_dists.get(idx).copied().unwrap_or(0.0);
191 (threshold, sorted_dists)
192}
193
194#[must_use = "expensive computation whose result should not be discarded"]
204pub fn detect_outliers_lrt(data: &FdMatrix, threshold: f64, trim: f64) -> Vec<bool> {
228 let n = data.nrows();
229 let m = data.ncols();
230
231 if n < 3 || m == 0 {
232 return vec![false; n];
233 }
234
235 let n_keep = ((1.0 - trim) * n as f64).ceil().max(1.0) as usize;
236 let n_keep = n_keep.min(n);
237
238 let state = SortedReferenceState::from_reference(data);
239 let streaming_fm = StreamingFraimanMuniz::new(state, true);
240 let depths = streaming_fm.depth_batch(data);
241 let (trimmed_mean, trimmed_var) = compute_trimmed_stats(data, &depths, n_keep);
242
243 iter_maybe_parallel!(0..n)
244 .map(|i| normalized_distance(data, i, &trimmed_mean, &trimmed_var) > threshold)
245 .collect()
246}
247
248#[derive(Debug, Clone, PartialEq)]
250#[non_exhaustive]
251pub struct OutligramResult {
252 pub mei: Vec<f64>,
254 pub mbd: Vec<f64>,
256 pub a0: f64,
258 pub a1: f64,
259 pub a2: f64,
260 pub threshold: f64,
262 pub outlier_flags: Vec<bool>,
264}
265
266pub fn outliergram(data: &FdMatrix, factor: f64) -> Result<OutligramResult, FdarError> {
279 let n = data.nrows();
280 if n < 3 {
281 return Err(FdarError::InvalidDimension {
282 parameter: "data",
283 expected: "at least 3 rows".to_string(),
284 actual: format!("{n} rows"),
285 });
286 }
287
288 let mei = modified_epigraph_index_1d(data, data);
289 let mbd = modified_band_1d(data, data);
290
291 let mut xtx = [[0.0; 3]; 3];
294 let mut xty = [0.0; 3];
295 for i in 0..n {
296 let x = [1.0, mei[i], mei[i] * mei[i]];
297 for r in 0..3 {
298 for c in 0..3 {
299 xtx[r][c] += x[r] * x[c];
300 }
301 xty[r] += x[r] * mbd[i];
302 }
303 }
304
305 let (a0, a1, a2) = solve_3x3(xtx, xty);
307
308 let residuals: Vec<f64> = (0..n)
310 .map(|i| mbd[i] - (a0 + a1 * mei[i] + a2 * mei[i] * mei[i]))
311 .collect();
312
313 let mut sorted_resid = residuals.clone();
315 sorted_resid.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
316 let q1 = sorted_resid[n / 4];
317 let q3 = sorted_resid[3 * n / 4];
318 let iqr = q3 - q1;
319 let threshold = q1 - factor * iqr;
320
321 let outlier_flags: Vec<bool> = residuals.iter().map(|&r| r < threshold).collect();
322
323 Ok(OutligramResult {
324 mei,
325 mbd,
326 a0,
327 a1,
328 a2,
329 threshold,
330 outlier_flags,
331 })
332}
333
334#[derive(Debug, Clone, PartialEq)]
336#[non_exhaustive]
337pub struct MagnitudeShapeResult {
338 pub magnitude: Vec<f64>,
340 pub shape: Vec<f64>,
342}
343
344pub fn magnitude_shape_outlyingness(data: &FdMatrix) -> Result<MagnitudeShapeResult, FdarError> {
353 let (n, m) = data.shape();
354 if n < 2 || m == 0 {
355 return Err(FdarError::InvalidDimension {
356 parameter: "data",
357 expected: "at least 2 rows and 1 column".to_string(),
358 actual: format!("{n} rows, {m} columns"),
359 });
360 }
361
362 let mbd = modified_band_1d(data, data);
364 let magnitude: Vec<f64> = mbd.iter().map(|&d| 1.0 - d).collect();
365
366 let mut col_means = vec![0.0; m];
369 for j in 0..m {
370 for i in 0..n {
371 col_means[j] += data[(i, j)];
372 }
373 col_means[j] /= n as f64;
374 }
375
376 let mut directions = vec![vec![0.0; m]; n];
378 for i in 0..n {
379 let mut norm_sq = 0.0;
380 for j in 0..m {
381 let c = data[(i, j)] - col_means[j];
382 directions[i][j] = c;
383 norm_sq += c * c;
384 }
385 let norm = norm_sq.sqrt().max(1e-15);
386 for j in 0..m {
387 directions[i][j] /= norm;
388 }
389 }
390
391 let mut mean_dir = vec![0.0; m];
393 for i in 0..n {
394 for j in 0..m {
395 mean_dir[j] += directions[i][j];
396 }
397 }
398 let mut mean_norm_sq = 0.0;
399 for j in 0..m {
400 mean_dir[j] /= n as f64;
401 mean_norm_sq += mean_dir[j] * mean_dir[j];
402 }
403 let mean_norm = mean_norm_sq.sqrt().max(1e-15);
404 for j in 0..m {
405 mean_dir[j] /= mean_norm;
406 }
407
408 let shape: Vec<f64> = (0..n)
410 .map(|i| {
411 let dist_sq: f64 = (0..m)
412 .map(|j| {
413 let d = directions[i][j] - mean_dir[j];
414 d * d
415 })
416 .sum();
417 dist_sq.sqrt()
418 })
419 .collect();
420
421 Ok(MagnitudeShapeResult { magnitude, shape })
422}
423
424fn solve_3x3(a: [[f64; 3]; 3], b: [f64; 3]) -> (f64, f64, f64) {
426 let det = a[0][0] * (a[1][1] * a[2][2] - a[1][2] * a[2][1])
427 - a[0][1] * (a[1][0] * a[2][2] - a[1][2] * a[2][0])
428 + a[0][2] * (a[1][0] * a[2][1] - a[1][1] * a[2][0]);
429 if det.abs() < 1e-15 {
430 return (0.0, 0.0, 0.0);
431 }
432
433 let det_x = b[0] * (a[1][1] * a[2][2] - a[1][2] * a[2][1])
434 - a[0][1] * (b[1] * a[2][2] - a[1][2] * b[2])
435 + a[0][2] * (b[1] * a[2][1] - a[1][1] * b[2]);
436
437 let det_y = a[0][0] * (b[1] * a[2][2] - a[1][2] * b[2])
438 - b[0] * (a[1][0] * a[2][2] - a[1][2] * a[2][0])
439 + a[0][2] * (a[1][0] * b[2] - b[1] * a[2][0]);
440
441 let det_z = a[0][0] * (a[1][1] * b[2] - b[1] * a[2][1])
442 - a[0][1] * (a[1][0] * b[2] - b[1] * a[2][0])
443 + b[0] * (a[1][0] * a[2][1] - a[1][1] * a[2][0]);
444
445 (det_x / det, det_y / det, det_z / det)
446}
447
448#[cfg(test)]
449mod tests {
450 use super::*;
451 use std::f64::consts::PI;
452
453 fn generate_normal_fdata(n: usize, m: usize, seed: u64) -> FdMatrix {
455 let mut rng = StdRng::seed_from_u64(seed);
456 let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
457
458 let mut data = FdMatrix::zeros(n, m);
459 for i in 0..n {
460 let phase: f64 = rng.gen::<f64>() * 0.2;
461 let amp: f64 = 1.0 + rng.gen::<f64>() * 0.1;
462 for j in 0..m {
463 let noise: f64 = rng.sample::<f64, _>(StandardNormal) * 0.05;
464 data[(i, j)] = amp * (2.0 * PI * t[j] + phase).sin() + noise;
465 }
466 }
467 data
468 }
469
470 fn generate_data_with_outlier(n: usize, m: usize, n_outliers: usize) -> FdMatrix {
472 let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
473
474 let mut data = FdMatrix::zeros(n, m);
475
476 for i in 0..(n - n_outliers) {
478 for j in 0..m {
479 data[(i, j)] = (2.0 * PI * t[j]).sin();
480 }
481 }
482
483 for i in (n - n_outliers)..n {
485 for j in 0..m {
486 data[(i, j)] = (2.0 * PI * t[j]).sin() + 10.0;
487 }
488 }
489
490 data
491 }
492
493 #[test]
496 fn test_outliers_threshold_lrt_returns_positive() {
497 let n = 20;
498 let m = 30;
499 let data = generate_normal_fdata(n, m, 42);
500
501 let threshold = outliers_threshold_lrt(&data, 50, 0.1, 0.1, 42, 0.95);
502
503 assert!(threshold > 0.0, "Threshold should be positive");
504 }
505
506 #[test]
507 fn test_outliers_threshold_lrt_deterministic() {
508 let n = 15;
509 let m = 25;
510 let data = generate_normal_fdata(n, m, 42);
511
512 let t1 = outliers_threshold_lrt(&data, 30, 0.1, 0.1, 123, 0.95);
513 let t2 = outliers_threshold_lrt(&data, 30, 0.1, 0.1, 123, 0.95);
514
515 assert!(
516 (t1 - t2).abs() < 1e-10,
517 "Same seed should give same threshold"
518 );
519 }
520
521 #[test]
522 fn test_outliers_threshold_lrt_percentile_effect() {
523 let n = 20;
524 let m = 30;
525 let data = generate_normal_fdata(n, m, 42);
526
527 let t_low = outliers_threshold_lrt(&data, 50, 0.1, 0.1, 42, 0.50);
528 let t_high = outliers_threshold_lrt(&data, 50, 0.1, 0.1, 42, 0.99);
529
530 assert!(
531 t_high >= t_low,
532 "Higher percentile should give higher or equal threshold"
533 );
534 }
535
536 #[test]
537 fn test_outliers_threshold_lrt_invalid_input() {
538 let data = FdMatrix::zeros(2, 30);
540 let threshold = outliers_threshold_lrt(&data, 50, 0.1, 0.1, 42, 0.95);
541 assert!(threshold.abs() < 1e-10, "Should return 0 for n < 3");
542
543 let data = FdMatrix::zeros(10, 0);
545 let threshold = outliers_threshold_lrt(&data, 50, 0.1, 0.1, 42, 0.95);
546 assert!(threshold.abs() < 1e-10);
547 }
548
549 #[test]
552 fn test_detect_outliers_lrt_finds_obvious_outlier() {
553 let n = 20;
554 let m = 30;
555 let data = generate_data_with_outlier(n, m, 1);
556
557 let outliers = detect_outliers_lrt(&data, 3.0, 0.1);
559
560 assert_eq!(outliers.len(), n);
561
562 assert!(outliers[n - 1], "Obvious outlier should be detected");
564
565 let n_detected: usize = outliers.iter().filter(|&&x| x).count();
567 assert!(n_detected <= 3, "Should not detect too many outliers");
568 }
569
570 #[test]
571 fn test_detect_outliers_lrt_homogeneous_data() {
572 let n = 20;
573 let m = 30;
574 let data = generate_normal_fdata(n, m, 42);
575
576 let outliers = detect_outliers_lrt(&data, 100.0, 0.1);
578
579 let n_detected: usize = outliers.iter().filter(|&&x| x).count();
580 assert_eq!(
581 n_detected, 0,
582 "Very high threshold should detect no outliers"
583 );
584 }
585
586 #[test]
587 fn test_detect_outliers_lrt_threshold_effect() {
588 let n = 20;
589 let m = 30;
590 let data = generate_data_with_outlier(n, m, 3);
591
592 let low_thresh = detect_outliers_lrt(&data, 2.0, 0.1);
593 let high_thresh = detect_outliers_lrt(&data, 10.0, 0.1);
594
595 let n_low: usize = low_thresh.iter().filter(|&&x| x).count();
596 let n_high: usize = high_thresh.iter().filter(|&&x| x).count();
597
598 assert!(
599 n_low >= n_high,
600 "Lower threshold should detect more or equal outliers"
601 );
602 }
603
604 #[test]
605 fn test_detect_outliers_lrt_invalid_input() {
606 let data = FdMatrix::zeros(2, 30);
608 let outliers = detect_outliers_lrt(&data, 3.0, 0.1);
609 assert_eq!(outliers.len(), 2);
610 assert!(
611 outliers.iter().all(|&x| !x),
612 "Should return all false for n < 3"
613 );
614 }
615
616 #[test]
617 fn test_identical_data_outliers() {
618 let n = 10;
619 let m = 20;
620 let data = FdMatrix::from_column_major(vec![1.0; n * m], n, m).unwrap();
621 let flags = detect_outliers_lrt(&data, 1.0, 0.15);
622 assert_eq!(flags.len(), n);
623 for &f in &flags {
625 assert!(!f);
626 }
627 }
628
629 #[test]
630 fn test_n3_minimal_outliers() {
631 let n = 3;
633 let m = 10;
634 let mut data_vec = vec![0.0; n * m];
635 for j in 0..m {
637 data_vec[j * n] = 0.0;
638 data_vec[1 + j * n] = 0.1;
639 data_vec[2 + j * n] = 100.0;
640 }
641 let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
642 let flags = detect_outliers_lrt(&data, 0.5, 0.15);
643 assert_eq!(flags.len(), n);
644 }
645
646 #[test]
649 fn test_with_dist_returns_sorted_distribution() {
650 let data = generate_normal_fdata(20, 30, 42);
651 let nb = 50;
652 let (threshold, dist) = outliers_threshold_lrt_with_dist(&data, nb, 0.1, 0.1, 42, 0.95);
653
654 assert_eq!(dist.len(), nb, "Distribution length should equal nb");
655 for w in dist.windows(2) {
656 assert!(w[0] <= w[1], "Distribution should be sorted");
657 }
658 let idx = ((nb as f64 * 0.95) as usize).min(nb - 1);
659 assert!(
660 (threshold - dist[idx]).abs() < 1e-10,
661 "Threshold should match distribution at percentile index"
662 );
663 }
664
665 #[test]
666 fn test_with_dist_matches_scalar() {
667 let data = generate_normal_fdata(15, 25, 99);
668 let scalar = outliers_threshold_lrt(&data, 40, 0.1, 0.1, 123, 0.95);
669 let (with_dist, _) = outliers_threshold_lrt_with_dist(&data, 40, 0.1, 0.1, 123, 0.95);
670 assert!(
671 (scalar - with_dist).abs() < 1e-10,
672 "Scalar version should match with_dist version"
673 );
674 }
675
676 #[test]
677 fn test_bootstrap_dist_enables_pvalue() {
678 let n = 20;
679 let m = 30;
680 let data = generate_data_with_outlier(n, m, 1);
681 let trim = 0.1;
682
683 let (_, dist) = outliers_threshold_lrt_with_dist(&data, 200, 0.1, trim, 42, 0.99);
684 let nb = dist.len();
685
686 let n_keep = ((1.0 - trim) * n as f64).ceil() as usize;
688 let state = SortedReferenceState::from_reference(&data);
689 let streaming_fm = StreamingFraimanMuniz::new(state, true);
690 let depths = streaming_fm.depth_batch(&data);
691 let (tmean, tvar) = compute_trimmed_stats(&data, &depths, n_keep);
692
693 let d_outlier = normalized_distance(&data, n - 1, &tmean, &tvar);
695 let p_outlier =
696 (dist.iter().filter(|&&v| v >= d_outlier).count() as f64 + 1.0) / (nb as f64 + 1.0);
697
698 let d_normal = normalized_distance(&data, 0, &tmean, &tvar);
700 let p_normal =
701 (dist.iter().filter(|&&v| v >= d_normal).count() as f64 + 1.0) / (nb as f64 + 1.0);
702
703 assert!(
704 p_outlier < 0.05,
705 "Outlier should have small p-value, got {p_outlier}"
706 );
707 assert!(
708 p_normal > 0.05,
709 "Normal curve should have large p-value, got {p_normal}"
710 );
711 }
712
713 #[test]
714 fn test_with_dist_invalid_input() {
715 let data = FdMatrix::zeros(2, 30);
716 let (threshold, dist) = outliers_threshold_lrt_with_dist(&data, 50, 0.1, 0.1, 42, 0.95);
717 assert!(threshold.abs() < 1e-10);
718 assert!(dist.is_empty(), "Should return empty dist for n < 3");
719 }
720
721 #[test]
722 fn test_all_false_high_threshold() {
723 let n = 10;
724 let m = 20;
725 let data_vec: Vec<f64> = (0..n * m).map(|i| (i as f64 * 0.1).sin()).collect();
726 let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
727 let flags = detect_outliers_lrt(&data, 1e10, 0.15);
729 for &f in &flags {
730 assert!(!f, "High threshold should produce no outliers");
731 }
732 }
733
734 #[test]
737 fn test_trim_zero_no_trimming() {
738 let data = generate_normal_fdata(10, 20, 42);
740 let threshold = outliers_threshold_lrt(&data, 30, 0.1, 0.0, 42, 0.95);
741 assert!(threshold > 0.0);
742 let flags = detect_outliers_lrt(&data, threshold, 0.0);
743 assert_eq!(flags.len(), 10);
744 }
745
746 #[test]
747 fn test_trim_near_one_heavy_trimming() {
748 let data = generate_normal_fdata(10, 20, 42);
750 let threshold = outliers_threshold_lrt(&data, 30, 0.1, 0.9, 42, 0.95);
751 assert!(threshold >= 0.0);
752 let flags = detect_outliers_lrt(&data, threshold, 0.9);
753 assert_eq!(flags.len(), 10);
754 }
755
756 #[test]
757 fn test_trim_one_clamps_to_one() {
758 let data = generate_normal_fdata(10, 20, 42);
760 let threshold = outliers_threshold_lrt(&data, 30, 0.1, 1.0, 42, 0.95);
761 assert!(threshold >= 0.0);
762 let flags = detect_outliers_lrt(&data, threshold, 1.0);
763 assert_eq!(flags.len(), 10);
764 }
765
766 #[test]
767 fn test_trim_negative_clamps_to_n() {
768 let data = generate_normal_fdata(10, 20, 42);
770 let threshold = outliers_threshold_lrt(&data, 30, 0.1, -0.5, 42, 0.95);
771 assert!(threshold > 0.0);
772 let flags = detect_outliers_lrt(&data, threshold, -0.5);
773 assert_eq!(flags.len(), 10);
774 }
775
776 #[test]
779 fn test_smo_zero_no_noise() {
780 let data = generate_normal_fdata(10, 20, 42);
782 let (threshold, dist) = outliers_threshold_lrt_with_dist(&data, 30, 0.0, 0.1, 42, 0.95);
783 assert!(threshold > 0.0);
784 assert_eq!(dist.len(), 30);
785 }
786
787 #[test]
788 fn test_nb_zero_empty_bootstrap() {
789 let data = generate_normal_fdata(10, 20, 42);
790 let (threshold, dist) = outliers_threshold_lrt_with_dist(&data, 0, 0.1, 0.1, 42, 0.95);
791 assert!(threshold.abs() < 1e-10);
792 assert!(dist.is_empty());
793 }
794
795 #[test]
796 fn test_nb_one_single_bootstrap() {
797 let data = generate_normal_fdata(10, 20, 42);
798 let (threshold, dist) = outliers_threshold_lrt_with_dist(&data, 1, 0.1, 0.1, 42, 0.95);
799 assert_eq!(dist.len(), 1);
800 assert!((threshold - dist[0]).abs() < 1e-10);
802 }
803
804 #[test]
805 fn test_percentile_zero_returns_minimum() {
806 let data = generate_normal_fdata(15, 20, 42);
807 let nb = 50;
808 let (_, dist) = outliers_threshold_lrt_with_dist(&data, nb, 0.1, 0.1, 42, 0.95);
809 let t_zero = outliers_threshold_lrt(&data, nb, 0.1, 0.1, 42, 0.0);
810 assert!(
811 (t_zero - dist[0]).abs() < 1e-10,
812 "percentile=0 should return the minimum of the distribution"
813 );
814 }
815
816 #[test]
817 fn test_percentile_one_returns_maximum() {
818 let data = generate_normal_fdata(15, 20, 42);
819 let nb = 50;
820 let (_, dist) = outliers_threshold_lrt_with_dist(&data, nb, 0.1, 0.1, 42, 0.95);
821 let t_one = outliers_threshold_lrt(&data, nb, 0.1, 0.1, 42, 1.0);
822 assert!(
823 (t_one - *dist.last().unwrap()).abs() < 1e-10,
824 "percentile=1 should return the maximum of the distribution"
825 );
826 }
827
828 #[test]
831 fn test_distribution_values_non_negative() {
832 let data = generate_normal_fdata(15, 20, 42);
833 let (_, dist) = outliers_threshold_lrt_with_dist(&data, 50, 0.1, 0.1, 42, 0.95);
834 for &v in &dist {
835 assert!(v >= 0.0, "Max-distances must be non-negative, got {v}");
836 }
837 }
838
839 #[test]
842 fn test_detect_m_zero_returns_all_false() {
843 let data = FdMatrix::zeros(10, 0);
844 let flags = detect_outliers_lrt(&data, 3.0, 0.1);
845 assert_eq!(flags.len(), 10);
846 assert!(flags.iter().all(|&f| !f));
847 }
848
849 #[test]
850 fn test_detect_multiple_outliers() {
851 let data = generate_data_with_outlier(20, 30, 3);
852 let flags = detect_outliers_lrt(&data, 3.0, 0.1);
853 let outlier_count = flags[17..20].iter().filter(|&&x| x).count();
855 assert!(
856 outlier_count >= 2,
857 "At least 2 of 3 outliers should be detected, got {outlier_count}"
858 );
859 }
860
861 #[test]
864 fn test_end_to_end_threshold_then_detect() {
865 let data = generate_data_with_outlier(20, 30, 2);
866 let threshold = outliers_threshold_lrt(&data, 100, 0.1, 0.1, 42, 0.99);
867 let flags = detect_outliers_lrt(&data, threshold, 0.1);
868
869 assert!(
871 flags[18] || flags[19],
872 "At least one outlier should be detected in end-to-end flow"
873 );
874 let false_positives = flags[..18].iter().filter(|&&x| x).count();
876 assert!(
877 false_positives <= 2,
878 "False positive count should be low, got {false_positives}"
879 );
880 }
881
882 #[test]
883 fn test_end_to_end_with_dist_pvalues_all_curves() {
884 let n = 25;
886 let m = 30;
887 let data = generate_data_with_outlier(n, m, 2);
888 let trim = 0.1;
889
890 let (_, dist) = outliers_threshold_lrt_with_dist(&data, 200, 0.1, trim, 42, 0.99);
891 let nb = dist.len();
892
893 let n_keep = ((1.0 - trim) * n as f64).ceil().max(1.0) as usize;
894 let n_keep = n_keep.min(n);
895 let state = SortedReferenceState::from_reference(&data);
896 let streaming_fm = StreamingFraimanMuniz::new(state, true);
897 let depths = streaming_fm.depth_batch(&data);
898 let (tmean, tvar) = compute_trimmed_stats(&data, &depths, n_keep);
899
900 let pvalues: Vec<f64> = (0..n)
902 .map(|i| {
903 let d = normalized_distance(&data, i, &tmean, &tvar);
904 (dist.iter().filter(|&&v| v >= d).count() as f64 + 1.0) / (nb as f64 + 1.0)
905 })
906 .collect();
907
908 let normal_small_p = pvalues[..23].iter().filter(|&&p| p < 0.01).count();
910 assert_eq!(
911 normal_small_p, 0,
912 "Normal curves should not have tiny p-values"
913 );
914
915 for &i in &[23, 24] {
917 assert!(
918 pvalues[i] < 0.05,
919 "Outlier curve {i} should have small p-value, got {}",
920 pvalues[i]
921 );
922 }
923 }
924
925 fn outliergram_test_data() -> FdMatrix {
928 let n = 20;
930 let m = 30;
931 let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
932 let mut vals = vec![0.0; n * m];
933 for i in 0..n {
934 for (j, &tj) in t.iter().enumerate() {
935 let base = tj.sin();
936 vals[i + j * n] = if i < 18 {
937 base + 0.1 * (i as f64 * 0.5).sin()
938 } else {
939 base + 2.0 * (if i == 18 { 1.0 } else { -1.0 })
941 };
942 }
943 }
944 FdMatrix::from_column_major(vals, n, m).unwrap()
945 }
946
947 #[test]
948 fn outliergram_runs() {
949 let data = outliergram_test_data();
950 let result = outliergram(&data, 1.5).unwrap();
951 assert_eq!(result.mei.len(), 20);
952 assert_eq!(result.mbd.len(), 20);
953 assert_eq!(result.outlier_flags.len(), 20);
954 let central_mbd: f64 = result.mbd[..18].iter().sum::<f64>() / 18.0;
956 assert!(result.mbd[18] < central_mbd || result.mbd[19] < central_mbd);
957 }
958
959 #[test]
960 fn outliergram_parabola_coefficients() {
961 let data = outliergram_test_data();
962 let result = outliergram(&data, 1.5).unwrap();
963 assert!(result.a0.is_finite());
966 assert!(result.a1.is_finite());
967 assert!(result.a2.is_finite());
968 }
969
970 #[test]
971 fn magnitude_shape_dimensions() {
972 let data = outliergram_test_data();
973 let result = magnitude_shape_outlyingness(&data).unwrap();
974 assert_eq!(result.magnitude.len(), 20);
975 assert_eq!(result.shape.len(), 20);
976 assert!(result.magnitude.iter().all(|&v| v >= 0.0));
978 assert!(result.shape.iter().all(|&v| v >= 0.0));
979 }
980
981 #[test]
982 fn magnitude_outliers_have_high_magnitude() {
983 let data = outliergram_test_data();
984 let result = magnitude_shape_outlyingness(&data).unwrap();
985 let central_mag: f64 = result.magnitude[..18].iter().sum::<f64>() / 18.0;
986 assert!(result.magnitude[18] > central_mag || result.magnitude[19] > central_mag);
988 }
989
990 #[test]
991 fn outliergram_too_few_curves() {
992 let data = FdMatrix::from_column_major(vec![1.0, 2.0, 3.0, 4.0], 2, 2).unwrap();
993 assert!(outliergram(&data, 1.5).is_err());
994 }
995}