fdars_core/alignment/
outlier.rs1use super::karcher::karcher_mean;
7use super::pairwise::{amplitude_distance, elastic_distance, phase_distance_pair};
8use super::robust_karcher::{karcher_median, RobustKarcherConfig};
9use crate::error::FdarError;
10use crate::matrix::FdMatrix;
11
12#[derive(Debug, Clone, PartialEq)]
14pub struct ElasticOutlierConfig {
15 pub lambda: f64,
17 pub alpha: f64,
20 pub use_median: bool,
23}
24
25impl Default for ElasticOutlierConfig {
26 fn default() -> Self {
27 Self {
28 lambda: 0.0,
29 alpha: 0.05,
30 use_median: true,
31 }
32 }
33}
34
35#[derive(Debug, Clone, PartialEq)]
37#[non_exhaustive]
38pub struct ElasticOutlierResult {
39 pub outlier_indices: Vec<usize>,
41 pub distances: Vec<f64>,
43 pub threshold: f64,
45 pub amplitude_distances: Vec<f64>,
47 pub phase_distances: Vec<f64>,
49}
50
51#[must_use = "expensive computation whose result should not be discarded"]
66pub fn elastic_outlier_detection(
67 data: &FdMatrix,
68 argvals: &[f64],
69 config: &ElasticOutlierConfig,
70) -> Result<ElasticOutlierResult, FdarError> {
71 let (n, m) = data.shape();
72
73 if argvals.len() != m {
74 return Err(FdarError::InvalidDimension {
75 parameter: "argvals",
76 expected: format!("{m}"),
77 actual: format!("{}", argvals.len()),
78 });
79 }
80 if n < 2 {
81 return Err(FdarError::InvalidDimension {
82 parameter: "data",
83 expected: "at least 2 rows".to_string(),
84 actual: format!("{n} rows"),
85 });
86 }
87
88 let reference = if config.use_median {
90 let median_config = RobustKarcherConfig {
91 max_iter: 15,
92 tol: 1e-3,
93 lambda: config.lambda,
94 trim_fraction: 0.1,
95 };
96 let result = karcher_median(data, argvals, &median_config)?;
97 result.mean
98 } else {
99 let result = karcher_mean(data, argvals, 15, 1e-3, config.lambda);
100 result.mean
101 };
102
103 let distances: Vec<f64> = (0..n)
105 .map(|i| {
106 let fi = data.row(i);
107 elastic_distance(&reference, &fi, argvals, config.lambda)
108 })
109 .collect();
110
111 let amplitude_distances: Vec<f64> = (0..n)
113 .map(|i| {
114 let fi = data.row(i);
115 amplitude_distance(&reference, &fi, argvals, config.lambda)
116 })
117 .collect();
118
119 let phase_distances: Vec<f64> = (0..n)
120 .map(|i| {
121 let fi = data.row(i);
122 phase_distance_pair(&reference, &fi, argvals, config.lambda)
123 })
124 .collect();
125
126 let threshold = tukey_fence(&distances);
128
129 let outlier_indices: Vec<usize> = (0..n).filter(|&i| distances[i] > threshold).collect();
131
132 Ok(ElasticOutlierResult {
133 outlier_indices,
134 distances,
135 threshold,
136 amplitude_distances,
137 phase_distances,
138 })
139}
140
141fn tukey_fence(values: &[f64]) -> f64 {
143 let n = values.len();
144 if n < 4 {
145 return values.iter().copied().fold(f64::NEG_INFINITY, f64::max) + 1.0;
147 }
148
149 let mut sorted = values.to_vec();
150 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
151
152 let q1 = percentile_sorted(&sorted, 25.0);
153 let q3 = percentile_sorted(&sorted, 75.0);
154 let iqr = q3 - q1;
155
156 q3 + 1.5 * iqr
157}
158
159fn percentile_sorted(sorted: &[f64], pct: f64) -> f64 {
161 let n = sorted.len();
162 if n == 0 {
163 return 0.0;
164 }
165 if n == 1 {
166 return sorted[0];
167 }
168
169 let rank = pct / 100.0 * (n - 1) as f64;
170 let lo = rank.floor() as usize;
171 let hi = rank.ceil() as usize;
172 let frac = rank - lo as f64;
173
174 if lo >= n || hi >= n {
175 sorted[n - 1]
176 } else {
177 sorted[lo] * (1.0 - frac) + sorted[hi] * frac
178 }
179}
180
181#[cfg(test)]
182mod tests {
183 use super::*;
184 use crate::test_helpers::uniform_grid;
185
186 fn make_clean_data(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
187 let t = uniform_grid(m);
188 let mut data_vec = vec![0.0; n * m];
189 for i in 0..n {
190 let phase = 0.02 * i as f64;
191 for j in 0..m {
192 data_vec[i + j * n] = ((t[j] + phase) * 4.0).sin();
193 }
194 }
195 let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
196 (data, t)
197 }
198
199 #[test]
200 fn outlier_detection_no_outliers() {
201 let (data, t) = make_clean_data(6, 20);
202 let config = ElasticOutlierConfig::default();
203 let result = elastic_outlier_detection(&data, &t, &config).unwrap();
204
205 assert_eq!(result.distances.len(), 6);
206 assert_eq!(result.amplitude_distances.len(), 6);
207 assert_eq!(result.phase_distances.len(), 6);
208 assert!(result.threshold > 0.0);
209
210 assert!(
212 result.outlier_indices.len() <= 1,
213 "clean data should have at most 1 outlier, got {}",
214 result.outlier_indices.len()
215 );
216 }
217
218 #[test]
219 fn outlier_detection_finds_extreme() {
220 let m = 20;
221 let t = uniform_grid(m);
222 let n = 8;
223 let mut data_vec = vec![0.0; n * m];
224
225 for i in 0..7 {
227 let phase = 0.02 * i as f64;
228 for j in 0..m {
229 data_vec[i + j * n] = ((t[j] + phase) * 4.0).sin();
230 }
231 }
232 for j in 0..m {
234 data_vec[7 + j * n] = (t[j] * 20.0).cos() * 10.0;
235 }
236 let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
237
238 let config = ElasticOutlierConfig::default();
239 let result = elastic_outlier_detection(&data, &t, &config).unwrap();
240
241 assert!(
243 result.outlier_indices.contains(&7),
244 "should detect curve 7 as outlier, detected: {:?}",
245 result.outlier_indices
246 );
247
248 assert!(
250 result.distances[7] > result.threshold,
251 "outlier distance ({}) should exceed threshold ({})",
252 result.distances[7],
253 result.threshold
254 );
255 }
256
257 #[test]
258 fn outlier_detection_config_default() {
259 let cfg = ElasticOutlierConfig::default();
260 assert!((cfg.lambda - 0.0).abs() < f64::EPSILON);
261 assert!((cfg.alpha - 0.05).abs() < f64::EPSILON);
262 assert!(cfg.use_median);
263 }
264}