Skip to main content

fdars_core/alignment/
outlier.rs

1//! SRVF-based outlier detection using elastic distances.
2//!
3//! Detects outlier curves by computing elastic distances from a reference
4//! (Karcher mean or median) and applying the Tukey fence rule.
5
6use super::karcher::karcher_mean;
7use super::pairwise::{amplitude_distance, elastic_distance, phase_distance_pair};
8use super::robust_karcher::{karcher_median, RobustKarcherConfig};
9use crate::error::FdarError;
10use crate::matrix::FdMatrix;
11
12/// Configuration for elastic outlier detection.
13#[derive(Debug, Clone, PartialEq)]
14pub struct ElasticOutlierConfig {
15    /// Roughness penalty for elastic alignment (0.0 = no penalty).
16    pub lambda: f64,
17    /// Significance level (controls threshold sensitivity; currently used
18    /// to document intent — the actual threshold uses the Tukey fence).
19    pub alpha: f64,
20    /// If `true`, use the Karcher median as reference (more robust).
21    /// If `false`, use the Karcher mean.
22    pub use_median: bool,
23}
24
25impl Default for ElasticOutlierConfig {
26    fn default() -> Self {
27        Self {
28            lambda: 0.0,
29            alpha: 0.05,
30            use_median: true,
31        }
32    }
33}
34
35/// Result of elastic outlier detection.
36#[derive(Debug, Clone, PartialEq)]
37#[non_exhaustive]
38pub struct ElasticOutlierResult {
39    /// Indices of detected outlier curves.
40    pub outlier_indices: Vec<usize>,
41    /// Total elastic distance from each curve to the reference (length n).
42    pub distances: Vec<f64>,
43    /// Cutoff distance (Tukey fence: Q3 + 1.5 * IQR).
44    pub threshold: f64,
45    /// Amplitude component of elastic distance for each curve (length n).
46    pub amplitude_distances: Vec<f64>,
47    /// Phase component of elastic distance for each curve (length n).
48    pub phase_distances: Vec<f64>,
49}
50
51/// Detect outlier curves using elastic distances and the Tukey fence.
52///
53/// Computes a reference curve (Karcher mean or median), then measures the
54/// elastic distance from each curve to the reference. Curves exceeding the
55/// Tukey fence threshold (Q3 + 1.5 * IQR) are flagged as outliers.
56///
57/// # Arguments
58/// * `data`    — Functional data matrix (n x m).
59/// * `argvals` — Evaluation points (length m).
60/// * `config`  — Configuration parameters.
61///
62/// # Errors
63/// Returns [`FdarError::InvalidDimension`] if `argvals` length does not match `m`
64/// or `n < 2`.
65#[must_use = "expensive computation whose result should not be discarded"]
66pub fn elastic_outlier_detection(
67    data: &FdMatrix,
68    argvals: &[f64],
69    config: &ElasticOutlierConfig,
70) -> Result<ElasticOutlierResult, FdarError> {
71    let (n, m) = data.shape();
72
73    if argvals.len() != m {
74        return Err(FdarError::InvalidDimension {
75            parameter: "argvals",
76            expected: format!("{m}"),
77            actual: format!("{}", argvals.len()),
78        });
79    }
80    if n < 2 {
81        return Err(FdarError::InvalidDimension {
82            parameter: "data",
83            expected: "at least 2 rows".to_string(),
84            actual: format!("{n} rows"),
85        });
86    }
87
88    // Step 1: Compute reference curve.
89    let reference = if config.use_median {
90        let median_config = RobustKarcherConfig {
91            max_iter: 15,
92            tol: 1e-3,
93            lambda: config.lambda,
94            trim_fraction: 0.1,
95        };
96        let result = karcher_median(data, argvals, &median_config)?;
97        result.mean
98    } else {
99        let result = karcher_mean(data, argvals, 15, 1e-3, config.lambda);
100        result.mean
101    };
102
103    // Step 2: Compute elastic distances from reference.
104    let distances: Vec<f64> = (0..n)
105        .map(|i| {
106            let fi = data.row(i);
107            elastic_distance(&reference, &fi, argvals, config.lambda)
108        })
109        .collect();
110
111    // Step 3: Compute amplitude and phase distances.
112    let amplitude_distances: Vec<f64> = (0..n)
113        .map(|i| {
114            let fi = data.row(i);
115            amplitude_distance(&reference, &fi, argvals, config.lambda)
116        })
117        .collect();
118
119    let phase_distances: Vec<f64> = (0..n)
120        .map(|i| {
121            let fi = data.row(i);
122            phase_distance_pair(&reference, &fi, argvals, config.lambda)
123        })
124        .collect();
125
126    // Step 4: Tukey fence on total distances.
127    let threshold = tukey_fence(&distances);
128
129    // Step 5: Identify outliers.
130    let outlier_indices: Vec<usize> = (0..n).filter(|&i| distances[i] > threshold).collect();
131
132    Ok(ElasticOutlierResult {
133        outlier_indices,
134        distances,
135        threshold,
136        amplitude_distances,
137        phase_distances,
138    })
139}
140
141/// Compute the Tukey fence threshold: Q3 + 1.5 * IQR.
142fn tukey_fence(values: &[f64]) -> f64 {
143    let n = values.len();
144    if n < 4 {
145        // With very few values, use max + epsilon as a permissive threshold.
146        return values.iter().copied().fold(f64::NEG_INFINITY, f64::max) + 1.0;
147    }
148
149    let mut sorted = values.to_vec();
150    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
151
152    let q1 = percentile_sorted(&sorted, 25.0);
153    let q3 = percentile_sorted(&sorted, 75.0);
154    let iqr = q3 - q1;
155
156    q3 + 1.5 * iqr
157}
158
159/// Compute a percentile from a sorted slice using linear interpolation.
160fn percentile_sorted(sorted: &[f64], pct: f64) -> f64 {
161    let n = sorted.len();
162    if n == 0 {
163        return 0.0;
164    }
165    if n == 1 {
166        return sorted[0];
167    }
168
169    let rank = pct / 100.0 * (n - 1) as f64;
170    let lo = rank.floor() as usize;
171    let hi = rank.ceil() as usize;
172    let frac = rank - lo as f64;
173
174    if lo >= n || hi >= n {
175        sorted[n - 1]
176    } else {
177        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
178    }
179}
180
181#[cfg(test)]
182mod tests {
183    use super::*;
184    use crate::test_helpers::uniform_grid;
185
186    fn make_clean_data(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
187        let t = uniform_grid(m);
188        let mut data_vec = vec![0.0; n * m];
189        for i in 0..n {
190            let phase = 0.02 * i as f64;
191            for j in 0..m {
192                data_vec[i + j * n] = ((t[j] + phase) * 4.0).sin();
193            }
194        }
195        let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
196        (data, t)
197    }
198
199    #[test]
200    fn outlier_detection_no_outliers() {
201        let (data, t) = make_clean_data(6, 20);
202        let config = ElasticOutlierConfig::default();
203        let result = elastic_outlier_detection(&data, &t, &config).unwrap();
204
205        assert_eq!(result.distances.len(), 6);
206        assert_eq!(result.amplitude_distances.len(), 6);
207        assert_eq!(result.phase_distances.len(), 6);
208        assert!(result.threshold > 0.0);
209
210        // With clean homogeneous data, expect no (or very few) outliers.
211        assert!(
212            result.outlier_indices.len() <= 1,
213            "clean data should have at most 1 outlier, got {}",
214            result.outlier_indices.len()
215        );
216    }
217
218    #[test]
219    fn outlier_detection_finds_extreme() {
220        let m = 20;
221        let t = uniform_grid(m);
222        let n = 8;
223        let mut data_vec = vec![0.0; n * m];
224
225        // 7 clean curves.
226        for i in 0..7 {
227            let phase = 0.02 * i as f64;
228            for j in 0..m {
229                data_vec[i + j * n] = ((t[j] + phase) * 4.0).sin();
230            }
231        }
232        // 1 extreme outlier (curve 7).
233        for j in 0..m {
234            data_vec[7 + j * n] = (t[j] * 20.0).cos() * 10.0;
235        }
236        let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
237
238        let config = ElasticOutlierConfig::default();
239        let result = elastic_outlier_detection(&data, &t, &config).unwrap();
240
241        // The outlier (index 7) should be detected.
242        assert!(
243            result.outlier_indices.contains(&7),
244            "should detect curve 7 as outlier, detected: {:?}",
245            result.outlier_indices
246        );
247
248        // The outlier's distance should exceed the threshold.
249        assert!(
250            result.distances[7] > result.threshold,
251            "outlier distance ({}) should exceed threshold ({})",
252            result.distances[7],
253            result.threshold
254        );
255    }
256
257    #[test]
258    fn outlier_detection_config_default() {
259        let cfg = ElasticOutlierConfig::default();
260        assert!((cfg.lambda - 0.0).abs() < f64::EPSILON);
261        assert!((cfg.alpha - 0.05).abs() < f64::EPSILON);
262        assert!(cfg.use_median);
263    }
264}