use super::karcher::karcher_mean;
use super::pairwise::{amplitude_distance, elastic_distance, phase_distance_pair};
use super::robust_karcher::{karcher_median, RobustKarcherConfig};
use crate::error::FdarError;
use crate::matrix::FdMatrix;
#[derive(Debug, Clone, PartialEq)]
pub struct ElasticOutlierConfig {
pub lambda: f64,
pub alpha: f64,
pub use_median: bool,
}
impl Default for ElasticOutlierConfig {
fn default() -> Self {
Self {
lambda: 0.0,
alpha: 0.05,
use_median: true,
}
}
}
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ElasticOutlierResult {
pub outlier_indices: Vec<usize>,
pub distances: Vec<f64>,
pub threshold: f64,
pub amplitude_distances: Vec<f64>,
pub phase_distances: Vec<f64>,
}
#[must_use = "expensive computation whose result should not be discarded"]
pub fn elastic_outlier_detection(
data: &FdMatrix,
argvals: &[f64],
config: &ElasticOutlierConfig,
) -> Result<ElasticOutlierResult, FdarError> {
let (n, m) = data.shape();
if argvals.len() != m {
return Err(FdarError::InvalidDimension {
parameter: "argvals",
expected: format!("{m}"),
actual: format!("{}", argvals.len()),
});
}
if n < 2 {
return Err(FdarError::InvalidDimension {
parameter: "data",
expected: "at least 2 rows".to_string(),
actual: format!("{n} rows"),
});
}
let reference = if config.use_median {
let median_config = RobustKarcherConfig {
max_iter: 15,
tol: 1e-3,
lambda: config.lambda,
trim_fraction: 0.1,
};
let result = karcher_median(data, argvals, &median_config)?;
result.mean
} else {
let result = karcher_mean(data, argvals, 15, 1e-3, config.lambda);
result.mean
};
let distances: Vec<f64> = (0..n)
.map(|i| {
let fi = data.row(i);
elastic_distance(&reference, &fi, argvals, config.lambda)
})
.collect();
let amplitude_distances: Vec<f64> = (0..n)
.map(|i| {
let fi = data.row(i);
amplitude_distance(&reference, &fi, argvals, config.lambda)
})
.collect();
let phase_distances: Vec<f64> = (0..n)
.map(|i| {
let fi = data.row(i);
phase_distance_pair(&reference, &fi, argvals, config.lambda)
})
.collect();
let threshold = tukey_fence(&distances);
let outlier_indices: Vec<usize> = (0..n).filter(|&i| distances[i] > threshold).collect();
Ok(ElasticOutlierResult {
outlier_indices,
distances,
threshold,
amplitude_distances,
phase_distances,
})
}
fn tukey_fence(values: &[f64]) -> f64 {
let n = values.len();
if n < 4 {
return values.iter().copied().fold(f64::NEG_INFINITY, f64::max) + 1.0;
}
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let q1 = percentile_sorted(&sorted, 25.0);
let q3 = percentile_sorted(&sorted, 75.0);
let iqr = q3 - q1;
q3 + 1.5 * iqr
}
fn percentile_sorted(sorted: &[f64], pct: f64) -> f64 {
let n = sorted.len();
if n == 0 {
return 0.0;
}
if n == 1 {
return sorted[0];
}
let rank = pct / 100.0 * (n - 1) as f64;
let lo = rank.floor() as usize;
let hi = rank.ceil() as usize;
let frac = rank - lo as f64;
if lo >= n || hi >= n {
sorted[n - 1]
} else {
sorted[lo] * (1.0 - frac) + sorted[hi] * frac
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_helpers::uniform_grid;
fn make_clean_data(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
let t = uniform_grid(m);
let mut data_vec = vec![0.0; n * m];
for i in 0..n {
let phase = 0.02 * i as f64;
for j in 0..m {
data_vec[i + j * n] = ((t[j] + phase) * 4.0).sin();
}
}
let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
(data, t)
}
#[test]
fn outlier_detection_no_outliers() {
let (data, t) = make_clean_data(6, 20);
let config = ElasticOutlierConfig::default();
let result = elastic_outlier_detection(&data, &t, &config).unwrap();
assert_eq!(result.distances.len(), 6);
assert_eq!(result.amplitude_distances.len(), 6);
assert_eq!(result.phase_distances.len(), 6);
assert!(result.threshold > 0.0);
assert!(
result.outlier_indices.len() <= 1,
"clean data should have at most 1 outlier, got {}",
result.outlier_indices.len()
);
}
#[test]
fn outlier_detection_finds_extreme() {
let m = 20;
let t = uniform_grid(m);
let n = 8;
let mut data_vec = vec![0.0; n * m];
for i in 0..7 {
let phase = 0.02 * i as f64;
for j in 0..m {
data_vec[i + j * n] = ((t[j] + phase) * 4.0).sin();
}
}
for j in 0..m {
data_vec[7 + j * n] = (t[j] * 20.0).cos() * 10.0;
}
let data = FdMatrix::from_column_major(data_vec, n, m).unwrap();
let config = ElasticOutlierConfig::default();
let result = elastic_outlier_detection(&data, &t, &config).unwrap();
assert!(
result.outlier_indices.contains(&7),
"should detect curve 7 as outlier, detected: {:?}",
result.outlier_indices
);
assert!(
result.distances[7] > result.threshold,
"outlier distance ({}) should exceed threshold ({})",
result.distances[7],
result.threshold
);
}
#[test]
fn outlier_detection_config_default() {
let cfg = ElasticOutlierConfig::default();
assert!((cfg.lambda - 0.0).abs() < f64::EPSILON);
assert!((cfg.alpha - 0.05).abs() < f64::EPSILON);
assert!(cfg.use_median);
}
}