use statistical::median;
/// Cut-off applied to the absolute value of a modified z-score: `num_outliers`
/// counts a sample as an outlier when its score is strictly greater than this.
///
/// NOTE(review): 1.4826 is presumably the usual factor relating the MAD to the
/// standard deviation of normally distributed data, which would make this a
/// "10 sigma" style limit expressed in raw-MAD units — confirm the intent.
pub const OUTLIER_THRESHOLD: f64 = 1.4826 * 10.0;
/// Scores every element of `xs` by its signed distance from the median of
/// `xs`, expressed in units of the median absolute deviation (MAD).
///
/// The returned vector has one score per input element, in input order. No
/// additional scaling constant is applied to the MAD here.
///
/// When the MAD is not strictly positive (for example when more than half of
/// the samples are identical), `f64::EPSILON` is used as the divisor instead,
/// so samples that differ from the median receive very large scores rather
/// than an infinite or NaN one.
///
/// # Panics
///
/// Panics if `xs` is empty.
pub fn modified_zscores(xs: &[f64]) -> Vec<f64> {
    assert!(!xs.is_empty());

    let center = median(xs);
    let offset_from_center = |value: f64| value - center;

    let abs_offsets: Vec<f64> = xs
        .iter()
        .copied()
        .map(|value| offset_from_center(value).abs())
        .collect();

    // Guard the division below: a zero MAD is replaced by the smallest
    // meaningful positive step so the scores stay finite.
    let scale = match median(&abs_offsets) {
        spread if spread > 0.0 => spread,
        _ => f64::EPSILON,
    };

    xs.iter()
        .copied()
        .map(|value| offset_from_center(value) / scale)
        .collect()
}
/// Counts the elements of `xs` whose modified z-score has an absolute value
/// strictly greater than `OUTLIER_THRESHOLD`.
///
/// An empty slice yields 0. Only compiled for test builds.
#[cfg(test)]
pub fn num_outliers(xs: &[f64]) -> usize {
    match xs {
        // `modified_zscores` asserts on empty input, so answer that case here.
        [] => 0,
        _ => modified_zscores(xs)
            .into_iter()
            .map(f64::abs)
            .filter(|magnitude| *magnitude > OUTLIER_THRESHOLD)
            .count(),
    }
}
/// Checks `num_outliers` on degenerate inputs, small hand-made samples and a
/// ten-value sample with and without two extreme values appended.
#[test]
fn test_detect_outliers() {
    // Degenerate inputs: no samples, one sample, two samples.
    let empty: [f64; 0] = [];
    assert_eq!(0, num_outliers(&empty));
    let single = [50.0];
    assert_eq!(0, num_outliers(&single));
    let pair = [1000.0, 0.0];
    assert_eq!(0, num_outliers(&pair));

    // A symmetric triple has no outlier; adding one distant value creates one.
    let symmetric = [-0.2, 0.0, 0.2];
    assert_eq!(0, num_outliers(&symmetric));
    let symmetric_plus_far = [-0.2, 0.0, 0.2, 4.0];
    assert_eq!(1, num_outliers(&symmetric_plus_far));

    // Tightly clustered values with a single value well away from the cluster.
    let clustered = [0.5, 0.30, 0.29, 0.31, 0.30];
    assert_eq!(1, num_outliers(&clustered));

    // Ten samples that are expected to contain no outlier at all.
    let baseline = [
        2.33269488,
        1.42195907,
        -0.57527698,
        -0.31293437,
        2.2948158,
        0.75813273,
        -1.0712388,
        -0.96394741,
        -1.15897446,
        1.10976285,
    ];
    assert_eq!(0, num_outliers(&baseline));

    // The same samples followed by two extreme values, both of which must be
    // reported.
    let mut contaminated: Vec<f64> = baseline.to_vec();
    contaminated.extend_from_slice(&[20.0, -500.0]);
    assert_eq!(2, num_outliers(&contaminated));
}
/// Checks that values differing from the rest are still counted when more
/// than half of the samples are identical, i.e. when the median absolute
/// deviation of the input is zero.
#[test]
fn test_detect_outliers_if_mad_becomes_0() {
    // Seven identical samples followed by one deviating sample.
    let mut xs: Vec<f64> = vec![10.0; 7];
    xs.push(100.0);
    assert_eq!(1, num_outliers(&xs));

    // A second deviating sample: the identical values still form the majority.
    xs.push(100.0);
    assert_eq!(2, num_outliers(&xs));
}