use crate::error::{AnomalyError, AnomalyResult};
/// Tiny positive offset added to a tail probability before its logarithm is
/// taken, so that a probability of exactly zero still yields a finite score.
const LOG_EPS: f32 = 1e-10;
/// Empirical-CDF based outlier detector over dense `f32` feature vectors.
///
/// Fitting stores, per feature, the sorted training values and the sample
/// skewness; scoring combines the negative log tail probabilities of a query
/// under those per-feature empirical distributions.
#[derive(Debug, Clone)]
pub struct Ecod {
    /// Training values of each feature, one ascending-sorted column per feature.
    sorted_features: Vec<Vec<f32>>,
    /// Sample skewness of each feature (0 for near-constant columns).
    skewness: Vec<f32>,
    /// Number of training samples used by the last fit.
    n_train: usize,
    /// Number of features per sample; 0 means the detector is not fitted.
    n_features: usize,
}
impl Ecod {
    /// Creates an unfitted detector.
    ///
    /// Scoring returns [`AnomalyError::NotFitted`] until [`Ecod::fit`] succeeds.
    #[must_use]
    pub fn new() -> Self {
        Self {
            sorted_features: Vec::new(),
            skewness: Vec::new(),
            n_train: 0,
            n_features: 0,
        }
    }

    /// Ascending order in which every NaN sorts after all ordinary numbers.
    ///
    /// Unlike `partial_cmp(..).unwrap_or(Equal)`, this is a consistent total
    /// order, which `sort_unstable_by` requires; NaN-free input is ordered
    /// exactly as `partial_cmp` would order it.
    fn nan_last_cmp(a: &f32, b: &f32) -> std::cmp::Ordering {
        a.partial_cmp(b)
            .unwrap_or_else(|| a.is_nan().cmp(&b.is_nan()))
    }

    /// Fits the detector on `n_samples` row-major samples of `n_features` values.
    ///
    /// For every feature the training column is sorted (NaNs last) and its
    /// skewness is computed as the mean cubed z-score using the population
    /// standard deviation; columns whose standard deviation is not above
    /// `1e-8` (including columns containing NaN) get a skewness of 0.
    /// The detector is only modified when fitting succeeds.
    ///
    /// # Errors
    ///
    /// * [`AnomalyError::EmptyInput`] if `n_samples == 0`.
    /// * [`AnomalyError::InvalidFeatureCount`] if `n_features == 0`.
    /// * [`AnomalyError::DimensionMismatch`] if `data.len()` differs from
    ///   `n_samples * n_features` (an overflowing product never matches).
    pub fn fit(&mut self, data: &[f32], n_samples: usize, n_features: usize) -> AnomalyResult<()> {
        if n_samples == 0 {
            return Err(AnomalyError::EmptyInput);
        }
        if n_features == 0 {
            return Err(AnomalyError::InvalidFeatureCount { n: 0 });
        }
        // Saturate instead of overflowing: no slice of `f32` can be
        // `usize::MAX` long, so an overflowing request is always a mismatch.
        let expected = n_samples.saturating_mul(n_features);
        if data.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: data.len(),
            });
        }

        let n = n_samples as f32;
        let mut sorted_features = Vec::with_capacity(n_features);
        let mut skewness = Vec::with_capacity(n_features);
        for j in 0..n_features {
            let mut col: Vec<f32> = data.chunks_exact(n_features).map(|row| row[j]).collect();
            col.sort_unstable_by(Self::nan_last_cmp);

            let mean = col.iter().sum::<f32>() / n;
            let var = col.iter().map(|&v| (v - mean) * (v - mean)).sum::<f32>() / n;
            let std_dev = var.sqrt();
            // Near-constant columns have no meaningful skewness; treat as symmetric.
            let skew = if std_dev > 1e-8_f32 {
                col.iter()
                    .map(|&v| {
                        let z = (v - mean) / std_dev;
                        z * z * z
                    })
                    .sum::<f32>()
                    / n
            } else {
                0.0_f32
            };
            skewness.push(skew);
            sorted_features.push(col);
        }

        // Commit only once everything has been computed.
        self.n_train = n_samples;
        self.n_features = n_features;
        self.sorted_features = sorted_features;
        self.skewness = skewness;
        Ok(())
    }

    /// Fraction of the training values of feature `feat` that are `<= v`.
    ///
    /// NaN training values (sorted last) and a NaN query never count.
    fn ecdf(&self, feat: usize, v: f32) -> f32 {
        let col = &self.sorted_features[feat];
        let count = col.partition_point(|&x| x <= v);
        count as f32 / self.n_train as f32
    }

    /// Scores one sample; larger values mean more anomalous.
    ///
    /// Per feature, with `p = ecdf(x_j)`, the contribution is
    /// `w_left * -ln(p + LOG_EPS) + w_right * -ln(1 - p + LOG_EPS)` where
    /// `w_left = 0.5 + 0.5 * tanh(skewness_j)` and `w_right = 1 - w_left`.
    /// The result is the mean contribution over all features.
    ///
    /// # Errors
    ///
    /// * [`AnomalyError::NotFitted`] if the detector has not been fitted.
    /// * [`AnomalyError::FeatureCountMismatch`] if `x.len()` differs from the
    ///   fitted feature count.
    pub fn score(&self, x: &[f32]) -> AnomalyResult<f32> {
        if self.n_features == 0 {
            return Err(AnomalyError::NotFitted);
        }
        if x.len() != self.n_features {
            return Err(AnomalyError::FeatureCountMismatch {
                expected: self.n_features,
                got: x.len(),
            });
        }
        let mut acc = 0.0_f32;
        for (j, &xi) in x.iter().enumerate() {
            let p_left = self.ecdf(j, xi);
            let p_right = 1.0_f32 - p_left;
            // NOTE(review): positive skew shifts weight onto the left-tail term
            // here, whereas the reference ECOD rule favours the right tail for
            // positively skewed features — confirm this orientation is intended.
            let w_left = 0.5_f32 + 0.5_f32 * self.skewness[j].tanh();
            let w_right = 1.0_f32 - w_left;
            let score_left = -(p_left + LOG_EPS).ln();
            let score_right = -(p_right + LOG_EPS).ln();
            acc += w_left * score_left + w_right * score_right;
        }
        Ok(acc / self.n_features as f32)
    }

    /// Scores `n` row-major samples stored back to back in `x`.
    ///
    /// # Errors
    ///
    /// * [`AnomalyError::NotFitted`] if the detector has not been fitted.
    /// * [`AnomalyError::DimensionMismatch`] if `x.len()` differs from
    ///   `n * n_features` (an overflowing product never matches).
    pub fn score_batch(&self, x: &[f32], n: usize) -> AnomalyResult<Vec<f32>> {
        if self.n_features == 0 {
            return Err(AnomalyError::NotFitted);
        }
        let expected = n.saturating_mul(self.n_features);
        if x.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: x.len(),
            });
        }
        // `n_features > 0` here, so `chunks_exact` yields exactly `n` rows.
        x.chunks_exact(self.n_features)
            .map(|sample| self.score(sample))
            .collect()
    }

    /// Per-feature skewness computed by the last fit (empty if unfitted).
    #[must_use]
    pub fn skewness(&self) -> &[f32] {
        &self.skewness
    }

    /// Number of features per sample, or 0 if the detector is not fitted.
    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }

    /// Number of training samples used by the last fit.
    #[must_use]
    pub fn n_train(&self) -> usize {
        self.n_train
    }
}
impl Default for Ecod {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-feature ramp `0.0, 1.0, …, n - 1`.
    fn make_1d(n: usize) -> Vec<f32> {
        let mut ramp = Vec::with_capacity(n);
        for i in 0..n {
            ramp.push(i as f32);
        }
        ramp
    }

    /// `n` row-major two-feature samples `(i, 2 * i)`.
    fn make_2d(n: usize) -> Vec<f32> {
        let mut rows = Vec::with_capacity(2 * n);
        for i in 0..n {
            rows.extend([i as f32, (i * 2) as f32]);
        }
        rows
    }

    /// Fits a fresh detector, panicking with `why` if fitting fails.
    fn fitted(train: &[f32], n_samples: usize, n_features: usize, why: &str) -> Ecod {
        let mut model = Ecod::new();
        model.fit(train, n_samples, n_features).expect(why);
        model
    }

    /// A mid-range query on 1-D data yields a finite, non-negative score.
    #[test]
    fn ecod_1d_fit_score_finite() {
        let model = fitted(&make_1d(20), 20, 1, "fit should succeed on 1-D data");
        let s = model
            .score(&[10.0_f32])
            .expect("score should succeed after fit");
        assert!(s.is_finite() && s >= 0.0, "s={s}");
    }

    /// Scoring before fitting reports `NotFitted`.
    #[test]
    fn ecod_not_fitted_error() {
        let err = Ecod::new().score(&[1.0_f32]).unwrap_err();
        assert!(
            matches!(err, AnomalyError::NotFitted),
            "expected NotFitted, got {err:?}"
        );
    }

    /// A far-away point scores higher than a mid-range point.
    #[test]
    fn ecod_outlier_higher_than_inlier() {
        let model = fitted(&make_1d(40), 40, 1, "fit should succeed on 1-D data");
        let s_normal = model
            .score(&[20.0_f32])
            .expect("normal score should succeed");
        let s_outlier = model
            .score(&[1000.0_f32])
            .expect("outlier score should succeed");
        assert!(
            s_outlier > s_normal,
            "outlier={s_outlier} should > inlier={s_normal}"
        );
    }

    /// Batch scoring returns one finite score per query row.
    #[test]
    fn ecod_batch_correct_length() {
        let model = fitted(&make_2d(20), 20, 2, "fit should succeed on 2-D data");
        let batch = make_2d(5);
        let scores = model
            .score_batch(&batch, 5)
            .expect("batch score should succeed after fit");
        assert_eq!(scores.len(), 5);
        assert!(!scores.iter().any(|s| !s.is_finite()), "all scores finite");
    }

    /// Fitting on zero samples reports `EmptyInput`.
    #[test]
    fn ecod_empty_input_error() {
        let mut model = Ecod::new();
        let err = model.fit(&[], 0, 3).unwrap_err();
        assert!(matches!(err, AnomalyError::EmptyInput), "got {err:?}");
    }

    /// A buffer whose length is not `n_samples * n_features` is rejected.
    #[test]
    fn ecod_fit_dimension_mismatch() {
        let mut model = Ecod::new();
        let nine_zeros = [0.0_f32; 9];
        let err = model.fit(&nine_zeros, 5, 2).unwrap_err();
        assert!(
            matches!(err, AnomalyError::DimensionMismatch { .. }),
            "got {err:?}"
        );
    }

    /// Scoring a sample with the wrong number of features is rejected.
    #[test]
    fn ecod_feature_count_mismatch_at_score() {
        let model = fitted(&make_2d(20), 20, 2, "fit should succeed on 2-D data");
        let err = model.score(&[1.0_f32, 2.0, 3.0]).unwrap_err();
        assert!(
            matches!(
                err,
                AnomalyError::FeatureCountMismatch {
                    expected: 2,
                    got: 3
                }
            ),
            "got {err:?}"
        );
    }

    /// Symmetric training data gives (almost) equal left and right tail weights.
    #[test]
    fn ecod_symmetric_skew_weights_balanced() {
        let n = 21_usize;
        // Ramp shifted to -10.0 ..= 10.0, symmetric around zero.
        let centred: Vec<f32> = make_1d(n).into_iter().map(|v| v - 10.0_f32).collect();
        let model = fitted(&centred, n, 1, "fit should succeed on symmetric 1-D data");
        let w_left = 0.5_f32 + 0.5_f32 * model.skewness()[0].tanh();
        let w_right = 1.0_f32 - w_left;
        assert!(
            (w_left - 0.5_f32).abs() < 0.05_f32,
            "w_left should be ≈0.5, got {w_left}"
        );
        assert!(
            (w_right - 0.5_f32).abs() < 0.05_f32,
            "w_right should be ≈0.5, got {w_right}"
        );
    }

    /// Two-feature batch: every score is finite and non-negative, and the
    /// extreme row outranks the mid-range row.
    #[test]
    fn ecod_multi_feature_batch_finite() {
        let model = fitted(&make_2d(30), 30, 2, "fit should succeed on 2-D data");
        #[rustfmt::skip]
        let batch: [f32; 10] = [
              5.0,   10.0,
             29.0,   58.0,
             -1.0,   -2.0,
            500.0, 1000.0,
             15.0,   30.0,
        ];
        let scores = model
            .score_batch(&batch, 5)
            .expect("batch score should succeed after fit");
        assert_eq!(scores.len(), 5);
        for (idx, s) in scores.iter().copied().enumerate() {
            assert!(s.is_finite(), "score[{idx}] is not finite: {s}");
            assert!(s >= 0.0, "score[{idx}] should be non-negative, got {s}");
        }
        assert!(
            scores[3] > scores[4],
            "outlier score {} should exceed mid-range {}",
            scores[3],
            scores[4]
        );
    }
}