use crate::error::{AnomalyError, AnomalyResult};
/// Angle-based outlier detector.
///
/// Keeps its own copy of the reference data handed to `fit`, laid out as a
/// flat row-major buffer of `n_samples` rows with `n_features` values each.
#[derive(Clone, Debug)]
pub struct Abod {
    /// Flat, row-major copy of the reference data.
    data: Vec<f32>,
    /// Number of rows stored in `data`.
    n_samples: usize,
    /// Number of values in each row of `data`.
    n_features: usize,
    /// `true` once `fit` has completed successfully.
    fitted: bool,
}
impl Abod {
    /// Creates an empty, unfitted detector.
    ///
    /// Scoring methods return `AnomalyError::NotFitted` until [`Abod::fit`] has succeeded.
    #[must_use]
    pub fn new() -> Self {
        Self {
            data: Vec::new(),
            n_samples: 0,
            n_features: 0,
            fitted: false,
        }
    }

    /// Stores a copy of `data` as the reference set used by later scoring calls.
    ///
    /// `data` is a flat, row-major buffer holding `n_samples` rows of `n_features`
    /// values. A successful call replaces any previously fitted reference set.
    ///
    /// # Errors
    ///
    /// * `AnomalyError::InsufficientSamples` if `n_samples < 2`.
    /// * `AnomalyError::InvalidFeatureCount` if `n_features == 0`.
    /// * `AnomalyError::DimensionMismatch` if `data.len()` differs from
    ///   `n_samples * n_features`, including when that product does not fit in `usize`.
    ///
    /// The detector is left unchanged when an error is returned.
    pub fn fit(&mut self, data: &[f32], n_samples: usize, n_features: usize) -> AnomalyResult<()> {
        if n_samples < 2 {
            return Err(AnomalyError::InsufficientSamples {
                need: 2,
                got: n_samples,
            });
        }
        if n_features == 0 {
            return Err(AnomalyError::InvalidFeatureCount { n: 0 });
        }
        // Saturate instead of overflowing: no `f32` slice can hold `usize::MAX`
        // elements, so an overflowing product is always reported as a mismatch
        // rather than panicking (debug) or wrapping to a bogus small size (release).
        let expected = n_samples.saturating_mul(n_features);
        if data.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: data.len(),
            });
        }
        // Reuse the existing allocation when the detector is re-fitted.
        self.data.clear();
        self.data.extend_from_slice(data);
        self.n_samples = n_samples;
        self.n_features = n_features;
        self.fitted = true;
        Ok(())
    }

    /// Returns the anomaly score of the single point `x`.
    ///
    /// The score is `1 / (abof + 1e-10)`, where `abof` is the value returned by
    /// `compute_abof` for `x` against the fitted reference set; the small constant
    /// keeps the division finite when `abof` is zero.
    ///
    /// # Errors
    ///
    /// * `AnomalyError::NotFitted` if [`Abod::fit`] has not succeeded yet.
    /// * `AnomalyError::FeatureCountMismatch` if `x.len()` differs from the fitted
    ///   feature count.
    /// * Any error returned by `compute_abof`.
    pub fn score(&self, x: &[f32]) -> AnomalyResult<f32> {
        if !self.fitted {
            return Err(AnomalyError::NotFitted);
        }
        if x.len() != self.n_features {
            return Err(AnomalyError::FeatureCountMismatch {
                expected: self.n_features,
                got: x.len(),
            });
        }
        let abof = compute_abof(x, &self.data, self.n_samples, self.n_features)?;
        Ok(1.0 / (abof + 1e-10))
    }

    /// Scores `n_samples` points stored row-major in `x`, one score per row.
    ///
    /// Each row is scored exactly as [`Abod::score`] would score it; the first
    /// failing row aborts the batch.
    ///
    /// # Errors
    ///
    /// * `AnomalyError::NotFitted` if [`Abod::fit`] has not succeeded yet.
    /// * `AnomalyError::DimensionMismatch` if `x.len()` differs from
    ///   `n_samples * n_features`, including when that product does not fit in `usize`.
    /// * Any error returned by [`Abod::score`] for an individual row.
    pub fn score_batch(&self, x: &[f32], n_samples: usize) -> AnomalyResult<Vec<f32>> {
        if !self.fitted {
            return Err(AnomalyError::NotFitted);
        }
        // Same overflow-safe size check as in `fit`.
        let expected = n_samples.saturating_mul(self.n_features);
        if x.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: x.len(),
            });
        }
        let mut scores = Vec::with_capacity(n_samples);
        // A fitted detector always has `n_features >= 1`, so the chunk size is
        // never zero, and the length check above guarantees there is no remainder.
        for row in x.chunks_exact(self.n_features) {
            scores.push(self.score(row)?);
        }
        Ok(scores)
    }
}
impl Default for Abod {
fn default() -> Self {
Self::new()
}
}
/// Computes the angle-based outlier factor of `p` against a reference set.
///
/// For every unordered pair of usable reference rows `a`, `b`, the weighted
/// cosine `<p - a, p - b> / (|p - a|^2 * |p - b|^2)` is evaluated; the result is
/// the population variance of those values (clamped at zero). A reference row
/// is usable when its squared distance to `p` exceeds `1e-20`, which excludes
/// rows that coincide with `p`.
///
/// `data` is a flat row-major buffer of `n_samples` rows with `n_features`
/// values each. Difference vectors are stored as `f32`, but norms, dot products
/// and the weights are accumulated in `f64` so that the product of two squared
/// distances can neither overflow nor lose precision.
///
/// # Errors
///
/// Returns `AnomalyError::InsufficientSamples` when fewer than two reference
/// rows are usable; `got` then carries the number of usable rows.
///
/// # Panics
///
/// Panics if `data` holds fewer than `n_samples * n_features` values.
fn compute_abof(
    p: &[f32],
    data: &[f32],
    n_samples: usize,
    n_features: usize,
) -> AnomalyResult<f32> {
    // Squared distances at or below this are treated as "same point as `p`".
    const MIN_SQ_DIST: f64 = 1e-20;

    let d = n_features;
    // `zip` stops at the shorter operand, so every difference vector has this length.
    let width = p.len().min(d);

    // One flat buffer of difference vectors (usable rows only) instead of one
    // heap allocation per reference row.
    let mut diffs: Vec<f32> = Vec::with_capacity(data.len().min(n_samples.saturating_mul(width)));
    let mut sq_norms: Vec<f64> = Vec::with_capacity(n_samples.min(data.len()));

    for i in 0..n_samples {
        let row = &data[i * d..(i + 1) * d];
        let start = diffs.len();
        diffs.extend(p.iter().zip(row).map(|(pi, ai)| pi - ai));
        let sq: f64 = diffs[start..]
            .iter()
            .map(|&v| f64::from(v) * f64::from(v))
            .sum();
        if sq > MIN_SQ_DIST {
            sq_norms.push(sq);
        } else {
            // Row coincides with `p` (or is NaN): drop its difference vector again.
            diffs.truncate(start);
        }
    }

    let usable = sq_norms.len();
    if usable < 2 {
        return Err(AnomalyError::InsufficientSamples {
            need: 2,
            got: usable,
        });
    }

    // At least one row has a positive squared norm, hence `width >= 1` and
    // `chunks_exact` is never given a zero chunk size.
    let mut sum_f = 0.0_f64;
    let mut sum_f2 = 0.0_f64;
    let mut pairs = 0_u64;
    for (i, (pa, &pa_sq)) in diffs.chunks_exact(width).zip(&sq_norms).enumerate() {
        let later_rows = diffs[(i + 1) * width..].chunks_exact(width);
        for (pb, &pb_sq) in later_rows.zip(&sq_norms[i + 1..]) {
            let dot: f64 = pa
                .iter()
                .zip(pb)
                .map(|(&a, &b)| f64::from(a) * f64::from(b))
                .sum();
            let f = dot / (pa_sq * pb_sq);
            sum_f += f;
            sum_f2 += f * f;
            pairs += 1;
        }
    }

    let n = pairs as f64;
    let mean_f = sum_f / n;
    let mean_f2 = sum_f2 / n;
    // Rounding can push E[f^2] - E[f]^2 marginally below zero.
    let variance = (mean_f2 - mean_f * mean_f).max(0.0);
    Ok(variance as f32)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Multiplier of the 64-bit LCG that drives the deterministic fixtures.
    const LCG_MUL: u64 = 6_364_136_223_846_793_005;
    /// Increment of the same LCG.
    const LCG_INC: u64 = 1_442_695_040_888_963_407;

    /// Advances the LCG held in `state` and returns a sample in `[0, 1)`.
    ///
    /// Only the top 24 bits are used: they fit an `f32` mantissa exactly, so the
    /// quotient is exact and strictly below 1. (Dividing a 31-bit draw by
    /// `u32::MAX` would only ever cover `[0, 0.5]`.)
    fn next_unit(state: &mut u64) -> f32 {
        *state = state.wrapping_mul(LCG_MUL).wrapping_add(LCG_INC);
        ((*state >> 40) as f32) / 16_777_216.0
    }

    /// Generates `n` deterministic 2-D points in the axis-aligned box
    /// `[cx - r, cx + r) x [cy - r, cy + r)`, flattened as `x0, y0, x1, y1, ...`.
    fn cluster_2d(n: usize, cx: f32, cy: f32, r: f32, seed: u64) -> Vec<f32> {
        let mut state = seed;
        let mut data = Vec::with_capacity(n * 2);
        for _ in 0..n {
            let x = next_unit(&mut state) * 2.0 * r - r + cx;
            let y = next_unit(&mut state) * 2.0 * r - r + cy;
            data.push(x);
            data.push(y);
        }
        data
    }

    /// A query inside a small triangle yields a finite, positive score.
    #[test]
    fn test_score_is_finite() {
        let data: Vec<f32> = vec![0.0, 0.0, 1.0, 0.0, 0.0, 1.0];
        let mut abod = Abod::new();
        abod.fit(&data, 3, 2)
            .expect("fit should succeed with 3 samples, 2 features");
        let s = abod
            .score(&[0.5, 0.5])
            .expect("score should succeed for interior point");
        assert!(s.is_finite(), "score={s}");
        assert!(s > 0.0, "score must be positive, got {s}");
    }

    /// A far-away point scores higher than a point inside the cluster.
    #[test]
    fn test_outlier_scores_higher_than_inlier() {
        let mut data = cluster_2d(10, 0.0, 0.0, 0.5, 42);
        data.extend_from_slice(&[20.0_f32, 20.0]);
        let n = 11;
        let mut abod = Abod::new();
        abod.fit(&data, n, 2).expect("fit should succeed");
        let s_inlier = abod
            .score(&[0.1_f32, 0.1])
            .expect("inlier score should succeed");
        let s_outlier = abod
            .score(&[20.0_f32, 20.0])
            .expect("outlier score should succeed");
        assert!(
            s_outlier > s_inlier,
            "outlier({s_outlier}) should score higher than inlier({s_inlier})"
        );
    }

    /// The centre of the cluster scores lower than a point well outside it.
    #[test]
    fn test_cluster_center_has_lowest_score() {
        let data = cluster_2d(20, 0.0, 0.0, 1.0, 7);
        let mut abod = Abod::new();
        abod.fit(&data, 20, 2)
            .expect("fit should succeed with 20 samples");
        let s_center = abod
            .score(&[0.0_f32, 0.0])
            .expect("center score should succeed");
        let s_edge = abod
            .score(&[3.0_f32, 3.0])
            .expect("edge score should succeed");
        assert!(
            s_center < s_edge,
            "center({s_center}) should score lower than far edge({s_edge})"
        );
    }

    /// Reference set is the unit square; its centre must score below a distant query.
    #[test]
    fn test_two_inliers_one_outlier() {
        let data: Vec<f32> = vec![0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0];
        let mut abod = Abod::new();
        abod.fit(&data, 4, 2)
            .expect("fit should succeed with 4 samples");
        let s_inlier = abod
            .score(&[0.5_f32, 0.5])
            .expect("inlier score should succeed");
        let s_outlier = abod
            .score(&[10.0_f32, 10.0])
            .expect("outlier score should succeed");
        assert!(
            s_outlier > s_inlier,
            "outlier({s_outlier}) > inlier({s_inlier})"
        );
    }

    /// `score_batch` returns the same values as scoring each row individually.
    #[test]
    fn test_score_batch_consistent() {
        let data = cluster_2d(15, 0.0, 0.0, 1.0, 13);
        let mut abod = Abod::new();
        abod.fit(&data, 15, 2)
            .expect("fit should succeed with 15 samples");
        let queries: Vec<f32> = vec![0.0, 0.0, 5.0, 5.0, -5.0, -5.0];
        let batch_scores = abod
            .score_batch(&queries, 3)
            .expect("batch score should succeed");
        for (i, row) in queries.chunks_exact(2).enumerate() {
            let single = abod.score(row).expect("single score should succeed");
            assert!(
                (batch_scores[i] - single).abs() < 1e-6,
                "batch[{i}]={} vs single={}",
                batch_scores[i],
                single
            );
        }
    }

    /// Fitting a single sample is rejected.
    #[test]
    fn test_fit_requires_at_least_2_samples() {
        let data: Vec<f32> = vec![1.0, 2.0];
        let mut abod = Abod::new();
        let result = abod.fit(&data, 1, 2);
        assert!(matches!(
            result,
            Err(AnomalyError::InsufficientSamples { .. })
        ));
    }

    /// Scoring before `fit` reports `NotFitted`.
    #[test]
    fn test_score_before_fit_error() {
        let abod = Abod::new();
        let result = abod.score(&[0.0_f32, 0.0]);
        assert!(matches!(result, Err(AnomalyError::NotFitted)));
    }

    /// A feature count of zero is rejected by `fit`.
    #[test]
    fn test_empty_features_error() {
        let mut abod = Abod::new();
        let result = abod.fit(&[], 3, 0);
        assert!(matches!(
            result,
            Err(AnomalyError::InvalidFeatureCount { .. })
        ));
    }

    /// A query with the wrong number of features is rejected by `score`.
    #[test]
    fn test_dimension_mismatch_score() {
        let data: Vec<f32> = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
        let mut abod = Abod::new();
        abod.fit(&data, 3, 2)
            .expect("fit should succeed with 3 samples and 2 features");
        let result = abod.score(&[0.0_f32, 0.0, 0.0]);
        assert!(matches!(
            result,
            Err(AnomalyError::FeatureCountMismatch { .. })
        ));
    }

    /// Scoring the same query twice yields bit-identical results.
    #[test]
    fn test_score_deterministic() {
        let data = cluster_2d(10, 0.0, 0.0, 1.0, 99);
        let mut abod = Abod::new();
        abod.fit(&data, 10, 2)
            .expect("fit should succeed with 10 samples");
        let s1 = abod
            .score(&[2.0_f32, 0.0])
            .expect("first score should be deterministic");
        let s2 = abod
            .score(&[2.0_f32, 0.0])
            .expect("second score should be deterministic");
        assert_eq!(s1, s2, "score should be deterministic");
    }

    /// Reference points on a single line still produce a finite score.
    #[test]
    fn test_collinear_training_data_handled() {
        let data: Vec<f32> = (0..10).flat_map(|i| [i as f32, 0.0_f32]).collect();
        let mut abod = Abod::new();
        abod.fit(&data, 10, 2)
            .expect("fit should succeed for collinear data");
        let s = abod
            .score(&[5.0_f32, 0.0])
            .expect("score on collinear data should not crash");
        assert!(s.is_finite(), "collinear data score={s} should be finite");
    }

    /// Ten-dimensional data in the unit cube; the cube centre scores finite and positive.
    #[test]
    fn test_high_dimensional_abod() {
        let d = 10_usize;
        let n = 20_usize;
        let mut state = 55u64;
        let data: Vec<f32> = (0..n * d).map(|_| next_unit(&mut state)).collect();
        let mut abod = Abod::new();
        abod.fit(&data, n, d)
            .expect("fit should succeed for high-dimensional data");
        let query: Vec<f32> = vec![0.5; d];
        let s = abod
            .score(&query)
            .expect("score should succeed for high-dimensional query");
        assert!(s.is_finite() && s > 0.0, "hd score={s}");
    }

    /// The planted outlier must score above every one of the 20 cluster members.
    #[test]
    fn test_inlier_lower_than_outlier_2d_cluster() {
        let mut data = cluster_2d(20, 0.5, 0.5, 0.5, 111);
        data.extend_from_slice(&[5.0_f32, 5.0]);
        let n = 21;
        let mut abod = Abod::new();
        abod.fit(&data, n, 2)
            .expect("fit should succeed with 21 samples");
        let max_inlier_score: f32 = data
            .chunks_exact(2)
            .take(20)
            .map(|row| abod.score(row).expect("inlier score should succeed"))
            .fold(f32::NEG_INFINITY, f32::max);
        let outlier_score = abod
            .score(&[5.0_f32, 5.0])
            .expect("outlier score should succeed");
        assert!(
            outlier_score > max_inlier_score,
            "outlier({outlier_score}) must beat max inlier({max_inlier_score})"
        );
    }

    /// `Default` produces an unfitted detector.
    #[test]
    fn test_default_impl() {
        let abod = Abod::default();
        assert!(!abod.fitted, "fresh ABOD should not be fitted");
    }

    /// A buffer whose length disagrees with the declared shape is rejected by `fit`.
    #[test]
    fn test_data_dimension_mismatch_fit() {
        let mut abod = Abod::new();
        let result = abod.fit(&[1.0, 2.0, 3.0, 4.0, 5.0], 3, 2);
        assert!(matches!(
            result,
            Err(AnomalyError::DimensionMismatch { .. })
        ));
    }

    /// A batch buffer whose length disagrees with the declared row count is rejected.
    #[test]
    fn test_score_batch_dimension_mismatch() {
        let data: Vec<f32> = vec![0.0, 0.0, 1.0, 0.0, 0.0, 1.0];
        let mut abod = Abod::new();
        abod.fit(&data, 3, 2)
            .expect("fit should succeed before testing batch mismatch");
        let result = abod.score_batch(&[0.0_f32, 0.0, 1.0], 2);
        assert!(matches!(
            result,
            Err(AnomalyError::DimensionMismatch { .. })
        ));
    }
}