use crate::error::{AnomalyError, AnomalyResult};
#[derive(Debug, Clone)]
pub struct AbodApprox {
pub k: usize,
data: Vec<f32>,
n_samples: usize,
n_features: usize,
fitted: bool,
}
impl AbodApprox {
pub fn new(k: usize) -> AnomalyResult<Self> {
if k == 0 {
return Err(AnomalyError::InvalidK { k });
}
Ok(Self {
k,
data: Vec::new(),
n_samples: 0,
n_features: 0,
fitted: false,
})
}
pub fn fit(&mut self, data: &[f32], n_samples: usize, n_features: usize) -> AnomalyResult<()> {
if n_features == 0 {
return Err(AnomalyError::InvalidFeatureCount { n: 0 });
}
if n_samples < self.k + 1 {
return Err(AnomalyError::InsufficientSamples {
need: self.k + 1,
got: n_samples,
});
}
if data.len() != n_samples * n_features {
return Err(AnomalyError::DimensionMismatch {
expected: n_samples * n_features,
got: data.len(),
});
}
self.data = data.to_vec();
self.n_samples = n_samples;
self.n_features = n_features;
self.fitted = true;
Ok(())
}
pub fn score(&self, p: &[f32]) -> AnomalyResult<f32> {
if !self.fitted {
return Err(AnomalyError::NotFitted);
}
if p.len() != self.n_features {
return Err(AnomalyError::DimensionMismatch {
expected: self.n_features,
got: p.len(),
});
}
let mut dists: Vec<(f32, usize)> = (0..self.n_samples)
.map(|i| {
let row = &self.data[i * self.n_features..(i + 1) * self.n_features];
(euclidean_sq(p, row), i)
})
.collect();
dists.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
let knn_indices: Vec<usize> = dists.iter().take(self.k).map(|&(_, i)| i).collect();
let k = knn_indices.len();
if k < 2 {
return Ok(1.0); }
let mut sum_f = 0.0_f64;
let mut sum_f2 = 0.0_f64;
let mut count = 0_u64;
for i in 0..k {
for j in (i + 1)..k {
let a = &self.data
[knn_indices[i] * self.n_features..(knn_indices[i] + 1) * self.n_features];
let b = &self.data
[knn_indices[j] * self.n_features..(knn_indices[j] + 1) * self.n_features];
let pa: Vec<f64> = p
.iter()
.zip(a.iter())
.map(|(&pi, &ai)| (ai - pi) as f64)
.collect();
let pb: Vec<f64> = p
.iter()
.zip(b.iter())
.map(|(&pi, &bi)| (bi - pi) as f64)
.collect();
let dot: f64 = pa.iter().zip(pb.iter()).map(|(x, y)| x * y).sum();
let norm_pa_sq: f64 = pa.iter().map(|x| x * x).sum();
let norm_pb_sq: f64 = pb.iter().map(|x| x * x).sum();
let denom = norm_pa_sq * norm_pb_sq;
if denom < 1e-30 {
continue;
}
let f = dot / denom;
sum_f += f;
sum_f2 += f * f;
count += 1;
}
}
if count == 0 {
return Ok(1.0);
}
let n = count as f64;
let mean_f = sum_f / n;
let abof = (sum_f2 / n - mean_f * mean_f).max(0.0);
Ok((1.0 / (abof + 1e-10)) as f32)
}
pub fn score_batch(&self, queries: &[f32], n_queries: usize) -> AnomalyResult<Vec<f32>> {
if !self.fitted {
return Err(AnomalyError::NotFitted);
}
if queries.len() != n_queries * self.n_features {
return Err(AnomalyError::DimensionMismatch {
expected: n_queries * self.n_features,
got: queries.len(),
});
}
(0..n_queries)
.map(|i| {
let row = &queries[i * self.n_features..(i + 1) * self.n_features];
self.score(row)
})
.collect()
}
}
#[inline]
fn euclidean_sq(a: &[f32], b: &[f32]) -> f32 {
a.iter()
.zip(b.iter())
.map(|(&x, &y)| {
let d = x - y;
d * d
})
.sum()
}
#[cfg(test)]
mod tests {
use super::*;
fn cluster_with_outlier() -> (Vec<f32>, usize) {
let mut data = Vec::new();
for i in 0..10_usize {
data.push(i as f32 * 0.1);
data.push(i as f32 * 0.1);
}
data.push(50.0_f32);
data.push(50.0_f32);
(data, 11)
}
#[test]
fn new_k_zero_error() {
assert!(AbodApprox::new(0).is_err());
}
#[test]
fn fit_too_few_samples_error() {
let mut det = AbodApprox::new(5).expect("k=5 is a valid neighbour count");
let data = vec![0.0_f32; 4 * 2]; assert!(det.fit(&data, 4, 2).is_err());
}
#[test]
fn fit_dim_mismatch_error() {
let mut det = AbodApprox::new(2).expect("k=2 is a valid neighbour count");
let data = vec![0.0_f32; 10]; assert!(det.fit(&data, 3, 2).is_err()); }
#[test]
fn score_not_fitted_error() {
let det = AbodApprox::new(3).expect("AbodApprox with k=3 should be created");
assert!(det.score(&[0.0, 0.0]).is_err());
}
#[test]
fn score_dim_mismatch_error() {
let mut det = AbodApprox::new(3).expect("AbodApprox with k=3 should be created");
let data: Vec<f32> = (0..20).map(|i| i as f32).collect();
det.fit(&data, 10, 2)
.expect("fit with 10 samples and 2 features should succeed");
assert!(det.score(&[0.0, 0.0, 0.0]).is_err());
}
#[test]
fn outlier_scores_higher_than_inlier() {
let (data, n) = cluster_with_outlier();
let mut det = AbodApprox::new(5).expect("k=5 is a valid neighbour count");
det.fit(&data, n, 2)
.expect("fit on cluster_with_outlier data should succeed");
let s_inlier = det
.score(&[0.3_f32, 0.3])
.expect("scoring an inlier point should succeed");
let s_outlier = det
.score(&[50.0_f32, 50.0])
.expect("scoring an outlier point should succeed");
assert!(
s_outlier > s_inlier,
"outlier score {s_outlier} should > inlier {s_inlier}"
);
}
#[test]
fn score_finite_positive() {
let (data, n) = cluster_with_outlier();
let mut det = AbodApprox::new(3).expect("AbodApprox with k=3 should be created");
det.fit(&data, n, 2)
.expect("fit on cluster_with_outlier data should succeed");
let s = det
.score(&[0.1_f32, 0.1])
.expect("scoring a valid inlier point should succeed");
assert!(s.is_finite() && s > 0.0, "score={s}");
}
#[test]
fn batch_scores_match_individual() {
let (data, n) = cluster_with_outlier();
let mut det = AbodApprox::new(4).expect("k=4 is a valid neighbour count");
det.fit(&data, n, 2)
.expect("fit on cluster_with_outlier data should succeed");
let queries = vec![0.2_f32, 0.2, 0.5, 0.5, 50.0, 50.0];
let batch = det
.score_batch(&queries, 3)
.expect("batch scoring 3 valid queries should succeed");
for (i, &bs) in batch.iter().enumerate() {
let q = &queries[i * 2..(i + 1) * 2];
let s = det
.score(q)
.expect("individual scoring of batch query should succeed");
assert!((bs - s).abs() < 1e-5, "batch[{i}]={bs} vs individual={s}");
}
}
#[test]
fn works_high_dimensional() {
let n = 20_usize;
let d = 16_usize;
let data: Vec<f32> = (0..n * d).map(|i| (i % 10) as f32 * 0.1).collect();
let mut det = AbodApprox::new(5).expect("k=5 is a valid neighbour count");
det.fit(&data, n, d)
.expect("fit on high-dimensional data should succeed");
let q = vec![0.5_f32; d];
let s = det
.score(&q)
.expect("scoring a high-dimensional point should succeed");
assert!(s.is_finite() && s > 0.0);
}
#[test]
fn large_k_works() {
let (data, n) = cluster_with_outlier(); let mut det = AbodApprox::new(10).expect("k=10 is valid (n-1 for n=11)"); det.fit(&data, n, 2).expect("fit with k=n-1 should succeed");
let s = det
.score(&[0.0_f32, 0.0])
.expect("scoring with k=n-1 should succeed");
assert!(s.is_finite());
}
}