use crate::error::{AnomalyError, AnomalyResult};
/// Tuning parameters for the [`Sod`] detector.
#[derive(Clone, Copy, Debug)]
pub struct SodConfig {
    /// Number of nearest training rows that form the reference set of a
    /// query. `Sod::new` rejects `0`.
    pub k: usize,
    /// Variance-ratio threshold: a feature joins the scoring subspace when its
    /// variance inside the reference set is below `alpha` times its variance
    /// over the whole training set.
    pub alpha: f32,
}
impl Default for SodConfig {
fn default() -> Self {
Self { k: 10, alpha: 0.8 }
}
}
/// Subspace outlier detector that keeps a private copy of its training data.
///
/// Build it with `Sod::new`, train it with `Sod::fit`, then query it with
/// `Sod::score` or `Sod::score_batch`.
#[derive(Clone, Debug)]
pub struct Sod {
    /// Parameters supplied at construction time.
    config: SodConfig,
    /// Copy of the training matrix, row-major: the value of feature `j` in
    /// sample `i` lives at index `i * n_features + j`.
    data: Vec<f32>,
    /// Number of rows stored in `data`.
    n_samples: usize,
    /// Number of columns stored in `data`.
    n_features: usize,
    /// Per-feature population variance (divided by `n_samples`) of the
    /// training data.
    global_var: Vec<f32>,
    /// Becomes `true` after a successful `fit`; scoring is refused before that.
    fitted: bool,
}
impl Sod {
    /// Lower bound applied to the normalising standard deviation so the final
    /// division in `score` never divides by zero.
    const MIN_NORMALISER: f64 = 1e-10;

    /// Features whose global variance is below this value are never selected
    /// for the scoring subspace.
    const MIN_GLOBAL_VAR: f32 = 1e-12;

    /// Creates an unfitted detector.
    ///
    /// # Errors
    ///
    /// Returns `AnomalyError::InvalidK` when `config.k` is zero. `config.alpha`
    /// is not validated.
    pub fn new(config: SodConfig) -> AnomalyResult<Self> {
        if config.k == 0 {
            return Err(AnomalyError::InvalidK { k: 0 });
        }
        Ok(Self {
            config,
            data: Vec::new(),
            n_samples: 0,
            n_features: 0,
            global_var: Vec::new(),
            fitted: false,
        })
    }

    /// Stores a copy of the training matrix and its per-feature population
    /// variance.
    ///
    /// `data` is row-major with `n_samples` rows of `n_features` values each.
    /// All validation happens before any state is modified, so a failed call
    /// leaves the detector exactly as it was.
    ///
    /// # Errors
    ///
    /// * `AnomalyError::InvalidFeatureCount` when `n_features` is zero.
    /// * `AnomalyError::InsufficientSamples` when `n_samples` is not strictly
    ///   greater than the configured `k`.
    /// * `AnomalyError::DimensionMismatch` when `data.len()` differs from
    ///   `n_samples * n_features` (including when that product would overflow
    ///   `usize`).
    pub fn fit(&mut self, data: &[f32], n_samples: usize, n_features: usize) -> AnomalyResult<()> {
        if n_features == 0 {
            return Err(AnomalyError::InvalidFeatureCount { n: 0 });
        }
        // `n_samples <= k` is `n_samples < k + 1` without the overflow at k == usize::MAX.
        if n_samples <= self.config.k {
            return Err(AnomalyError::InsufficientSamples {
                need: self.config.k.saturating_add(1),
                got: n_samples,
            });
        }
        // A saturated product can never equal a real slice length, so overflow
        // is reported as a mismatch instead of panicking or wrapping.
        let expected = n_samples.saturating_mul(n_features);
        if data.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: data.len(),
            });
        }

        let count = n_samples as f64;

        let mut mean = vec![0.0_f64; n_features];
        for row in data.chunks_exact(n_features) {
            for (m, &x) in mean.iter_mut().zip(row) {
                *m += f64::from(x);
            }
        }
        for m in &mut mean {
            *m /= count;
        }

        // Accumulate squared deviations in f64 and narrow once at the end;
        // summing f32 terms loses precision as the number of samples grows.
        let mut sq_dev = vec![0.0_f64; n_features];
        for row in data.chunks_exact(n_features) {
            for ((acc, &m), &x) in sq_dev.iter_mut().zip(&mean).zip(row) {
                let dev = f64::from(x) - m;
                *acc += dev * dev;
            }
        }

        self.global_var = sq_dev.into_iter().map(|s| (s / count) as f32).collect();
        self.data = data.to_vec();
        self.n_samples = n_samples;
        self.n_features = n_features;
        self.fitted = true;
        Ok(())
    }

    /// Scores one query point; larger values mean "more anomalous".
    ///
    /// The `k` training rows closest to `p` (squared Euclidean distance, ties
    /// broken by lower row index) form the reference set. A feature is
    /// selected when its global variance is at least `1e-12` and its variance
    /// inside the reference set is below `alpha` times its global variance.
    /// The score is the distance from `p` to the reference-set mean over the
    /// selected features, divided by the square root of the summed
    /// reference-set variances of those features (floored at `1e-10`).
    ///
    /// When no feature is selected, the distance is taken over all features
    /// and normalised by the square root of the summed global variances.
    ///
    /// Inputs are not checked for NaN or infinity. A NaN result is reported
    /// as `0.0`, because the final `max(0.0)` discards NaN.
    ///
    /// # Errors
    ///
    /// * `AnomalyError::NotFitted` when `fit` has not succeeded yet.
    /// * `AnomalyError::DimensionMismatch` when `p.len()` differs from the
    ///   fitted feature count.
    pub fn score(&self, p: &[f32]) -> AnomalyResult<f32> {
        if !self.fitted {
            return Err(AnomalyError::NotFitted);
        }
        if p.len() != self.n_features {
            return Err(AnomalyError::DimensionMismatch {
                expected: self.n_features,
                got: p.len(),
            });
        }

        let neighbours = self.nearest_rows(p);
        let (local_mean, local_var) = self.neighbourhood_stats(&neighbours);
        let alpha = f64::from(self.config.alpha);

        // One pass gathers both the subspace sums and the all-feature distance
        // needed by the fallback.
        let mut sub_dist_sq = 0.0_f64;
        let mut sub_var_sum = 0.0_f64;
        let mut full_dist_sq = 0.0_f64;
        let mut selected = 0_usize;
        for (j, &x) in p.iter().enumerate() {
            let diff = f64::from(x) - local_mean[j];
            let sq = diff * diff;
            full_dist_sq += sq;

            let gv = self.global_var[j];
            if gv >= Self::MIN_GLOBAL_VAR && local_var[j] < alpha * f64::from(gv) {
                sub_dist_sq += sq;
                sub_var_sum += local_var[j];
                selected += 1;
            }
        }

        let (dist_sq, var_sum) = if selected == 0 {
            let total_var: f64 = self.global_var.iter().map(|&v| f64::from(v)).sum();
            (full_dist_sq, total_var)
        } else {
            (sub_dist_sq, sub_var_sum)
        };

        let normaliser = var_sum.sqrt().max(Self::MIN_NORMALISER);
        // `max(0.0)` maps a NaN ratio to 0.0; the cast narrows the f64 result.
        Ok((dist_sq.sqrt() / normaliser).max(0.0) as f32)
    }

    /// Scores `n_queries` points stored row-major in `queries`.
    ///
    /// Each row is passed to `score`; the result holds one score per row in
    /// input order.
    ///
    /// # Errors
    ///
    /// * `AnomalyError::NotFitted` when `fit` has not succeeded yet.
    /// * `AnomalyError::DimensionMismatch` when `queries.len()` differs from
    ///   `n_queries` times the fitted feature count (including when that
    ///   product would overflow `usize`).
    pub fn score_batch(&self, queries: &[f32], n_queries: usize) -> AnomalyResult<Vec<f32>> {
        if !self.fitted {
            return Err(AnomalyError::NotFitted);
        }
        let expected = n_queries.saturating_mul(self.n_features);
        if queries.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: queries.len(),
            });
        }
        // `n_features` is non-zero once fitted, so `chunks_exact` cannot panic.
        queries
            .chunks_exact(self.n_features)
            .map(|row| self.score(row))
            .collect()
    }

    /// Returns training row `idx` as a slice of `n_features` values.
    fn row(&self, idx: usize) -> &[f32] {
        let start = idx * self.n_features;
        &self.data[start..start + self.n_features]
    }

    /// Returns the indices of the `k` training rows closest to `p`, nearest
    /// first, with ties resolved in favour of the lower row index.
    fn nearest_rows(&self, p: &[f32]) -> Vec<usize> {
        /// Total order on `(squared distance, row index)`: NaN distances sort
        /// after every other value and equal distances fall back to the index.
        fn nearest_first(a: &(f64, usize), b: &(f64, usize)) -> std::cmp::Ordering {
            a.0.partial_cmp(&b.0)
                .unwrap_or_else(|| a.0.is_nan().cmp(&b.0.is_nan()))
                .then_with(|| a.1.cmp(&b.1))
        }

        let k = self.config.k;
        let mut dists: Vec<(f64, usize)> = self
            .data
            .chunks_exact(self.n_features)
            .enumerate()
            .map(|(i, row)| {
                let sq: f64 = p
                    .iter()
                    .zip(row)
                    .map(|(&a, &b)| {
                        let diff = f64::from(a) - f64::from(b);
                        diff * diff
                    })
                    .sum();
                (sq, i)
            })
            .collect();

        // `new` guarantees k >= 1 and `fit` guarantees k < n_samples, so the
        // partition index `k - 1` is always valid here.
        if k < dists.len() {
            dists.select_nth_unstable_by(k - 1, nearest_first);
            dists.truncate(k);
        }
        // Only the k survivors are sorted, which keeps later sums in a
        // deterministic order.
        dists.sort_unstable_by(nearest_first);
        dists.into_iter().map(|(_, i)| i).collect()
    }

    /// Computes the per-feature mean and population variance of the given
    /// training rows, both in `f64`.
    fn neighbourhood_stats(&self, rows: &[usize]) -> (Vec<f64>, Vec<f64>) {
        let count = rows.len() as f64;

        let mut mean = vec![0.0_f64; self.n_features];
        for &idx in rows {
            for (m, &x) in mean.iter_mut().zip(self.row(idx)) {
                *m += f64::from(x);
            }
        }
        for m in &mut mean {
            *m /= count;
        }

        let mut var = vec![0.0_f64; self.n_features];
        for &idx in rows {
            for ((v, &m), &x) in var.iter_mut().zip(&mean).zip(self.row(idx)) {
                let dev = f64::from(x) - m;
                *v += dev * dev;
            }
        }
        for v in &mut var {
            *v /= count;
        }

        (mean, var)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 16 x 2 row-major data set: a 5 x 3 grid with 0.1 spacing
    /// followed by a single far-away row at (100, 100).
    fn make_data() -> (Vec<f32>, usize, usize) {
        let mut data = Vec::new();
        for i in 0..15_usize {
            data.push((i % 5) as f32 * 0.1);
            data.push((i / 5) as f32 * 0.1);
        }
        data.push(100.0_f32);
        data.push(100.0_f32);
        (data, 16, 2)
    }

    /// `k == 0` is rejected at construction time.
    #[test]
    fn new_k_zero_error() {
        assert!(Sod::new(SodConfig { k: 0, alpha: 0.8 }).is_err());
    }

    /// Five samples are not enough for `k = 10`.
    #[test]
    fn fit_insufficient_samples() {
        let mut det = Sod::new(SodConfig { k: 10, alpha: 0.8 })
            .expect("Sod::new with valid config should succeed");
        let data = vec![0.0_f32; 5 * 2];
        assert!(det.fit(&data, 5, 2).is_err());
    }

    /// A buffer of 11 values cannot be a 5 x 2 matrix.
    ///
    /// `k` is kept below the sample count so the sample-count check passes
    /// and the failure can only come from the length check.
    #[test]
    fn fit_dim_mismatch() {
        let mut det = Sod::new(SodConfig { k: 2, alpha: 0.8 })
            .expect("Sod::new with valid config should succeed");
        let data = vec![0.0_f32; 11];
        assert!(det.fit(&data, 5, 2).is_err());
    }

    /// Scoring before `fit` is an error.
    #[test]
    fn score_not_fitted_error() {
        let det =
            Sod::new(SodConfig::default()).expect("Sod::new with default config should succeed");
        assert!(det.score(&[0.0, 0.0]).is_err());
    }

    /// A query with the wrong number of features is rejected.
    #[test]
    fn score_dim_mismatch_error() {
        let (data, n, d) = make_data();
        let mut det = Sod::new(SodConfig { k: 5, alpha: 0.8 })
            .expect("Sod::new with valid config should succeed");
        det.fit(&data, n, d).expect("SOD fit should succeed");
        assert!(det.score(&[0.0_f32]).is_err());
    }

    /// The far-away row must score strictly higher than a grid point.
    #[test]
    fn outlier_scores_higher() {
        let (data, n, d) = make_data();
        let mut det = Sod::new(SodConfig { k: 5, alpha: 0.8 })
            .expect("Sod::new with valid config should succeed");
        det.fit(&data, n, d).expect("SOD fit should succeed");
        let s_in = det
            .score(&[0.2_f32, 0.1])
            .expect("inlier score should succeed");
        let s_out = det
            .score(&[100.0_f32, 100.0])
            .expect("outlier score should succeed");
        assert!(s_out > s_in, "outlier={s_out} should > inlier={s_in}");
    }

    /// Every training row yields a finite, non-negative score.
    #[test]
    fn score_non_negative_finite() {
        let (data, n, d) = make_data();
        let mut det = Sod::new(SodConfig { k: 4, alpha: 0.7 })
            .expect("Sod::new with valid config should succeed");
        det.fit(&data, n, d).expect("SOD fit should succeed");
        for i in 0..n {
            let p = &data[i * d..(i + 1) * d];
            let s = det
                .score(p)
                .expect("score should succeed for training point");
            assert!(s.is_finite() && s >= 0.0, "s={s} for point {i}");
        }
    }

    /// `score_batch` agrees with calling `score` row by row.
    #[test]
    fn batch_consistent_with_individual() {
        let (data, n, d) = make_data();
        let mut det = Sod::new(SodConfig { k: 3, alpha: 0.8 })
            .expect("Sod::new with valid config should succeed");
        det.fit(&data, n, d).expect("SOD fit should succeed");
        let batch = det
            .score_batch(&data, n)
            .expect("batch score should succeed after fit");
        for (i, &bs) in batch.iter().enumerate() {
            let p = &data[i * d..(i + 1) * d];
            let s = det
                .score(p)
                .expect("individual score should match batch score");
            assert!((bs - s).abs() < 1e-5, "batch[{i}]={bs} vs {s}");
        }
    }

    /// Twenty features: the row overwritten with 50.0 must not score lower
    /// than an ordinary row.
    #[test]
    fn works_high_dimensional() {
        let n = 25_usize;
        let d = 20_usize;
        let mut data: Vec<f32> = (0..n * d).map(|i| (i % 7) as f32 * 0.1).collect();
        for v in &mut data[(n - 1) * d..] {
            *v = 50.0;
        }
        let mut det = Sod::new(SodConfig { k: 5, alpha: 0.9 })
            .expect("Sod::new with valid config should succeed");
        det.fit(&data, n, d).expect("SOD fit should succeed");
        let s_normal = det
            .score(&data[..d])
            .expect("score on normal point should succeed");
        let s_outlier = det
            .score(&data[(n - 1) * d..])
            .expect("score on outlier point should succeed");
        assert!(
            s_outlier >= s_normal,
            "outlier={s_outlier} should >= inlier={s_normal}"
        );
    }

    /// With `alpha = 0` no feature can be selected, so the all-feature
    /// fallback path is exercised.
    #[test]
    fn alpha_zero_fallback() {
        let (data, n, d) = make_data();
        let mut det =
            Sod::new(SodConfig { k: 4, alpha: 0.0 }).expect("Sod::new with alpha=0 should succeed");
        det.fit(&data, n, d).expect("SOD fit should succeed");
        let s = det
            .score(&[0.2_f32, 0.1])
            .expect("alpha=0 fallback score should succeed");
        assert!(s.is_finite() && s >= 0.0, "s={s}");
    }

    /// Hand-computed scores on the 1-D data set {0, 1, 2, 3} with `k = 2`.
    ///
    /// Global variance is 1.25. For the query 0 the reference set is {0, 1}
    /// (mean 0.5, variance 0.25), giving 0.5 / 0.5 = 1. For the query 10 the
    /// reference set is {3, 2} (mean 2.5, variance 0.25), giving
    /// 7.5 / 0.5 = 15.
    #[test]
    fn matches_hand_computed_scores() {
        let mut det = Sod::new(SodConfig { k: 2, alpha: 0.8 })
            .expect("Sod::new with valid config should succeed");
        det.fit(&[0.0, 1.0, 2.0, 3.0], 4, 1)
            .expect("SOD fit should succeed");
        let near = det.score(&[0.0]).expect("score should succeed");
        let far = det.score(&[10.0]).expect("score should succeed");
        assert!((near - 1.0).abs() < 1e-6, "near={near}");
        assert!((far - 15.0).abs() < 1e-5, "far={far}");
    }
}