use crate::error::{AnomalyError, AnomalyResult};
#[derive(Debug, Clone)]
pub struct Cof {
data: Vec<f32>,
n_samples: usize,
n_features: usize,
k: usize,
knn_indices: Vec<usize>,
knn_dists: Vec<f32>,
sbn_costs: Vec<f32>,
fitted: bool,
}
impl Cof {
pub fn new(k: usize) -> Self {
Self {
data: Vec::new(),
n_samples: 0,
n_features: 0,
k,
knn_indices: Vec::new(),
knn_dists: Vec::new(),
sbn_costs: Vec::new(),
fitted: false,
}
}
pub fn fit(&mut self, data: &[f32], n_samples: usize, n_features: usize) -> AnomalyResult<()> {
if n_features == 0 {
return Err(AnomalyError::InvalidFeatureCount { n: 0 });
}
if self.k == 0 || self.k >= n_samples {
return Err(AnomalyError::InvalidK { k: self.k });
}
if data.len() != n_samples * n_features {
return Err(AnomalyError::DimensionMismatch {
expected: n_samples * n_features,
got: data.len(),
});
}
self.data = data.to_vec();
self.n_samples = n_samples;
self.n_features = n_features;
let (knn_indices, knn_dists) = build_knn(data, n_samples, n_features, self.k);
self.knn_indices = knn_indices;
self.knn_dists = knn_dists;
self.sbn_costs = (0..n_samples)
.map(|i| {
let pt = &data[i * n_features..(i + 1) * n_features];
let nn = &self.knn_indices[i * self.k..(i + 1) * self.k];
sbn_cost_from_training(pt, nn, data, n_features, self.k)
})
.collect();
self.fitted = true;
Ok(())
}
pub fn score(&self, x: &[f32]) -> AnomalyResult<f32> {
if !self.fitted {
return Err(AnomalyError::NotFitted);
}
if x.len() != self.n_features {
return Err(AnomalyError::FeatureCountMismatch {
expected: self.n_features,
got: x.len(),
});
}
Ok(self.compute_cof_score(x))
}
pub fn score_batch(&self, x: &[f32], n_samples: usize) -> AnomalyResult<Vec<f32>> {
if !self.fitted {
return Err(AnomalyError::NotFitted);
}
if x.len() != n_samples * self.n_features {
return Err(AnomalyError::DimensionMismatch {
expected: n_samples * self.n_features,
got: x.len(),
});
}
let mut out = Vec::with_capacity(n_samples);
for i in 0..n_samples {
let row = &x[i * self.n_features..(i + 1) * self.n_features];
out.push(self.compute_cof_score(row));
}
Ok(out)
}
fn compute_cof_score(&self, x: &[f32]) -> f32 {
let nn = knn_query(x, &self.data, self.n_samples, self.n_features, self.k);
let query_sbn = sbn_cost_from_training(x, &nn, &self.data, self.n_features, self.k);
let mean_neighbour_sbn: f32 =
nn.iter().map(|&idx| self.sbn_costs[idx]).sum::<f32>() / self.k as f32;
if mean_neighbour_sbn < f32::EPSILON {
return 1.0;
}
query_sbn / mean_neighbour_sbn
}
}
impl Default for Cof {
fn default() -> Self {
Self::new(5)
}
}
fn knn_query(x: &[f32], data: &[f32], n_samples: usize, n_features: usize, k: usize) -> Vec<usize> {
let mut dists: Vec<(usize, f32)> = (0..n_samples)
.map(|i| {
let row = &data[i * n_features..(i + 1) * n_features];
(i, euclidean_sq(x, row))
})
.collect();
dists.select_nth_unstable_by(k - 1, |a, b| {
a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)
});
dists[..k].iter().map(|(i, _)| *i).collect()
}
fn build_knn(
data: &[f32],
n_samples: usize,
n_features: usize,
k: usize,
) -> (Vec<usize>, Vec<f32>) {
let mut all_indices = Vec::with_capacity(n_samples * k);
let mut all_dists = Vec::with_capacity(n_samples * k);
for i in 0..n_samples {
let xi = &data[i * n_features..(i + 1) * n_features];
let mut dists: Vec<(usize, f32)> = (0..n_samples)
.filter(|&j| j != i)
.map(|j| {
let xj = &data[j * n_features..(j + 1) * n_features];
(j, euclidean_sq(xi, xj).sqrt())
})
.collect();
dists.select_nth_unstable_by(k - 1, |a, b| {
a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)
});
let mut top_k = dists[..k].to_vec();
top_k.sort_unstable_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
for (idx, d) in top_k {
all_indices.push(idx);
all_dists.push(d);
}
}
(all_indices, all_dists)
}
fn sbn_cost_from_training(
origin: &[f32],
nn_set: &[usize],
data: &[f32],
n_features: usize,
k: usize,
) -> f32 {
if k == 0 {
return 0.0;
}
let mut visited = vec![false; nn_set.len()];
let mut prev: Vec<f32> = origin.to_vec();
let mut total = 0.0_f32;
for step in 1..=k {
let mut best_dist = f32::INFINITY;
let mut best_local = 0usize;
for (local_idx, &global_idx) in nn_set.iter().enumerate() {
if visited[local_idx] {
continue;
}
let pt = &data[global_idx * n_features..(global_idx + 1) * n_features];
let d = euclidean_dist(&prev, pt);
if d < best_dist {
best_dist = d;
best_local = local_idx;
}
}
visited[best_local] = true;
let next_idx = nn_set[best_local];
let next_pt = &data[next_idx * n_features..(next_idx + 1) * n_features];
total += step as f32 * euclidean_dist(&prev, next_pt);
prev = next_pt.to_vec();
}
let denom = k as f32 * (k as f32 + 1.0) / 2.0;
total / denom
}
#[inline]
fn euclidean_sq(a: &[f32], b: &[f32]) -> f32 {
a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum()
}
#[inline]
fn euclidean_dist(a: &[f32], b: &[f32]) -> f32 {
euclidean_sq(a, b).sqrt()
}
#[cfg(test)]
mod tests {
use super::*;
fn lcg_next(state: &mut u64) -> f32 {
*state = state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(*state >> 33) as f32 / u32::MAX as f32
}
fn lcg_normal(state: &mut u64) -> f32 {
let u1 = lcg_next(state).max(1e-12);
let u2 = lcg_next(state);
(-2.0 * u1.ln()).sqrt() * (2.0 * std::f32::consts::PI * u2).cos()
}
fn cluster_2d(n: usize, cx: f32, cy: f32, sigma: f32, seed: u64) -> Vec<f32> {
let mut state = seed;
(0..n)
.flat_map(|_| {
let x = lcg_normal(&mut state) * sigma + cx;
let y = lcg_normal(&mut state) * sigma + cy;
[x, y]
})
.collect()
}
#[test]
fn test_fit_and_score_basic() {
let data = cluster_2d(20, 0.0, 0.0, 1.0, 1);
let mut cof = Cof::new(3);
cof.fit(&data, 20, 2).expect("COF fit should succeed");
let s = cof
.score(&[0.0_f32, 0.0])
.expect("COF score should succeed");
assert!(s.is_finite() && s > 0.0, "score={s}");
}
#[test]
fn test_inlier_vs_outlier_score() {
let mut data = cluster_2d(30, 0.0, 0.0, 0.5, 2);
data.extend_from_slice(&[50.0_f32, 50.0]);
let mut cof = Cof::new(4);
cof.fit(&data, 31, 2).expect("COF fit should succeed");
let inlier_score = cof
.score(&[0.0_f32, 0.0])
.expect("COF score should succeed");
let outlier_score = cof
.score(&[50.0_f32, 50.0])
.expect("COF score should succeed");
assert!(
outlier_score > inlier_score,
"outlier score {outlier_score:.3} should exceed inlier score {inlier_score:.3}"
);
}
#[test]
fn test_cluster_center_low_cof() {
let mut data = cluster_2d(40, 0.0, 0.0, 0.5, 3);
data.extend_from_slice(&[20.0_f32, 20.0]);
let mut cof = Cof::new(5);
cof.fit(&data, 41, 2).expect("COF fit should succeed");
let center_score = cof
.score(&[0.0_f32, 0.0])
.expect("COF score should succeed");
let outlier_score = cof
.score(&[20.0_f32, 20.0])
.expect("COF score should succeed");
assert!(
center_score < outlier_score,
"center COF {center_score:.3} should be < outlier COF {outlier_score:.3}"
);
}
#[test]
fn test_inlier_cof_near_unity() {
let data = cluster_2d(60, 0.0, 0.0, 1.0, 4);
let mut cof = Cof::new(5);
cof.fit(&data, 60, 2).expect("COF fit should succeed");
let scores = cof
.score_batch(&data, 60)
.expect("COF batch score should succeed");
let within_range = scores.iter().filter(|&&s| (0.3..=3.0).contains(&s)).count();
assert!(
within_range >= 50,
"at least 50/60 inlier COFs should be in [0.3, 3.0], got {within_range}"
);
}
#[test]
fn test_score_batch_matches_score() {
let data = cluster_2d(20, 0.0, 0.0, 1.0, 5);
let mut cof = Cof::new(3);
cof.fit(&data, 20, 2).expect("COF fit should succeed");
let queries = [0.0_f32, 0.0, 1.0, 1.0, -1.0, -1.0];
let batch = cof
.score_batch(&queries, 3)
.expect("COF batch score should succeed");
for i in 0..3 {
let single = cof
.score(&queries[i * 2..(i + 1) * 2])
.expect("COF single score should succeed");
assert!(
(batch[i] - single).abs() < 1e-5,
"batch[{i}]={} single={}",
batch[i],
single
);
}
}
#[test]
fn test_not_fitted_error() {
let cof = Cof::new(3);
let result = cof.score(&[0.0_f32, 0.0]);
assert!(matches!(result, Err(AnomalyError::NotFitted)));
}
#[test]
fn test_feature_count_mismatch() {
let data = cluster_2d(20, 0.0, 0.0, 1.0, 6);
let mut cof = Cof::new(3);
cof.fit(&data, 20, 2).expect("COF fit should succeed");
let result = cof.score(&[0.0_f32, 0.0, 0.0]); assert!(matches!(
result,
Err(AnomalyError::FeatureCountMismatch { .. })
));
}
#[test]
fn test_invalid_k_zero() {
let data = cluster_2d(10, 0.0, 0.0, 1.0, 7);
let mut cof = Cof::new(0);
let result = cof.fit(&data, 10, 2);
assert!(matches!(result, Err(AnomalyError::InvalidK { .. })));
}
#[test]
fn test_invalid_k_too_large() {
let data = cluster_2d(10, 0.0, 0.0, 1.0, 8);
let mut cof = Cof::new(10); let result = cof.fit(&data, 10, 2);
assert!(matches!(result, Err(AnomalyError::InvalidK { .. })));
}
#[test]
fn test_sbn_path_monotone_first_two_edges() {
let data = vec![
1.0_f32, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0, 5.0, 0.0, 0.0,
0.0, ];
let n = 6;
let k = 4;
let mut cof = Cof::new(k);
cof.fit(&data, n, 2).expect("COF fit should succeed");
let nn = &cof.knn_indices[5 * k..6 * k];
let dists: Vec<f32> = nn
.iter()
.map(|&idx| euclidean_dist(&data[idx * 2..(idx + 1) * 2], &[0.0, 0.0]))
.collect();
for w in dists.windows(2) {
assert!(
w[0] <= w[1] + 1e-6,
"knn_dists should be sorted: {:?}",
&dists
);
}
}
#[test]
fn test_high_dimensional_fit() {
let mut state = 42u64;
let data: Vec<f32> = (0..30 * 10).map(|_| lcg_normal(&mut state)).collect();
let mut cof = Cof::new(4);
cof.fit(&data, 30, 10)
.expect("COF high-dim fit should succeed");
let s = cof
.score(&[0.0_f32; 10])
.expect("COF high-dim score should succeed");
assert!(s.is_finite(), "score={s}");
}
#[test]
fn test_dimension_mismatch_fit() {
let data = vec![0.0_f32; 10]; let mut cof = Cof::new(3);
let result = cof.fit(&data, 5, 3);
assert!(matches!(
result,
Err(AnomalyError::DimensionMismatch { .. })
));
}
#[test]
fn test_two_clusters_separation() {
let mut data = cluster_2d(20, -5.0, 0.0, 0.3, 9);
let cluster_b = cluster_2d(20, 5.0, 0.0, 0.3, 10);
data.extend_from_slice(&cluster_b);
data.extend_from_slice(&[0.0_f32, 50.0]);
let n = 41;
let mut cof = Cof::new(5);
cof.fit(&data, n, 2).expect("COF fit should succeed");
let outlier_score = cof
.score(&[0.0_f32, 50.0])
.expect("COF outlier score should succeed");
let inlier_scores: Vec<f32> = (0..40)
.map(|i| {
cof.score(&data[i * 2..(i + 1) * 2])
.expect("COF score in iterator should succeed")
})
.collect();
let max_inlier = inlier_scores
.iter()
.cloned()
.fold(f32::NEG_INFINITY, f32::max);
assert!(
outlier_score > max_inlier,
"outlier COF {outlier_score:.3} should exceed max inlier COF {max_inlier:.3}"
);
}
#[test]
fn test_default_k() {
let cof = Cof::default();
assert_eq!(cof.k, 5);
}
#[test]
fn test_determinism() {
let data = cluster_2d(25, 0.0, 0.0, 1.0, 13);
let mut cof1 = Cof::new(4);
let mut cof2 = Cof::new(4);
cof1.fit(&data, 25, 2).expect("COF fit1 should succeed");
cof2.fit(&data, 25, 2).expect("COF fit2 should succeed");
let s1 = cof1
.score(&[1.0_f32, 1.0])
.expect("COF score1 should be deterministic");
let s2 = cof2
.score(&[1.0_f32, 1.0])
.expect("COF score2 should be deterministic");
assert!((s1 - s2).abs() < 1e-6, "scores must be deterministic");
}
#[test]
fn test_sbn_cost_collinear() {
let data = vec![
0.0_f32, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0, 5.0, 0.0, 6.0, 0.0, 100.0,
0.0, ];
let n = 8;
let mut cof = Cof::new(4);
cof.fit(&data, n, 2).expect("COF fit should succeed");
let origin_score = cof
.score(&[0.0_f32, 0.0])
.expect("COF origin score should succeed");
let outlier_score = cof
.score(&[100.0_f32, 0.0])
.expect("COF outlier score should succeed");
assert!(
outlier_score > origin_score,
"outlier COF {outlier_score:.3} should exceed origin COF {origin_score:.3}"
);
}
#[test]
fn test_sbn_weights_increase_with_position() {
let data = vec![1.0_f32, 0.0, 2.0, 0.0];
let origin = [0.0_f32, 0.0];
let nn_set = [0usize, 1];
let cost = sbn_cost_from_training(&origin, &nn_set, &data, 2, 2);
assert!(
(cost - 1.0).abs() < 1e-5,
"expected SBN cost 1.0, got {cost}"
);
}
}