use crate::error::{AnomalyError, AnomalyResult};
use crate::handle::LcgRng;
/// Hyper-parameters controlling how a `Loda` detector is fitted.
#[derive(Debug, Clone)]
pub struct LodaConfig {
    /// Number of random projections; one histogram is built per projection.
    pub n_projectors: usize,
    /// Number of equal-width bins in every histogram.
    pub n_bins: usize,
    /// Seed handed to the random generator that draws the projections.
    pub seed: u64,
}

impl Default for LodaConfig {
    /// Stock configuration: 100 projectors, 10 bins per histogram, seed 42.
    fn default() -> Self {
        let (n_projectors, n_bins, seed) = (100, 10, 42);
        LodaConfig {
            n_projectors,
            n_bins,
            seed,
        }
    }
}
/// One projection vector together with the 1-D histogram fitted along it.
struct ProjectorHistogram {
    /// Projection vector; its dot product with a sample gives the 1-D coordinate.
    weights: Vec<f32>,
    /// Left edge of the first histogram bin.
    bin_min: f32,
    /// Width shared by all bins.
    bin_width: f32,
    /// Density value stored for each bin, indexed by bin number.
    densities: Vec<f32>,
    /// Number of bins covered by the histogram.
    n_bins: usize,
}

impl ProjectorHistogram {
    /// Returns the histogram density at the projection of `x`.
    ///
    /// The sample is projected with `weights` (pairs beyond the shorter of the
    /// two slices are ignored by `zip`). A projection outside
    /// `[bin_min, bin_min + bin_width * n_bins]` has density `0.0`, and so does
    /// a NaN projection. Otherwise the density of the containing bin is
    /// returned.
    #[inline]
    fn density(&self, x: &[f32]) -> f32 {
        let z: f32 = self.weights.iter().zip(x).map(|(w, xi)| w * xi).sum();

        // NaN fails every ordered comparison, so it would slip past the range
        // guard below and then be cast to bin 0. Treat it as outside the
        // support instead of silently scoring it like a low-end inlier.
        if z.is_nan() {
            return 0.0;
        }

        // The range guard only applies to non-degenerate histograms; with a
        // width below the threshold `bin_index` maps everything to bin 0.
        if self.bin_width >= 1e-12 {
            let upper_edge = self.bin_min + self.bin_width * self.n_bins as f32;
            if z < self.bin_min || z > upper_edge {
                return 0.0;
            }
        }

        self.densities[bin_index(z, self.bin_min, self.bin_width, self.n_bins)]
    }
}
/// Draws a sparse projection vector of length `n_features`.
///
/// `n_nonzero` positions are selected by swapping entries of an index
/// permutation in place, and each selected coordinate is set to
/// `+1/sqrt(n_nonzero)` or `-1/sqrt(n_nonzero)` depending on the low bit of a
/// fresh random word. All other coordinates stay `0.0`.
///
/// Callers are expected to pass `n_nonzero <= n_features`; larger values make
/// `n_features - slot` underflow. The selected positions are distinct provided
/// `rng.next_usize(b)` returns a value below `b` (presumed; confirm against
/// `LcgRng`).
fn random_sparse_projection(n_features: usize, n_nonzero: usize, rng: &mut LcgRng) -> Vec<f32> {
    let magnitude = 1.0_f32 / (n_nonzero as f32).sqrt();
    let mut order: Vec<usize> = (0..n_features).collect();
    let mut projection = vec![0.0_f32; n_features];

    for slot in 0..n_nonzero {
        // Draw order matters for seeded reproducibility: position first, sign second.
        let pick = slot + rng.next_usize(n_features - slot);
        order.swap(slot, pick);

        let negative = (rng.next_u32() & 1) != 0;
        projection[order[slot]] = if negative { -magnitude } else { magnitude };
    }

    projection
}
/// Maps `val` to a histogram bin number.
///
/// Bins start at `bin_min` and are `bin_width` wide. The result is clamped to
/// `0..=n_bins - 1`, so values left of the first bin land in bin 0 and values
/// right of the last bin land in the last bin. A width below `1e-12` is
/// treated as degenerate and always yields bin 0.
#[inline]
fn bin_index(val: f32, bin_min: f32, bin_width: f32, n_bins: usize) -> usize {
    if bin_width < 1e-12 {
        return 0;
    }

    let last = n_bins as isize - 1;
    // Float-to-int `as` truncates toward zero, saturates, and turns NaN into 0.
    let raw = ((val - bin_min) / bin_width) as isize;

    let floored = if raw < 0 { 0 } else { raw };
    let capped = if floored > last { last } else { floored };
    capped as usize
}
/// Anomaly detector built from an ensemble of sparse random projections, each
/// paired with a 1-D histogram of the projected training data.
///
/// A sample's score is the mean negative log-density over all histograms, so
/// larger scores indicate more anomalous samples.
pub struct Loda {
    /// Hyper-parameters supplied at construction.
    config: LodaConfig,
    /// Fitted projections and histograms; empty until `fit` succeeds.
    projectors: Vec<ProjectorHistogram>,
    /// Feature count of the training data (0 while unfitted).
    n_features: usize,
    /// Sample count of the training data (0 while unfitted).
    n_samples: usize,
}

impl Loda {
    /// Creates an unfitted detector with the given configuration.
    #[must_use]
    pub fn new(config: LodaConfig) -> Self {
        Self {
            config,
            projectors: Vec::new(),
            n_features: 0,
            n_samples: 0,
        }
    }

    /// Fits the detector on `data`, a row-major `n_samples x n_features` matrix.
    ///
    /// For every projector a sparse random direction is drawn, all rows are
    /// projected onto it, and an equal-width histogram over the observed range
    /// is turned into per-bin densities. On success any previous fit is
    /// replaced; on error the detector is left untouched.
    ///
    /// Input values are not checked for finiteness.
    ///
    /// # Errors
    ///
    /// * `EmptyInput` when `n_samples == 0`.
    /// * `InvalidFeatureCount` when `n_features == 0`.
    /// * `DimensionMismatch` when `data.len() != n_samples * n_features`
    ///   (if the product overflows, `expected` is reported as `usize::MAX`).
    /// * `Internal` when the configuration has zero projectors or zero bins.
    pub fn fit(&mut self, data: &[f32], n_samples: usize, n_features: usize) -> AnomalyResult<()> {
        // Must agree with the degenerate-width threshold used when binning.
        const MIN_BIN_WIDTH: f32 = 1e-12;

        if n_samples == 0 {
            return Err(AnomalyError::EmptyInput);
        }
        if n_features == 0 {
            return Err(AnomalyError::InvalidFeatureCount { n: 0 });
        }
        // Saturate instead of overflowing: a slice can never hold `usize::MAX`
        // floats, so an overflowing shape is always reported as a mismatch.
        let expected = n_samples.saturating_mul(n_features);
        if data.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: data.len(),
            });
        }
        if self.config.n_projectors == 0 {
            return Err(AnomalyError::Internal {
                msg: "n_projectors must be > 0".into(),
            });
        }
        if self.config.n_bins == 0 {
            return Err(AnomalyError::Internal {
                msg: "n_bins must be > 0".into(),
            });
        }

        let mut rng = LcgRng::new(self.config.seed);
        // Roughly sqrt(d) non-zero weights per projection, at least one.
        let n_nonzero = ((n_features as f32).sqrt().round() as usize).max(1);
        let n_bins = self.config.n_bins;

        let mut projectors = Vec::with_capacity(self.config.n_projectors);
        // One scratch buffer reused for every projector.
        let mut projections: Vec<f32> = Vec::with_capacity(n_samples);

        for _ in 0..self.config.n_projectors {
            let weights = random_sparse_projection(n_features, n_nonzero, &mut rng);

            projections.clear();
            projections.extend(data.chunks_exact(n_features).map(|row| {
                weights
                    .iter()
                    .zip(row)
                    .map(|(w, x)| w * x)
                    .sum::<f32>()
            }));

            let z_min = projections.iter().copied().fold(f32::INFINITY, f32::min);
            let z_max = projections
                .iter()
                .copied()
                .fold(f32::NEG_INFINITY, f32::max);

            // Apply the degenerate fallback to the bin width itself, not to the
            // range: a tiny but non-zero range could otherwise yield a width
            // below the binning threshold, which collapses every value
            // (including far outliers) into bin 0 with an enormous density.
            let raw_width = (z_max - z_min) / n_bins as f32;
            let bin_width = if raw_width < MIN_BIN_WIDTH {
                1.0_f32
            } else {
                raw_width
            };

            let mut counts = vec![0_u64; n_bins];
            for &z in &projections {
                counts[bin_index(z, z_min, bin_width, n_bins)] += 1;
            }

            // Normalise counts so the histogram integrates to one.
            let denom = n_samples as f32 * bin_width;
            let densities: Vec<f32> = counts.iter().map(|&c| c as f32 / denom).collect();

            projectors.push(ProjectorHistogram {
                weights,
                bin_min: z_min,
                bin_width,
                densities,
                n_bins,
            });
        }

        self.projectors = projectors;
        self.n_features = n_features;
        self.n_samples = n_samples;
        Ok(())
    }

    /// Scores a single sample; higher means more anomalous.
    ///
    /// # Errors
    ///
    /// * `NotFitted` when `fit` has not succeeded yet.
    /// * `FeatureCountMismatch` when `x.len()` differs from the fitted feature count.
    pub fn score(&self, x: &[f32]) -> AnomalyResult<f32> {
        if self.n_samples == 0 {
            return Err(AnomalyError::NotFitted);
        }
        if x.len() != self.n_features {
            return Err(AnomalyError::FeatureCountMismatch {
                expected: self.n_features,
                got: x.len(),
            });
        }
        Ok(self.mean_negative_log_density(x))
    }

    /// Scores `n` samples stored row-major in `x`, returning one score per row.
    ///
    /// # Errors
    ///
    /// * `NotFitted` when `fit` has not succeeded yet.
    /// * `DimensionMismatch` when `x.len() != n * n_features`
    ///   (if the product overflows, `expected` is reported as `usize::MAX`).
    pub fn score_batch(&self, x: &[f32], n: usize) -> AnomalyResult<Vec<f32>> {
        if self.n_samples == 0 {
            return Err(AnomalyError::NotFitted);
        }
        let expected = n.saturating_mul(self.n_features);
        if x.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: x.len(),
            });
        }
        // A fitted detector always has `n_features >= 1`, so the chunk size is non-zero.
        Ok(x
            .chunks_exact(self.n_features)
            .map(|sample| self.mean_negative_log_density(sample))
            .collect())
    }

    /// Number of features seen during the last successful `fit` (0 if unfitted).
    #[inline]
    #[must_use]
    pub fn n_features(&self) -> usize {
        self.n_features
    }

    /// Number of samples seen during the last successful `fit` (0 if unfitted).
    #[inline]
    #[must_use]
    pub fn n_samples(&self) -> usize {
        self.n_samples
    }

    /// Mean of `-ln(density + EPSILON)` over all projectors.
    ///
    /// Callers must have verified that the detector is fitted and that `x`
    /// has the fitted feature count.
    fn mean_negative_log_density(&self, x: &[f32]) -> f32 {
        // Keeps the logarithm finite when a histogram reports zero density.
        const EPSILON: f32 = 1e-10;
        let total: f32 = self
            .projectors
            .iter()
            .map(|ph| -(ph.density(x) + EPSILON).ln())
            .sum();
        total / self.projectors.len() as f32
    }
}
// Unit tests for the `Loda` detector: fitting, scoring, batch scoring and
// the error paths of `fit` / `score`.
#[cfg(test)]
mod tests {
use super::*;
// Builds a 10 x 10 grid of 2-D points with coordinates (i * 0.1, j * 0.1),
// returned as (row-major data, sample count = 100, feature count = 2).
fn make_2d_data() -> (Vec<f32>, usize, usize) {
let mut data: Vec<f32> = Vec::new();
for i in 0..10_i32 {
for j in 0..10_i32 {
data.push(i as f32 * 0.1);
data.push(j as f32 * 0.1);
}
}
let n = 100;
(data, n, 2)
}
// Builds 50 evenly spaced 1-D points 0.00, 0.02, ..., 0.98,
// returned as (data, sample count = 50, feature count = 1).
fn make_1d_data() -> (Vec<f32>, usize, usize) {
let mut data: Vec<f32> = Vec::new();
for i in 0..50 {
data.push(i as f32 * 0.02); }
(data, 50, 1)
}
// Fitting on the 2-D grid and scoring a central point yields a finite score.
#[test]
fn test_fit_score_basic_2d() {
let (data, n, d) = make_2d_data();
let mut det = Loda::new(LodaConfig::default());
det.fit(&data, n, d).expect("fit should succeed");
let s = det.score(&[0.5_f32, 0.5]).expect("score should succeed");
assert!(s.is_finite(), "score should be finite, got {s}");
}
// Scoring before any fit must report `NotFitted`.
#[test]
fn test_unfitted_returns_not_fitted() {
let det = Loda::new(LodaConfig::default());
match det.score(&[0.0_f32]) {
Err(AnomalyError::NotFitted) => {}
other => panic!("expected NotFitted, got {other:?}"),
}
}
// A point far outside the training range (10.0) must score higher than a
// point inside it (0.5).
#[test]
fn test_outlier_score_exceeds_inlier() {
let (data, n, d) = make_1d_data();
let cfg = LodaConfig {
n_projectors: 200,
n_bins: 20,
seed: 17,
};
let mut det = Loda::new(cfg);
det.fit(&data, n, d).expect("fit");
let inlier_score = det.score(&[0.5_f32]).expect("inlier score");
let outlier_score = det.score(&[10.0_f32]).expect("outlier score");
assert!(
outlier_score > inlier_score,
"outlier ({outlier_score}) should score higher than inlier ({inlier_score})"
);
}
// `score_batch` returns one finite score per query row (3 rows of 2 features).
#[test]
fn test_score_batch_length() {
let (data, n, d) = make_2d_data();
let mut det = Loda::new(LodaConfig::default());
det.fit(&data, n, d).expect("fit");
let queries: Vec<f32> = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
let scores = det.score_batch(&queries, 3).expect("batch score");
assert_eq!(scores.len(), 3);
assert!(scores.iter().all(|s| s.is_finite()), "all scores finite");
}
// Fitting with zero samples must report `EmptyInput`.
#[test]
fn test_empty_input_error() {
let mut det = Loda::new(LodaConfig::default());
match det.fit(&[], 0, 2) {
Err(AnomalyError::EmptyInput) => {}
other => panic!("expected EmptyInput, got {other:?}"),
}
}
// Scoring a 3-feature sample on a detector fitted with 2 features must
// report `FeatureCountMismatch` carrying both counts.
#[test]
fn test_feature_count_mismatch() {
let (data, n, d) = make_2d_data();
let mut det = Loda::new(LodaConfig::default());
det.fit(&data, n, d).expect("fit");
match det.score(&[0.1_f32, 0.2, 0.3]) {
Err(AnomalyError::FeatureCountMismatch {
expected: 2,
got: 3,
}) => {}
other => panic!("expected FeatureCountMismatch, got {other:?}"),
}
}
// A single projector with 5 bins is enough to fit and produce a finite score.
#[test]
fn test_minimal_config() {
let (data, n, d) = make_2d_data();
let cfg = LodaConfig {
n_projectors: 1,
n_bins: 5,
seed: 99,
};
let mut det = Loda::new(cfg);
det.fit(&data, n, d).expect("fit with minimal config");
let s = det.score(&[0.3_f32, 0.7]).expect("score");
assert!(s.is_finite(), "score={s}");
}
// Two detectors with identical configuration and data must agree on a score.
#[test]
fn test_deterministic_same_seed() {
let (data, n, d) = make_2d_data();
let cfg = LodaConfig {
n_projectors: 50,
n_bins: 8,
seed: 7,
};
let mut det_a = Loda::new(cfg.clone());
let mut det_b = Loda::new(cfg);
det_a.fit(&data, n, d).expect("fit a");
det_b.fit(&data, n, d).expect("fit b");
let s_a = det_a.score(&[0.4_f32, 0.6]).expect("score a");
let s_b = det_b.score(&[0.4_f32, 0.6]).expect("score b");
assert!((s_a - s_b).abs() < 1e-6, "scores differ: {s_a} vs {s_b}");
}
// Detectors that differ only in seed should disagree on at least one of
// three query points when fitted on random 5-D data.
#[test]
fn test_different_seeds_give_different_scores() {
let n = 80_usize;
let d = 5_usize;
let mut rng_gen = LcgRng::new(777);
let data: Vec<f32> = (0..n * d).map(|_| rng_gen.next_f32()).collect();
let cfg_a = LodaConfig {
seed: 1,
n_projectors: 60,
n_bins: 10,
};
let cfg_b = LodaConfig {
seed: 9999,
n_projectors: 60,
n_bins: 10,
};
let mut det_a = Loda::new(cfg_a);
let mut det_b = Loda::new(cfg_b);
det_a.fit(&data, n, d).expect("fit a");
det_b.fit(&data, n, d).expect("fit b");
let queries: Vec<[f32; 5]> = vec![
[0.1, 0.9, 0.3, 0.7, 0.5],
[0.8, 0.2, 0.6, 0.1, 0.4],
[0.5, 0.5, 0.5, 0.5, 0.5],
];
// True only if every query gets (numerically) the same score from both detectors.
let all_same = queries.iter().all(|q| {
let sa = det_a
.score(q.as_ref())
.expect("score_a should succeed for valid fitted detector");
let sb = det_b
.score(q.as_ref())
.expect("score_b should succeed for valid fitted detector");
(sa - sb).abs() < 1e-9
});
assert!(
!all_same,
"expected at least one score to differ between seed=1 and seed=9999 on 5-D data"
);
}
// Zero features is rejected. When the sample count is also zero either
// error is accepted, since the test does not pin the order of the checks.
#[test]
fn test_zero_n_features_error() {
let mut det = Loda::new(LodaConfig::default());
match det.fit(&[], 0, 0) {
Err(AnomalyError::EmptyInput) | Err(AnomalyError::InvalidFeatureCount { n: 0 }) => {}
other => panic!("expected EmptyInput or InvalidFeatureCount, got {other:?}"),
}
match det.fit(&[1.0_f32], 1, 0) {
Err(AnomalyError::InvalidFeatureCount { n: 0 }) => {}
other => panic!("expected InvalidFeatureCount{{n:0}}, got {other:?}"),
}
}
// Five values for a declared 2 x 3 matrix must report `DimensionMismatch`
// with the expected (6) and actual (5) lengths.
#[test]
fn test_data_dimension_mismatch() {
let mut det = Loda::new(LodaConfig::default());
match det.fit(&[1.0_f32, 2.0, 3.0, 4.0, 5.0], 2, 3) {
Err(AnomalyError::DimensionMismatch {
expected: 6,
got: 5,
}) => {}
other => panic!("expected DimensionMismatch, got {other:?}"),
}
}
}