use crate::error::{AnomalyError, AnomalyResult};
use crate::handle::LcgRng;
/// Smallest radius value used when forming radius ratios in `InneDetector::fit`;
/// it keeps that division away from a zero denominator.
const RADIUS_FLOOR: f32 = 1e-12;
/// Settings consumed by `InneDetector::fit`.
#[derive(Debug, Clone)]
pub struct InneConfig {
/// Number of estimators `fit` builds; `fit` returns an error when this is zero.
pub n_estimators: usize,
/// Number of training rows drawn for each estimator; `fit` requires a value
/// of at least 2 and no larger than the number of training rows.
pub sample_size: usize,
/// Seed handed to `LcgRng::new` at the start of `fit`.
pub seed: u64,
}
impl Default for InneConfig {
/// Returns the default configuration: 200 estimators, 16 rows per estimator, seed 42.
fn default() -> Self {
Self {
n_estimators: 200,
sample_size: 16,
seed: 42,
}
}
}
/// One ensemble member: a set of hyperspheres centred on sampled training rows.
#[derive(Debug, Clone)]
struct InneEstimator {
/// Sampled rows stored back to back (row-major); row `c` occupies
/// `c * n_features..(c + 1) * n_features`.
centroids: Vec<f32>,
/// Per centroid: distance to its nearest other centroid in the same sample.
radii: Vec<f32>,
/// Per centroid: the nearest neighbour's radius divided by the centroid's own
/// radius, with `RADIUS_FLOOR` guarding the division.
ratio: Vec<f32>,
}
impl InneEstimator {
    /// Scores `x` against this estimator's hyperspheres.
    ///
    /// Among the centroids whose sphere contains `x` (distance `<=` radius), the
    /// one with the smallest radius is selected (the first such centroid wins
    /// ties) and the result is `1.0 - ratio` for that centroid. When no sphere
    /// contains `x` the result is `1.0`.
    ///
    /// `n_features` is the row width used to split `centroids` into rows.
    fn isolation_score(&self, x: &[f32], n_features: usize) -> f32 {
        // `(radius, ratio)` of the tightest covering sphere seen so far.
        let mut tightest: Option<(f32, f32)> = None;
        for (idx, center) in self.centroids.chunks_exact(n_features).enumerate() {
            let distance = euclidean(x, center);
            let radius = self.radii[idx];
            // With nothing selected yet the bar is infinity, so an infinite
            // radius can never be selected.
            let radius_to_beat = tightest.map_or(f32::INFINITY, |(r, _)| r);
            if distance <= radius && radius < radius_to_beat {
                tightest = Some((radius, self.ratio[idx]));
            }
        }
        tightest.map_or(1.0, |(_, ratio)| 1.0 - ratio)
    }
}
/// Euclidean (L2) distance between `a` and `b`.
///
/// Coordinates are paired positionally; if the slices differ in length the
/// surplus tail of the longer one is ignored.
#[inline]
fn euclidean(a: &[f32], b: &[f32]) -> f32 {
    let squared_distance: f32 = a
        .iter()
        .zip(b)
        .map(|(&p, &q)| {
            let diff = p - q;
            diff * diff
        })
        .sum();
    squared_distance.sqrt()
}
/// Anomaly detector made of an ensemble of `InneEstimator`s; a sample's score
/// is the mean of the per-estimator isolation scores.
#[derive(Debug, Clone)]
pub struct InneDetector {
/// Settings read by `fit`.
config: InneConfig,
/// Fitted ensemble members; empty until `fit` succeeds.
estimators: Vec<InneEstimator>,
/// Row width recorded by the last successful `fit`; 0 before that.
n_features: usize,
/// Set to `true` once `fit` has succeeded.
fitted: bool,
}
impl InneDetector {
/// Creates an unfitted detector that stores `config`.
///
/// The detector starts with no estimators and a feature count of 0; `score`
/// returns `AnomalyError::NotFitted` until `fit` has succeeded.
#[must_use]
pub fn new(config: InneConfig) -> Self {
Self {
config,
estimators: Vec::new(),
n_features: 0,
fitted: false,
}
}
/// Fits the ensemble on `n_samples` rows of `n_features` values each, stored
/// row-major in `data`.
///
/// For every estimator, `sample_size` distinct rows are drawn as centroids.
/// Each centroid's radius is the distance to its nearest other centroid, and
/// its ratio is that neighbour's radius divided by its own radius. The
/// detector's state is replaced only after every estimator has been built, so
/// a failed call leaves any previous fit untouched.
///
/// # Errors
///
/// * `AnomalyError::EmptyInput` when `n_samples` is 0.
/// * `AnomalyError::InvalidFeatureCount` when `n_features` is 0.
/// * `AnomalyError::DimensionMismatch` when `data.len()` differs from
///   `n_samples * n_features` (including when that product does not fit in
///   `usize`).
/// * `AnomalyError::Internal` when the configured `n_estimators` is 0.
/// * `AnomalyError::InsufficientSamples` when `sample_size` is below 2 or
///   exceeds `n_samples`.
pub fn fit(&mut self, data: &[f32], n_samples: usize, n_features: usize) -> AnomalyResult<()> {
    if n_samples == 0 {
        return Err(AnomalyError::EmptyInput);
    }
    if n_features == 0 {
        return Err(AnomalyError::InvalidFeatureCount { n: 0 });
    }
    // Saturate instead of overflowing: no `f32` slice can hold `usize::MAX`
    // elements, so an overflowing product is always reported as a mismatch
    // rather than panicking (debug) or wrapping (release).
    let expected_len = n_samples.saturating_mul(n_features);
    if data.len() != expected_len {
        return Err(AnomalyError::DimensionMismatch {
            expected: expected_len,
            got: data.len(),
        });
    }
    if self.config.n_estimators == 0 {
        return Err(AnomalyError::Internal {
            msg: "n_estimators must be > 0".into(),
        });
    }
    let psi = self.config.sample_size;
    if psi < 2 {
        return Err(AnomalyError::InsufficientSamples { need: 2, got: psi });
    }
    if psi > n_samples {
        return Err(AnomalyError::InsufficientSamples {
            need: psi,
            got: n_samples,
        });
    }

    let mut rng = LcgRng::new(self.config.seed);
    let mut estimators = Vec::with_capacity(self.config.n_estimators);
    // Permutation of row indices, reused (not reset) across estimators.
    let mut index_pool: Vec<usize> = (0..n_samples).collect();

    for _ in 0..self.config.n_estimators {
        // Partial Fisher-Yates shuffle: afterwards the first `psi` slots hold
        // distinct row indices.
        // NOTE(review): assumes `next_usize(bound)` returns a value below
        // `bound`; otherwise `swap` would index out of range.
        for k in 0..psi {
            let j = k + rng.next_usize(n_samples - k);
            index_pool.swap(k, j);
        }

        // Copy the sampled rows into one contiguous row-major buffer.
        let mut centroids = Vec::with_capacity(psi * n_features);
        for &src in &index_pool[..psi] {
            centroids.extend_from_slice(&data[src * n_features..(src + 1) * n_features]);
        }

        // For each centroid: (distance to nearest other centroid, its index).
        let nearest: Vec<(f32, usize)> = (0..psi)
            .map(|c| {
                let row_c = &centroids[c * n_features..(c + 1) * n_features];
                let mut best = f32::INFINITY;
                let mut best_j = c;
                for (d, row_d) in centroids.chunks_exact(n_features).enumerate() {
                    if d == c {
                        continue;
                    }
                    let dist = euclidean(row_c, row_d);
                    if dist < best {
                        best = dist;
                        best_j = d;
                    }
                }
                (best, best_j)
            })
            .collect();

        let radii: Vec<f32> = nearest.iter().map(|&(r, _)| r).collect();
        // Floor both operands. A centroid that has an exact duplicate in the
        // sample has radius 0, and so does its neighbour; flooring only the
        // denominator would give ratio 0 (score 1.0, maximally anomalous) for
        // a query sitting on duplicated training points. Flooring both gives
        // ratio 1 (score 0.0) there. The ratio stays within [0, 1] because a
        // neighbour's radius never exceeds the centroid's own radius.
        let ratio: Vec<f32> = nearest
            .iter()
            .map(|&(r, nn)| radii[nn].max(RADIUS_FLOOR) / r.max(RADIUS_FLOOR))
            .collect();

        estimators.push(InneEstimator {
            centroids,
            radii,
            ratio,
        });
    }

    self.estimators = estimators;
    self.n_features = n_features;
    self.fitted = true;
    Ok(())
}
/// Returns the anomaly score of one sample: the mean of every estimator's
/// isolation score for `x`.
///
/// # Errors
///
/// * `AnomalyError::NotFitted` when `fit` has not succeeded yet.
/// * `AnomalyError::FeatureCountMismatch` when `x.len()` differs from the
///   feature count recorded by `fit`.
pub fn score(&self, x: &[f32]) -> AnomalyResult<f32> {
    if !self.fitted {
        return Err(AnomalyError::NotFitted);
    }
    let (expected, got) = (self.n_features, x.len());
    if got != expected {
        return Err(AnomalyError::FeatureCountMismatch { expected, got });
    }
    let ensemble_size = self.estimators.len() as f32;
    let total = self
        .estimators
        .iter()
        .map(|estimator| estimator.isolation_score(x, expected))
        .sum::<f32>();
    Ok(total / ensemble_size)
}
/// Scores `n` samples stored row-major in `x`, returning one score per row in
/// input order.
///
/// # Errors
///
/// * `AnomalyError::NotFitted` when `fit` has not succeeded yet.
/// * `AnomalyError::DimensionMismatch` when `x.len()` differs from
///   `n * n_features` (including when that product does not fit in `usize`).
/// * Any error returned by `score` for an individual row.
pub fn score_batch(&self, x: &[f32], n: usize) -> AnomalyResult<Vec<f32>> {
    if !self.fitted {
        return Err(AnomalyError::NotFitted);
    }
    // Saturate instead of overflowing: an `f32` slice can never hold
    // `usize::MAX` elements, so an oversized `n` is reported as a mismatch
    // rather than panicking (debug) or wrapping (release).
    let expected = n.saturating_mul(self.n_features);
    if x.len() != expected {
        return Err(AnomalyError::DimensionMismatch {
            expected,
            got: x.len(),
        });
    }
    let mut scores = Vec::with_capacity(n);
    // `fitted` is only set by `fit`, which rejects a zero feature count, so the
    // chunk size is non-zero here; the length check above makes this exactly
    // `n` chunks.
    for sample in x.chunks_exact(self.n_features) {
        scores.push(self.score(sample)?);
    }
    Ok(scores)
}
/// Returns the feature count recorded by the last successful `fit`
/// (0 for a detector that has never been fitted).
#[inline]
#[must_use]
pub fn n_features(&self) -> usize {
self.n_features
}
/// Returns the number of estimators currently held by the detector. This is
/// the length of the fitted ensemble, not the configured value, so it is 0
/// until `fit` succeeds.
#[inline]
#[must_use]
pub fn n_estimators(&self) -> usize {
self.estimators.len()
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Builds `n` two-dimensional points as a flat row-major vector; every
/// coordinate is `rng.next_f32() * 0.2` drawn from an `LcgRng` seeded with `seed`.
fn dense_cluster(n: usize, seed: u64) -> Vec<f32> {
    let mut rng = LcgRng::new(seed);
    // Two coordinates per point, drawn in sequence.
    (0..n * 2).map(|_| rng.next_f32() * 0.2).collect()
}
/// A point far from the training cluster must score above points inside it.
#[test]
fn outlier_scores_higher_than_inliers() {
    let n = 40_usize;
    let data = dense_cluster(n, 1);
    let mut det = InneDetector::new(InneConfig {
        n_estimators: 100,
        sample_size: 8,
        seed: 7,
    });
    det.fit(&data, n, 2).expect("fit");

    let outlier = det.score(&[50.0_f32, 50.0]).expect("outlier");
    for inlier in [[0.05_f32, 0.1], [0.1, 0.05], [0.15, 0.15], [0.08, 0.12]] {
        let s_in = det.score(&inlier).expect("inlier");
        assert!(
            outlier > s_in,
            "outlier {outlier} should exceed inlier {s_in}"
        );
    }
}
/// Scores for near and far queries are finite, at most 1, and not wildly negative.
#[test]
fn scores_finite_and_bounded() {
    let n = 30_usize;
    let data = dense_cluster(n, 2);
    let cfg = InneConfig {
        n_estimators: 64,
        sample_size: 8,
        seed: 11,
    };
    let mut det = InneDetector::new(cfg);
    det.fit(&data, n, 2).expect("fit");

    for q in [
        [0.1_f32, 0.1],
        [0.5, 0.2],
        [10.0, -10.0],
        [0.0, 0.0],
        [-3.0, 4.0],
    ] {
        let s = det.score(&q).expect("score");
        assert!(s.is_finite(), "score must be finite, got {s}");
        assert!(s <= 1.0 + 1e-5, "score {s} must be ≤ 1");
        assert!(s >= -5.0, "score {s} unexpectedly small");
    }
}
/// Across eight seeds, the score of a fixed query varies less with 64
/// estimators than with 2.
#[test]
fn larger_ensemble_reduces_variance() {
    let n = 36_usize;
    let data = dense_cluster(n, 3);
    let query = [0.12_f32, 0.09];

    // Population variance of the query's score over seeds 0..8.
    let variance_for = |n_estimators: usize| -> f32 {
        let scores: Vec<f32> = (0..8_u64)
            .map(|seed| {
                let mut det = InneDetector::new(InneConfig {
                    n_estimators,
                    sample_size: 6,
                    seed,
                });
                det.fit(&data, n, 2).expect("fit");
                det.score(&query).expect("score")
            })
            .collect();
        let count = scores.len() as f32;
        let mean = scores.iter().sum::<f32>() / count;
        scores.iter().map(|s| (s - mean).powi(2)).sum::<f32>() / count
    };

    let var_small = variance_for(2);
    let var_large = variance_for(64);
    assert!(
        var_large < var_small,
        "variance should shrink with more estimators: small={var_small}, large={var_large}"
    );
}
/// Invalid configurations and unfitted use are reported with the expected error variants.
#[test]
fn invalid_sample_size_and_empty_errors() {
    let n = 10_usize;
    let data = dense_cluster(n, 4);
    let with_sample_size = |sample_size: usize| {
        InneDetector::new(InneConfig {
            n_estimators: 10,
            sample_size,
            seed: 1,
        })
    };

    // More rows requested per estimator than the data set contains.
    let oversized = with_sample_size(50).fit(&data, n, 2);
    assert!(matches!(
        oversized,
        Err(AnomalyError::InsufficientSamples { .. })
    ));

    // No rows at all.
    let empty = InneDetector::new(InneConfig::default()).fit(&[], 0, 2);
    assert!(matches!(empty, Err(AnomalyError::EmptyInput)));

    // A single centroid has no neighbour to define its radius.
    let undersized = with_sample_size(1).fit(&data, n, 2);
    assert!(matches!(
        undersized,
        Err(AnomalyError::InsufficientSamples { need: 2, got: 1 })
    ));

    // Scoring before any fit.
    let unfitted = InneDetector::new(InneConfig::default());
    assert!(matches!(
        unfitted.score(&[0.0_f32, 0.0]),
        Err(AnomalyError::NotFitted)
    ));
}
/// Two detectors fitted with the same configuration and data give the same scores.
#[test]
fn deterministic_with_fixed_seed() {
    let n = 30_usize;
    let data = dense_cluster(n, 5);
    let cfg = InneConfig {
        n_estimators: 50,
        sample_size: 8,
        seed: 123,
    };
    let fitted = |msg: &str| {
        let mut det = InneDetector::new(cfg.clone());
        det.fit(&data, n, 2).expect(msg);
        det
    };
    let det_a = fitted("fit a");
    let det_b = fitted("fit b");

    for q in [[0.1_f32, 0.1], [5.0, 5.0], [0.3, 0.0]] {
        let sa = det_a.score(&q).expect("score a");
        let sb = det_b.score(&q).expect("score b");
        assert!((sa - sb).abs() < 1e-6, "scores differ: {sa} vs {sb}");
    }
}
/// A query of the wrong width is rejected, and batch scoring returns one
/// finite score per row.
#[test]
fn feature_mismatch_and_batch() {
    let n = 20_usize;
    let data = dense_cluster(n, 6);
    let cfg = InneConfig {
        n_estimators: 20,
        sample_size: 5,
        seed: 9,
    };
    let mut det = InneDetector::new(cfg);
    det.fit(&data, n, 2).expect("fit");

    let too_wide = det.score(&[0.1_f32, 0.2, 0.3]);
    assert!(matches!(
        too_wide,
        Err(AnomalyError::FeatureCountMismatch {
            expected: 2,
            got: 3
        })
    ));

    let batch = det.score_batch(&data, n).expect("batch");
    assert_eq!(batch.len(), n);
    assert!(batch.iter().all(|s| s.is_finite()));
}
}