use crate::error::{AnomalyError, AnomalyResult};
use crate::handle::LcgRng;
/// Euler–Mascheroni constant γ, rounded to `f32` precision.
const EULER_MASCHERONI: f32 = 0.577_215_6_f32;

/// Normalisation term `c(n)` for a sample of size `n`.
///
/// * `n < 2` yields `0.0`.
/// * `n == 2` yields `1.0`.
/// * Otherwise the value is `2 * (ln(n - 1) + γ) - 2 * (n - 1) / n`, where
///   `ln(n - 1) + γ` approximates the harmonic number `H(n - 1)`.
#[must_use]
pub fn c_factor(n: usize) -> f32 {
    if n < 2 {
        return 0.0;
    }
    if n == 2 {
        return 1.0;
    }

    let n_minus_one = (n - 1) as f32;
    let harmonic_approx = n_minus_one.ln() + EULER_MASCHERONI;
    let correction = (2.0 * n_minus_one) / (n as f32);
    2.0 * harmonic_approx - correction
}
/// Converts an average path length into a score `2^(-avg_path_length / c(n))`.
///
/// `c(n)` is obtained from [`c_factor`]. When it is smaller than `1e-8`
/// (which `c_factor` produces for `n <= 1`) the division would be
/// meaningless, so the neutral score `0.5` is returned instead.
#[must_use]
pub fn isolation_score_from_path(avg_path_length: f32, n: usize) -> f32 {
    let normaliser = c_factor(n);
    if normaliser < 1e-8 {
        0.5
    } else {
        let exponent = -avg_path_length / normaliser;
        2.0_f32.powf(exponent)
    }
}
/// Anomaly scorer built from random one-dimensional projections.
///
/// Every estimator consists of a direction vector and the training rows'
/// projections onto that direction, kept sorted ascending. A query point is
/// scored by where its own projection ranks among the training projections.
pub struct IsolationScorer {
    /// One direction per estimator; empty placeholders until [`fit`](Self::fit) succeeds.
    projections: Vec<Vec<f32>>,
    /// Per estimator, the training projections sorted ascending (NaNs last).
    proj_training: Vec<Vec<f32>>,
    /// Number of random projections.
    n_estimators: usize,
    /// Feature count of the last successful `fit`; `0` marks an unfitted scorer.
    n_features: usize,
    /// Number of training rows of the last successful `fit`.
    n_train: usize,
}

impl IsolationScorer {
    /// Creates an unfitted scorer with `n_estimators` placeholder projections.
    ///
    /// `rng.next_u32()` is called once per estimator and the result is
    /// discarded; this keeps the generator state identical to what callers of
    /// earlier versions observed.
    pub fn new(n_estimators: usize, rng: &mut LcgRng) -> Self {
        let projections: Vec<Vec<f32>> = (0..n_estimators)
            .map(|_| {
                let _ = rng.next_u32();
                Vec::new()
            })
            .collect();
        Self {
            projections,
            proj_training: Vec::new(),
            n_estimators,
            n_features: 0,
            n_train: 0,
        }
    }

    /// Fits the scorer on `data`, a row-major `n_samples x n_features` matrix.
    ///
    /// For every estimator a direction is drawn from `rng.next_normal()` and
    /// normalised to unit length, then all rows are projected onto it and the
    /// projections are sorted.
    ///
    /// # Errors
    ///
    /// * [`AnomalyError::EmptyInput`] if `n_samples == 0`.
    /// * [`AnomalyError::InvalidFeatureCount`] if `n_features == 0`.
    /// * [`AnomalyError::DimensionMismatch`] if `data.len()` differs from
    ///   `n_samples * n_features` (including when that product overflows).
    pub fn fit(
        &mut self,
        data: &[f32],
        n_samples: usize,
        n_features: usize,
        rng: &mut LcgRng,
    ) -> AnomalyResult<()> {
        if n_samples == 0 {
            return Err(AnomalyError::EmptyInput);
        }
        if n_features == 0 {
            return Err(AnomalyError::InvalidFeatureCount { n: 0 });
        }
        // Saturate rather than overflow: a slice of `f32` can never hold
        // `usize::MAX` elements, so an overflowing product is always reported
        // as a mismatch instead of panicking (debug) or wrapping (release).
        let expected = n_samples.saturating_mul(n_features);
        if data.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: data.len(),
            });
        }

        let mut projections = Vec::with_capacity(self.n_estimators);
        let mut proj_training = Vec::with_capacity(self.n_estimators);
        for _ in 0..self.n_estimators {
            let dir = Self::random_unit_direction(n_features, rng);
            let mut projs: Vec<f32> = data
                .chunks_exact(n_features)
                .map(|row| Self::project(row, &dir))
                .collect();
            // The comparator must be a total order: `sort_unstable_by` may
            // panic otherwise, and `partition_point` in `path_length` relies
            // on the resulting layout.
            projs.sort_unstable_by(Self::cmp_nan_last);
            projections.push(dir);
            proj_training.push(projs);
        }

        // Commit the new state only once everything has been built.
        self.n_features = n_features;
        self.n_train = n_samples;
        self.projections = projections;
        self.proj_training = proj_training;
        Ok(())
    }

    /// Returns the average pseudo path length of `x` over all estimators.
    ///
    /// For each estimator the rank of `x`'s projection among the sorted
    /// training projections is turned into a fraction `r` of `n_train`, and
    /// the estimator contributes `-log2(|2r - 1| + 1e-8)`: close to zero at
    /// either extreme of the training range, large near the median.
    ///
    /// # Errors
    ///
    /// * [`AnomalyError::NotFitted`] if `fit` has not succeeded yet, or the
    ///   scorer has zero estimators (the average would be `0 / 0`).
    /// * [`AnomalyError::FeatureCountMismatch`] if `x.len()` differs from the
    ///   fitted feature count.
    pub fn path_length(&self, x: &[f32]) -> AnomalyResult<f32> {
        self.ensure_fitted()?;
        if x.len() != self.n_features {
            return Err(AnomalyError::FeatureCountMismatch {
                expected: self.n_features,
                got: x.len(),
            });
        }

        let n_train = self.n_train as f32;
        let mut total_path = 0.0_f32;
        for (dir, sorted) in self.projections.iter().zip(&self.proj_training) {
            let p_x = Self::project(x, dir);
            // Number of training projections strictly below the query.
            let rank = sorted.partition_point(|&v| v < p_x);
            let rank_frac = rank as f32 / n_train;
            total_path += -((2.0 * rank_frac - 1.0).abs() + 1e-8).log2();
        }
        Ok(total_path / self.n_estimators as f32)
    }

    /// Scores a single sample via [`isolation_score_from_path`] applied to
    /// [`path_length`](Self::path_length) and the training-set size.
    ///
    /// # Errors
    ///
    /// Propagates every error of [`path_length`](Self::path_length).
    pub fn score(&self, x: &[f32]) -> AnomalyResult<f32> {
        let path = self.path_length(x)?;
        Ok(isolation_score_from_path(path, self.n_train))
    }

    /// Scores `n` samples stored row-major in `x`.
    ///
    /// # Errors
    ///
    /// * [`AnomalyError::NotFitted`] if the scorer is not usable yet (same
    ///   condition as [`path_length`](Self::path_length)).
    /// * [`AnomalyError::DimensionMismatch`] if `x.len()` differs from
    ///   `n * n_features` (including when that product overflows).
    pub fn score_batch(&self, x: &[f32], n: usize) -> AnomalyResult<Vec<f32>> {
        // Checked first so that an unfitted scorer is reported as such, and so
        // that `chunks_exact` below never receives a chunk size of zero.
        self.ensure_fitted()?;
        let expected = n.saturating_mul(self.n_features);
        if x.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: x.len(),
            });
        }
        // Collecting into `Result` stops at the first failing sample.
        x.chunks_exact(self.n_features)
            .map(|sample| self.score(sample))
            .collect()
    }

    /// Fails with `NotFitted` unless `fit` has succeeded with at least one estimator.
    fn ensure_fitted(&self) -> AnomalyResult<()> {
        if self.n_features == 0 || self.n_estimators == 0 {
            return Err(AnomalyError::NotFitted);
        }
        Ok(())
    }

    /// Draws `n_features` normal values and scales them to unit length.
    ///
    /// Falls back to the first coordinate axis when the drawn vector's norm is
    /// not greater than `1e-8`. `n_features` must be non-zero.
    fn random_unit_direction(n_features: usize, rng: &mut LcgRng) -> Vec<f32> {
        let mut dir: Vec<f32> = (0..n_features).map(|_| rng.next_normal()).collect();
        let norm = dir.iter().map(|v| v * v).sum::<f32>().sqrt();
        if norm > 1e-8 {
            for v in &mut dir {
                *v /= norm;
            }
        } else {
            dir.fill(0.0);
            dir[0] = 1.0;
        }
        dir
    }

    /// Dot product of a sample row with a direction vector.
    fn project(row: &[f32], dir: &[f32]) -> f32 {
        row.iter().zip(dir.iter()).map(|(x, w)| x * w).sum()
    }

    /// Total order on `f32` that agrees with `partial_cmp` for ordinary values
    /// and places every NaN after all non-NaN values.
    fn cmp_nan_last(a: &f32, b: &f32) -> std::cmp::Ordering {
        a.partial_cmp(b)
            .unwrap_or_else(|| a.is_nan().cmp(&b.is_nan()))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the two fixed points of `c_factor` and positivity for a larger `n`.
    #[test]
    fn c_factor_values() {
        let at_zero = c_factor(0);
        let at_one = c_factor(1);
        let at_two = c_factor(2);
        assert!(at_zero.abs() < 1e-6);
        assert!(at_one.abs() < 1e-6);
        assert!((at_two - 1.0).abs() < 1e-6);
        assert!(c_factor(100) > 0.0);
    }

    /// A positive path length must map to a score inside `[0, 1]`.
    #[test]
    fn isolation_score_in_range() {
        let s = isolation_score_from_path(5.0, 256);
        assert!((0.0..=1.0).contains(&s), "s={s}");
    }

    /// End-to-end smoke test: fit on 100 two-feature rows, then score one point.
    #[test]
    fn iforest_fit_score() {
        let mut rng = LcgRng::new(42);
        let mut scorer = IsolationScorer::new(50, &mut rng);

        // Row `i` is `[0.1 * i, 0.05 * i]`, stored row-major.
        let mut data: Vec<f32> = Vec::with_capacity(200);
        for i in 0..100_usize {
            data.push(i as f32 * 0.1);
            data.push(i as f32 * 0.05);
        }

        let fitted = scorer.fit(&data, 100, 2, &mut rng);
        fitted.expect("isolation scorer fit should succeed");

        let scored = scorer.score(&[5.0_f32, 2.5]);
        let s = scored.expect("isolation scorer score should succeed");
        assert!((0.0..=1.0).contains(&s), "s={s}");
    }
}