use crate::error::{AnomalyError, AnomalyResult};
#[derive(Debug, Clone)]
pub struct OnlineLofConfig {
pub k: usize,
}
impl Default for OnlineLofConfig {
fn default() -> Self {
Self { k: 5 }
}
}
#[inline]
fn sq_dist(a: &[f64], b: &[f64]) -> f64 {
a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum()
}
#[inline]
fn cmp_dist_idx(a: &(f64, usize), b: &(f64, usize)) -> std::cmp::Ordering {
a.0.partial_cmp(&b.0)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.1.cmp(&b.1))
}
pub struct OnlineLof {
k: usize,
dim: usize,
points: Vec<f64>,
live: Vec<bool>,
n_live: usize,
neighbours: Vec<Vec<usize>>,
neigh_dist: Vec<Vec<f64>>,
kdist: Vec<f64>,
lrd: Vec<f64>,
lof: Vec<f64>,
rnn: Vec<Vec<usize>>,
}
impl OnlineLof {
pub fn new(k: usize, dim: usize) -> AnomalyResult<Self> {
if k == 0 {
return Err(AnomalyError::InvalidK { k: 0 });
}
if dim == 0 {
return Err(AnomalyError::InvalidFeatureCount { n: 0 });
}
Ok(Self {
k,
dim,
points: Vec::new(),
live: Vec::new(),
n_live: 0,
neighbours: Vec::new(),
neigh_dist: Vec::new(),
kdist: Vec::new(),
lrd: Vec::new(),
lof: Vec::new(),
rnn: Vec::new(),
})
}
pub fn from_config(cfg: &OnlineLofConfig, dim: usize) -> AnomalyResult<Self> {
Self::new(cfg.k, dim)
}
#[must_use]
#[inline]
pub fn k(&self) -> usize {
self.k
}
#[must_use]
#[inline]
pub fn dim(&self) -> usize {
self.dim
}
#[must_use]
#[inline]
pub fn len(&self) -> usize {
self.live.len()
}
#[must_use]
#[inline]
pub fn n_live(&self) -> usize {
self.n_live
}
#[must_use]
#[inline]
pub fn is_empty(&self) -> bool {
self.n_live == 0
}
#[inline]
fn effective_k(&self, exclude_self: bool) -> usize {
let available = if exclude_self {
self.n_live.saturating_sub(1)
} else {
self.n_live
};
self.k.min(available)
}
#[must_use]
#[inline]
pub fn lof_scores(&self) -> &[f64] {
&self.lof
}
pub fn score_of(&self, idx: usize) -> AnomalyResult<f64> {
if idx >= self.live.len() || !self.live[idx] {
return Err(AnomalyError::Internal {
msg: format!("score_of: index {idx} is out of range or has been removed"),
});
}
Ok(self.lof[idx])
}
#[inline]
fn point(&self, i: usize) -> &[f64] {
&self.points[i * self.dim..(i + 1) * self.dim]
}
#[inline]
fn dist(&self, i: usize, j: usize) -> f64 {
sq_dist(self.point(i), self.point(j)).sqrt()
}
fn knn_live(&self, query: &[f64], kk: usize, exclude: Option<usize>) -> (Vec<usize>, Vec<f64>) {
let mut cands: Vec<(f64, usize)> = Vec::with_capacity(self.n_live);
for j in 0..self.live.len() {
if !self.live[j] || Some(j) == exclude {
continue;
}
cands.push((sq_dist(query, self.point(j)).sqrt(), j));
}
cands.sort_unstable_by(cmp_dist_idx);
cands.truncate(kk);
let idx = cands.iter().map(|&(_, i)| i).collect();
let dst = cands.iter().map(|&(d, _)| d).collect();
(idx, dst)
}
fn compute_lrd(&self, i: usize) -> f64 {
let nbrs = &self.neighbours[i];
if nbrs.is_empty() {
return 0.0;
}
let mut sum_reach = 0.0_f64;
for (slot, &o) in nbrs.iter().enumerate() {
let d_io = self.neigh_dist[i][slot];
let reach = self.kdist[o].max(d_io).max(0.0);
sum_reach += reach;
}
if sum_reach < 1e-15 {
f64::INFINITY
} else {
nbrs.len() as f64 / sum_reach
}
}
fn compute_lof(&self, i: usize) -> f64 {
let nbrs = &self.neighbours[i];
if nbrs.is_empty() {
return 1.0;
}
let lrd_i = self.lrd[i];
if lrd_i.is_infinite() {
return 1.0;
}
if lrd_i <= 0.0 {
return 1.0;
}
let acc: f64 = nbrs.iter().map(|&o| self.lrd[o] / lrd_i).sum();
acc / nbrs.len() as f64
}
fn rebuild_neighbours(&mut self, i: usize) {
let old: Vec<usize> = std::mem::take(&mut self.neighbours[i]);
for o in old {
self.rnn[o].retain(|&x| x != i);
}
self.neigh_dist[i].clear();
let kk = self.effective_k(true);
let query_owned = self.point(i).to_vec();
let (idx, dst) = self.knn_live(&query_owned, kk, Some(i));
self.kdist[i] = dst.last().copied().unwrap_or(0.0);
for &o in &idx {
self.rnn[o].push(i);
}
self.neighbours[i] = idx;
self.neigh_dist[i] = dst;
}
pub fn insert(&mut self, point: &[f64]) -> AnomalyResult<f64> {
if point.len() != self.dim {
return Err(AnomalyError::DimensionMismatch {
expected: self.dim,
got: point.len(),
});
}
let pc = self.live.len();
self.points.extend_from_slice(point);
self.live.push(true);
self.n_live += 1;
self.neighbours.push(Vec::new());
self.neigh_dist.push(Vec::new());
self.kdist.push(0.0);
self.lrd.push(0.0);
self.lof.push(1.0);
self.rnn.push(Vec::new());
if self.n_live == 1 {
self.kdist[pc] = 0.0;
self.lrd[pc] = f64::INFINITY;
self.lof[pc] = 1.0;
return Ok(self.lof[pc]);
}
self.rebuild_neighbours(pc);
let mut s_update_k: Vec<usize> = Vec::new();
for p in 0..self.live.len() {
if p == pc || !self.live[p] {
continue;
}
let d = self.dist(p, pc);
let full = self.neighbours[p].len() >= self.k;
if !full || d <= self.kdist[p] {
s_update_k.push(p);
}
}
for &p in &s_update_k {
self.insert_neighbour(p, pc);
}
let mut lrd_dirty: Vec<bool> = vec![false; self.live.len()];
lrd_dirty[pc] = true;
for &p in &s_update_k {
lrd_dirty[p] = true;
let rev = self.rnn[p].clone();
for r in rev {
if r < lrd_dirty.len() {
lrd_dirty[r] = true;
}
}
}
let lrd_set: Vec<usize> = (0..self.live.len())
.filter(|&i| self.live[i] && lrd_dirty[i])
.collect();
for &i in &lrd_set {
self.lrd[i] = self.compute_lrd(i);
}
let mut lof_dirty: Vec<bool> = vec![false; self.live.len()];
for &i in &lrd_set {
if i < lof_dirty.len() {
lof_dirty[i] = true;
}
let rev = self.rnn[i].clone();
for r in rev {
if r < lof_dirty.len() {
lof_dirty[r] = true;
}
}
}
for i in (0..self.live.len()).filter(|&i| self.live[i] && lof_dirty[i]) {
self.lof[i] = self.compute_lof(i);
}
Ok(self.lof[pc])
}
fn insert_neighbour(&mut self, p: usize, cand: usize) {
if self.neighbours[p].contains(&cand) {
return;
}
let d = self.dist(p, cand);
let key = (d, cand);
let pos = self.neighbours[p]
.iter()
.zip(self.neigh_dist[p].iter())
.position(|(&idx, &dd)| cmp_dist_idx(&key, &(dd, idx)) == std::cmp::Ordering::Less)
.unwrap_or(self.neighbours[p].len());
self.neighbours[p].insert(pos, cand);
self.neigh_dist[p].insert(pos, d);
self.rnn[cand].push(p);
while self.neighbours[p].len() > self.k {
if let Some(evicted) = self.neighbours[p].pop() {
self.neigh_dist[p].pop();
self.rnn[evicted].retain(|&x| x != p);
}
}
self.kdist[p] = self.neigh_dist[p].last().copied().unwrap_or(0.0);
}
pub fn remove(&mut self, idx: usize) -> AnomalyResult<()> {
if idx >= self.live.len() || !self.live[idx] {
return Err(AnomalyError::Internal {
msg: format!("remove: index {idx} is out of range or already removed"),
});
}
let affected_holders: Vec<usize> = self.rnn[idx].clone();
self.live[idx] = false;
self.n_live -= 1;
self.lof[idx] = f64::NAN;
self.lrd[idx] = f64::NAN;
self.kdist[idx] = f64::NAN;
let own = std::mem::take(&mut self.neighbours[idx]);
for o in own {
self.rnn[o].retain(|&x| x != idx);
}
self.neigh_dist[idx].clear();
self.rnn[idx].clear();
if self.n_live == 0 {
return Ok(());
}
for &p in &affected_holders {
if self.live[p] {
self.rebuild_neighbours(p);
}
}
let mut lrd_dirty: Vec<bool> = vec![false; self.live.len()];
for &p in &affected_holders {
if !self.live[p] {
continue;
}
if p < lrd_dirty.len() {
lrd_dirty[p] = true;
}
let rev = self.rnn[p].clone();
for r in rev {
if r < lrd_dirty.len() {
lrd_dirty[r] = true;
}
}
}
let lrd_set: Vec<usize> = (0..self.live.len())
.filter(|&i| self.live[i] && lrd_dirty[i])
.collect();
for &i in &lrd_set {
self.lrd[i] = self.compute_lrd(i);
}
let mut lof_dirty: Vec<bool> = vec![false; self.live.len()];
for &i in &lrd_set {
if i < lof_dirty.len() {
lof_dirty[i] = true;
}
let rev = self.rnn[i].clone();
for r in rev {
if r < lof_dirty.len() {
lof_dirty[r] = true;
}
}
}
for i in (0..self.live.len()).filter(|&i| self.live[i] && lof_dirty[i]) {
self.lof[i] = self.compute_lof(i);
}
Ok(())
}
#[cfg(test)]
fn brute_force_lof(&self) -> Vec<f64> {
let n = self.live.len();
let mut out = vec![f64::NAN; n];
if self.n_live == 0 {
return out;
}
let mut nbrs: Vec<Vec<usize>> = vec![Vec::new(); n];
let mut nbr_d: Vec<Vec<f64>> = vec![Vec::new(); n];
let mut kd = vec![0.0_f64; n];
let kk = self.k.min(self.n_live.saturating_sub(1));
for i in 0..n {
if !self.live[i] {
continue;
}
let (idx, dst) = self.knn_live(self.point(i), kk, Some(i));
kd[i] = dst.last().copied().unwrap_or(0.0);
nbrs[i] = idx;
nbr_d[i] = dst;
}
let mut lrd = vec![0.0_f64; n];
for i in 0..n {
if !self.live[i] {
continue;
}
if nbrs[i].is_empty() {
lrd[i] = 0.0;
continue;
}
let mut sum_reach = 0.0_f64;
for (slot, &o) in nbrs[i].iter().enumerate() {
let reach = kd[o].max(nbr_d[i][slot]).max(0.0);
sum_reach += reach;
}
lrd[i] = if sum_reach < 1e-15 {
f64::INFINITY
} else {
nbrs[i].len() as f64 / sum_reach
};
}
for i in 0..n {
if !self.live[i] {
continue;
}
if nbrs[i].is_empty() {
out[i] = 1.0;
continue;
}
let lrd_i = lrd[i];
if lrd_i.is_infinite() || lrd_i <= 0.0 {
out[i] = 1.0;
continue;
}
let acc: f64 = nbrs[i].iter().map(|&o| lrd[o] / lrd_i).sum();
out[i] = acc / nbrs[i].len() as f64;
}
out
}
}
#[cfg(test)]
mod tests {
use super::*;
fn assert_lof_close(got: &[f64], want: &[f64], tol: f64) {
assert_eq!(got.len(), want.len(), "length mismatch");
for (i, (&g, &w)) in got.iter().zip(want.iter()).enumerate() {
if w.is_nan() {
assert!(g.is_nan(), "slot {i}: expected NaN tombstone, got {g}");
continue;
}
assert!(g.is_finite(), "slot {i}: incremental LOF not finite: {g}");
assert!(
(g - w).abs() <= tol,
"slot {i}: incremental {g} vs batch {w} (|Δ|={})",
(g - w).abs()
);
}
}
#[test]
fn incremental_equals_batch_after_each_insert() {
let pts: Vec<[f64; 2]> = vec![
[0.0, 0.0],
[0.1, 0.0],
[0.0, 0.1],
[0.1, 0.1],
[0.05, 0.05],
[5.0, 5.0],
[5.1, 5.0],
[5.0, 5.1],
[5.1, 5.1],
[5.05, 5.05],
[50.0, 50.0], [0.2, 0.05],
[4.9, 5.0],
];
let mut model = OnlineLof::new(3, 2).expect("construct online LOF");
for (step, p) in pts.iter().enumerate() {
let _lof_pc = model.insert(p).expect("insert should succeed");
let batch = model.brute_force_lof();
assert_lof_close(model.lof_scores(), &batch, 1e-9);
assert_eq!(model.n_live(), step + 1);
}
}
#[test]
fn outlier_high_cluster_near_one() {
let mut model = OnlineLof::new(4, 2).expect("construct");
let mut idx_cluster = Vec::new();
for i in 0..15_usize {
let x = (i % 4) as f64 * 0.01;
let y = (i / 4) as f64 * 0.01;
model.insert(&[x, y]).expect("insert cluster point");
idx_cluster.push(model.len() - 1);
}
let out_lof = model.insert(&[100.0, 100.0]).expect("insert outlier");
let out_idx = model.len() - 1;
assert!(
out_lof > 2.0,
"outlier LOF should be well above 1, got {out_lof}"
);
for &c in &idx_cluster {
let s = model.score_of(c).expect("cluster score");
assert!(s < 1.5, "cluster point {c} should have LOF ≈ 1, got {s}");
}
let scores = model.lof_scores();
let max_idx = scores
.iter()
.enumerate()
.filter(|(_, v)| v.is_finite())
.max_by(|a, b| a.1.partial_cmp(b.1).expect("finite comparison"))
.map(|(i, _)| i)
.expect("at least one finite score");
assert_eq!(max_idx, out_idx, "outlier should hold the max LOF");
}
#[test]
fn k_clamped_to_available_points() {
let mut model = OnlineLof::new(10, 1).expect("construct");
let first = model.insert(&[0.0]).expect("first insert");
assert_eq!(first, 1.0, "first ever point: LOF defaults to 1.0");
model.insert(&[1.0]).expect("second insert");
model.insert(&[2.0]).expect("third insert");
model.insert(&[3.0]).expect("fourth insert");
let batch = model.brute_force_lof();
assert_lof_close(model.lof_scores(), &batch, 1e-9);
for s in model.lof_scores() {
assert!(s.is_finite(), "clamped-k score not finite: {s}");
}
}
#[test]
fn single_point_has_unit_lof() {
let mut model = OnlineLof::new(3, 2).expect("construct");
let s = model.insert(&[1.0, 2.0]).expect("insert");
assert_eq!(s, 1.0);
assert_eq!(model.n_live(), 1);
assert_eq!(model.score_of(0).expect("score"), 1.0);
}
#[test]
fn error_k_zero() {
let res = OnlineLof::new(0, 2);
assert!(matches!(res, Err(AnomalyError::InvalidK { k: 0 })));
}
#[test]
fn error_dim_zero() {
let res = OnlineLof::new(3, 0);
assert!(matches!(
res,
Err(AnomalyError::InvalidFeatureCount { n: 0 })
));
}
#[test]
fn error_dim_mismatch_on_insert() {
let mut model = OnlineLof::new(3, 2).expect("construct");
let res = model.insert(&[1.0, 2.0, 3.0]); assert!(matches!(
res,
Err(AnomalyError::DimensionMismatch {
expected: 2,
got: 3
})
));
}
#[test]
fn error_score_of_invalid_index() {
let mut model = OnlineLof::new(2, 1).expect("construct");
model.insert(&[0.0]).expect("insert a");
model.insert(&[1.0]).expect("insert b");
model.insert(&[2.0]).expect("insert c");
assert!(model.score_of(99).is_err(), "out-of-range index errors");
model.remove(1).expect("remove b");
assert!(model.score_of(1).is_err(), "removed index errors");
}
#[test]
fn remove_then_matches_batch() {
let mut model = OnlineLof::new(3, 2).expect("construct");
let pts: Vec<[f64; 2]> = vec![
[0.0, 0.0],
[0.1, 0.0],
[0.0, 0.1],
[0.1, 0.1],
[0.05, 0.05],
[5.0, 5.0],
[5.1, 5.0],
[5.0, 5.1],
[40.0, 40.0],
[0.2, 0.2],
];
for p in &pts {
model.insert(p).expect("insert");
}
model.remove(8).expect("remove outlier");
let batch = model.brute_force_lof();
assert_lof_close(model.lof_scores(), &batch, 1e-9);
model.remove(4).expect("remove cluster member");
let batch2 = model.brute_force_lof();
assert_lof_close(model.lof_scores(), &batch2, 1e-9);
assert_eq!(model.n_live(), pts.len() - 2);
}
#[test]
fn interleaved_insert_remove_consistent() {
let mut model = OnlineLof::new(4, 2).expect("construct");
for i in 0..20_usize {
let x = (i as f64).sin();
let y = (i as f64 * 0.7).cos();
model.insert(&[x, y]).expect("insert");
if i.is_multiple_of(5) && i > 0 {
let target = i / 5;
if target < model.len() && model.score_of(target).is_ok() {
model.remove(target).expect("remove");
}
}
let batch = model.brute_force_lof();
assert_lof_close(model.lof_scores(), &batch, 1e-9);
}
}
#[test]
fn insert_returns_stored_score() {
let mut model = OnlineLof::new(3, 2).expect("construct");
let mut last_idx = 0;
let mut last_returned = 1.0;
for i in 0..12_usize {
let p = [i as f64 * 0.3, (i as f64 * 0.3).fract()];
last_returned = model.insert(&p).expect("insert");
last_idx = model.len() - 1;
}
let stored = model.score_of(last_idx).expect("score");
assert!(
(stored - last_returned).abs() <= 1e-12,
"insert returned {last_returned} but stored {stored}"
);
}
#[test]
fn one_dim_gap_outlier() {
let mut model = OnlineLof::new(3, 1).expect("construct");
for i in 0..20_usize {
model.insert(&[i as f64 * 0.1]).expect("dense insert");
}
let gap_lof = model.insert(&[100.0]).expect("gap insert");
assert!(
gap_lof > 2.0,
"gapped point LOF should be high, got {gap_lof}"
);
let batch = model.brute_force_lof();
assert_lof_close(model.lof_scores(), &batch, 1e-9);
}
}