use crate::error::AnalyticsError;
use std::collections::VecDeque;
/// Sliding-window z-score anomaly detector.
///
/// Retains the most recent `window_size` finite observations together with
/// running sums of the values and of their squares, so the window mean and
/// population standard deviation are available without rescanning the window
/// on every update.
#[derive(Debug, Clone)]
pub struct ZScoreDetector {
    /// Maximum number of observations retained in `window`.
    window_size: usize,
    /// An observation is reported when `|z|` is strictly greater than this.
    threshold: f64,
    /// Retained observations, oldest at the front.
    window: VecDeque<f64>,
    /// Running sum of the values currently in `window`.
    sum: f64,
    /// Running sum of the squares of the values currently in `window`.
    sum_sq: f64,
    /// Number of evictions since `sum` / `sum_sq` were last rebuilt from
    /// `window`; used to bound accumulated floating-point error.
    evictions_since_resync: usize,
}

impl ZScoreDetector {
    /// Threshold applied by [`ZScoreDetector::new`].
    const DEFAULT_THRESHOLD: f64 = 3.0;

    /// Creates a detector holding at most `window` observations and using the
    /// default threshold of `3.0`.
    ///
    /// # Errors
    ///
    /// Returns `AnalyticsError::InvalidInput` when `window < 2`.
    pub fn new(window: usize) -> Result<Self, AnalyticsError> {
        Self::with_threshold(window, Self::DEFAULT_THRESHOLD)
    }

    /// Creates a detector holding at most `window` observations that reports
    /// values whose `|z|` is strictly greater than `threshold`.
    ///
    /// An infinite `threshold` is accepted; such a detector never reports.
    ///
    /// # Errors
    ///
    /// Returns `AnalyticsError::InvalidInput` when `window < 2`, or when
    /// `threshold` is NaN, zero or negative.
    pub fn with_threshold(window: usize, threshold: f64) -> Result<Self, AnalyticsError> {
        if window < 2 {
            return Err(AnalyticsError::InvalidInput(
                "window size must be ≥ 2".into(),
            ));
        }
        // NaN compares false against everything, so `<= 0.0` alone would let
        // it through and the detector could then never report anything.
        if threshold.is_nan() || threshold <= 0.0 {
            return Err(AnalyticsError::InvalidInput("threshold must be > 0".into()));
        }
        Ok(Self {
            window_size: window,
            threshold,
            // The oldest value is evicted before a new one is pushed, so the
            // deque never holds more than `window` elements.
            window: VecDeque::with_capacity(window),
            sum: 0.0,
            sum_sq: 0.0,
            evictions_since_resync: 0,
        })
    }

    /// Feeds one observation into the window and returns its z-score when it
    /// is anomalous.
    ///
    /// The value is added to the window (evicting the oldest entry if the
    /// window is full) *before* the statistics are computed, so the z-score
    /// is measured against a window that already contains `value`. One
    /// consequence: for a window holding `n` values, `|z|` cannot exceed
    /// `sqrt(n - 1)` in exact arithmetic, so with the default threshold of
    /// `3.0` nothing can be reported until at least 11 values are held.
    ///
    /// Returns `Some(z)` when `|z| > threshold`, and `None` when
    /// * `value` is NaN or infinite (the value is ignored and the detector
    ///   state is left untouched),
    /// * fewer than two values are held,
    /// * the window's standard deviation is below `f64::EPSILON`, or
    /// * the value is within the threshold.
    #[must_use]
    pub fn update(&mut self, value: f64) -> Option<f64> {
        // A single NaN or infinity would poison the running sums for good
        // (`NaN - NaN` and `inf - inf` are both NaN), so never admit one.
        if !value.is_finite() {
            return None;
        }

        if self.window.len() == self.window_size {
            if let Some(oldest) = self.window.pop_front() {
                self.sum -= oldest;
                self.sum_sq -= oldest * oldest;
                self.evictions_since_resync += 1;
            }
        }
        self.window.push_back(value);
        self.sum += value;
        self.sum_sq += value * value;

        // Incremental add/subtract never sheds rounding error on its own, and
        // squaring a huge finite value can overflow to infinity, which turns
        // into NaN once that value is evicted. Rebuilding once per
        // `window_size` evictions keeps the cost amortised O(1) while bounding
        // how long such error can persist to one window length.
        if self.evictions_since_resync >= self.window_size
            || self.sum.is_nan()
            || self.sum_sq.is_nan()
        {
            self.resync();
        }

        let held = self.window.len();
        if held < 2 {
            return None;
        }
        let count = held as f64;
        let mean = self.sum / count;
        // Population variance via E[x^2] - E[x]^2; rounding can push it
        // slightly below zero, hence the clamp.
        let variance = self.sum_sq / count - mean * mean;
        let std_dev = variance.max(0.0).sqrt();
        if std_dev < f64::EPSILON {
            // (Near-)constant window: a z-score would be meaningless.
            return None;
        }

        let z = (value - mean) / std_dev;
        if z.abs() > self.threshold {
            Some(z)
        } else {
            None
        }
    }

    /// Returns the mean of the values currently held, or `None` when the
    /// window is empty.
    #[must_use]
    pub fn mean(&self) -> Option<f64> {
        if self.window.is_empty() {
            None
        } else {
            Some(self.sum / self.window.len() as f64)
        }
    }

    /// Returns the number of values currently held in the window.
    #[must_use]
    pub fn window_len(&self) -> usize {
        self.window.len()
    }

    /// Discards every held value and zeroes the running statistics; the
    /// configured window size and threshold are kept.
    pub fn reset(&mut self) {
        self.window.clear();
        self.sum = 0.0;
        self.sum_sq = 0.0;
        self.evictions_since_resync = 0;
    }

    /// Rebuilds the running sums directly from the held values, discarding
    /// any error accumulated by incremental updates.
    fn resync(&mut self) {
        self.sum = self.window.iter().sum();
        self.sum_sq = self.window.iter().map(|v| v * v).sum();
        self.evictions_since_resync = 0;
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `ZScoreDetector`: constructor validation, detection of
    //! outliers in both directions, and window bookkeeping.

    use super::*;

    /// Window sizes below two are rejected.
    #[test]
    fn window_less_than_two_errors() {
        assert!(ZScoreDetector::new(0).is_err());
        assert!(ZScoreDetector::new(1).is_err());
    }

    /// Zero and negative thresholds are rejected.
    #[test]
    fn valid_threshold_required() {
        assert!(ZScoreDetector::with_threshold(5, 0.0).is_err());
        assert!(ZScoreDetector::with_threshold(5, -1.0).is_err());
    }

    /// A positive threshold with a valid window is accepted.
    #[test]
    fn positive_threshold_accepted() {
        assert!(ZScoreDetector::with_threshold(5, 2.0).is_ok());
    }

    /// A single observation never yields a z-score.
    #[test]
    fn first_observation_none() {
        let mut d = ZScoreDetector::new(5).expect("valid");
        assert!(d.update(1.0).is_none());
    }

    /// A window of identical values has zero spread, so nothing is flagged.
    #[test]
    fn constant_window_no_anomaly() {
        let mut d = ZScoreDetector::new(5).expect("valid");
        for _ in 0..5 {
            assert!(d.update(42.0).is_none());
        }
    }

    /// A value far above a tightly clustered window is flagged with z > 0.
    #[test]
    fn outlier_detected() {
        let mut d = ZScoreDetector::new(20).expect("valid");
        for i in 0..19 {
            let _ = d.update(10.0 + (i % 2) as f64 * 0.01);
        }
        let z = d
            .update(1000.0)
            .expect("extreme outlier should be detected");
        assert!(z > 0.0, "positive z-score");
    }

    /// A value far below the window is flagged with z < 0.
    #[test]
    fn negative_outlier_detected() {
        let mut d = ZScoreDetector::new(20).expect("valid");
        for _ in 0..19 {
            let _ = d.update(100.0);
        }
        let z = d
            .update(-1000.0)
            .expect("extreme negative outlier should be detected");
        assert!(z < 0.0, "negative z-score");
    }

    /// Values with ordinary spread stay under the default threshold.
    #[test]
    fn normal_values_not_flagged() {
        let mut d = ZScoreDetector::new(10).expect("valid");
        let values = [1.0, 1.1, 0.9, 1.05, 0.95, 1.02, 0.98, 1.0, 1.03, 0.97];
        for &v in &values {
            let r = d.update(v);
            assert!(r.is_none(), "normal value {v} should not be flagged");
        }
    }

    /// `reset` empties the window so the next value is a "first" observation.
    #[test]
    fn reset_clears_state() {
        let mut d = ZScoreDetector::new(5).expect("valid");
        for _ in 0..5 {
            let _ = d.update(1.0);
        }
        d.reset();
        assert_eq!(d.window_len(), 0);
        assert!(d.mean().is_none());
        assert!(d.update(999.0).is_none());
    }

    /// `mean` reflects the values currently held.
    #[test]
    fn mean_tracks_window() {
        let mut d = ZScoreDetector::new(4).expect("valid");
        // `update` is `#[must_use]`; the z-score is irrelevant to this test.
        let _ = d.update(2.0);
        let _ = d.update(4.0);
        let mean = d.mean().expect("some mean");
        assert!((mean - 3.0).abs() < 1e-9);
    }

    /// Once full, the window drops its oldest value for each new one.
    #[test]
    fn window_evicts_oldest() {
        let mut d = ZScoreDetector::new(3).expect("valid");
        for v in [1.0, 2.0, 3.0, 4.0] {
            let _ = d.update(v);
        }
        assert_eq!(d.window_len(), 3);
        // Held values are 2, 3 and 4.
        let mean = d.mean().expect("some mean");
        assert!((mean - 3.0).abs() < 1e-9);
    }
}