use crate::error::{AnalyticsError, Result};
use scirs2_core::ndarray::{Array1, ArrayView1};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnomalyMethod {
ZScore,
IQR,
ModifiedZScore,
}
#[derive(Debug, Clone)]
pub struct AnomalyResult {
pub anomaly_indices: Vec<usize>,
pub scores: Array1<f64>,
pub threshold: f64,
pub method: AnomalyMethod,
}
pub struct AnomalyDetector {
method: AnomalyMethod,
threshold: f64,
}
impl AnomalyDetector {
pub fn new(method: AnomalyMethod, threshold: f64) -> Self {
Self { method, threshold }
}
pub fn detect(&self, values: &ArrayView1<f64>) -> Result<AnomalyResult> {
let scores = match self.method {
AnomalyMethod::ZScore => self.z_score(values)?,
AnomalyMethod::IQR => self.iqr_score(values)?,
AnomalyMethod::ModifiedZScore => self.modified_z_score(values)?,
};
let anomaly_indices: Vec<usize> = scores
.iter()
.enumerate()
.filter_map(|(i, &score)| {
if score.abs() >= self.threshold {
Some(i)
} else {
None
}
})
.collect();
Ok(AnomalyResult {
anomaly_indices,
scores,
threshold: self.threshold,
method: self.method,
})
}
fn z_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
if values.len() < 2 {
return Err(AnalyticsError::insufficient_data(
"Z-score requires at least 2 data points",
));
}
let n = values.len() as f64;
let mean = values.sum() / n;
let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
let std = variance.sqrt();
if std < f64::EPSILON {
return Err(AnalyticsError::numerical_instability(
"Standard deviation is too small",
));
}
let mut scores = Array1::zeros(values.len());
for (i, &value) in values.iter().enumerate() {
scores[i] = (value - mean) / std;
}
Ok(scores)
}
fn iqr_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
if values.len() < 4 {
return Err(AnalyticsError::insufficient_data(
"IQR requires at least 4 data points",
));
}
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let q1 = percentile(&sorted, 25.0)?;
let q3 = percentile(&sorted, 75.0)?;
let iqr = q3 - q1;
if iqr < f64::EPSILON {
return Err(AnalyticsError::numerical_instability("IQR is too small"));
}
let median = percentile(&sorted, 50.0)?;
let mut scores = Array1::zeros(values.len());
for (i, &value) in values.iter().enumerate() {
scores[i] = (value - median).abs() / iqr;
}
Ok(scores)
}
fn modified_z_score(&self, values: &ArrayView1<f64>) -> Result<Array1<f64>> {
if values.len() < 2 {
return Err(AnalyticsError::insufficient_data(
"Modified Z-score requires at least 2 data points",
));
}
let mut sorted = values.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let median = percentile(&sorted, 50.0)?;
let mut abs_deviations: Vec<f64> = values.iter().map(|x| (x - median).abs()).collect();
abs_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mad = percentile(&abs_deviations, 50.0)?;
let consistency_constant = 1.4826;
let normalized_mad = consistency_constant * mad;
if normalized_mad < f64::EPSILON {
return Err(AnalyticsError::numerical_instability("MAD is too small"));
}
let mut scores = Array1::zeros(values.len());
for (i, &value) in values.iter().enumerate() {
scores[i] = 0.6745 * (value - median) / normalized_mad;
}
Ok(scores)
}
}
fn percentile(sorted_data: &[f64], percentile: f64) -> Result<f64> {
if sorted_data.is_empty() {
return Err(AnalyticsError::insufficient_data("Data is empty"));
}
if !(0.0..=100.0).contains(&percentile) {
return Err(AnalyticsError::invalid_parameter(
"percentile",
"must be between 0 and 100",
));
}
let n = sorted_data.len();
if n == 1 {
return Ok(sorted_data[0]);
}
let rank = (percentile / 100.0) * ((n - 1) as f64);
let lower_idx = rank.floor() as usize;
let upper_idx = rank.ceil() as usize;
let fraction = rank - (lower_idx as f64);
Ok(sorted_data[lower_idx] + fraction * (sorted_data[upper_idx] - sorted_data[lower_idx]))
}
pub fn detect_change_points(values: &ArrayView1<f64>, threshold: f64) -> Result<Vec<usize>> {
if values.len() < 3 {
return Err(AnalyticsError::insufficient_data(
"Change point detection requires at least 3 data points",
));
}
let n = values.len() as f64;
let mean = values.sum() / n;
let mut cusum_pos = 0.0;
let mut cusum_neg = 0.0;
let mut change_points = Vec::new();
for (i, &value) in values.iter().enumerate() {
let deviation = value - mean;
cusum_pos = (cusum_pos + deviation).max(0.0);
cusum_neg = (cusum_neg - deviation).max(0.0);
if cusum_pos > threshold || cusum_neg > threshold {
change_points.push(i);
cusum_pos = 0.0;
cusum_neg = 0.0;
}
}
Ok(change_points)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_z_score_detection() {
let values = array![1.0, 2.0, 3.0, 4.0, 100.0]; let detector = AnomalyDetector::new(AnomalyMethod::ZScore, 1.9);
let result = detector
.detect(&values.view())
.expect("Z-score detection should succeed with valid data");
assert!(!result.anomaly_indices.is_empty());
assert!(result.anomaly_indices.contains(&4));
}
#[test]
fn test_iqr_detection() {
let values = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; let detector = AnomalyDetector::new(AnomalyMethod::IQR, 1.5);
let result = detector
.detect(&values.view())
.expect("IQR detection should succeed with valid data");
assert!(!result.anomaly_indices.is_empty());
assert!(result.anomaly_indices.contains(&5));
}
#[test]
fn test_modified_z_score() {
let values = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; let detector = AnomalyDetector::new(AnomalyMethod::ModifiedZScore, 3.5);
let result = detector
.detect(&values.view())
.expect("Modified Z-score detection should succeed with valid data");
assert!(!result.anomaly_indices.is_empty());
}
#[test]
fn test_percentile() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let p50 = percentile(&data, 50.0).expect("50th percentile calculation should succeed");
assert_abs_diff_eq!(p50, 3.0, epsilon = 1e-10);
let p25 = percentile(&data, 25.0).expect("25th percentile calculation should succeed");
assert_abs_diff_eq!(p25, 2.0, epsilon = 1e-10);
let p75 = percentile(&data, 75.0).expect("75th percentile calculation should succeed");
assert_abs_diff_eq!(p75, 4.0, epsilon = 1e-10);
}
#[test]
fn test_change_point_detection() {
let values = array![1.0, 1.0, 1.0, 5.0, 5.0, 5.0]; let change_points = detect_change_points(&values.view(), 1.0)
.expect("Change point detection should succeed with valid data");
assert!(!change_points.is_empty());
}
#[test]
fn test_no_anomalies() {
let values = array![1.0, 2.0, 3.0, 4.0, 5.0];
let detector = AnomalyDetector::new(AnomalyMethod::ZScore, 3.0);
let result = detector
.detect(&values.view())
.expect("Anomaly detection should succeed with valid data");
assert!(result.anomaly_indices.is_empty());
}
}