use crate::error::{AnomalyError, AnomalyResult};
#[derive(Debug, Clone)]
pub struct GpdFit {
pub threshold: f32,
pub xi: f32,
pub sigma: f32,
pub n_exceedances: usize,
}
impl GpdFit {
#[must_use]
pub fn survival(&self, x: f32) -> f32 {
let z = (x - self.threshold).max(0.0);
if self.sigma <= 0.0 {
return 0.0;
}
if self.xi.abs() < 1e-6 {
(-z / self.sigma).exp()
} else {
let arg = 1.0 + self.xi * z / self.sigma;
if arg <= 0.0 {
0.0
} else {
arg.powf(-1.0 / self.xi)
}
}
}
#[must_use]
pub fn p_value(&self, score: f32) -> f32 {
if score <= self.threshold {
return 1.0; }
self.survival(score)
}
}
#[derive(Debug, Clone)]
pub struct GpdDetector {
pub alpha: f32,
pub threshold_quantile: f32,
pub fit: Option<GpdFit>,
train_scores: Vec<f32>,
}
impl GpdDetector {
pub fn new(alpha: f32, threshold_quantile: f32) -> AnomalyResult<Self> {
if !(alpha > 0.0 && alpha < 1.0) {
return Err(AnomalyError::InvalidThresholdPercentile { p: alpha });
}
if !(threshold_quantile > 0.0 && threshold_quantile < 1.0) {
return Err(AnomalyError::InvalidThresholdPercentile {
p: threshold_quantile,
});
}
Ok(Self {
alpha,
threshold_quantile,
fit: None,
train_scores: Vec::new(),
})
}
pub fn fit(&mut self, scores: &[f32]) -> AnomalyResult<()> {
if scores.is_empty() {
return Err(AnomalyError::EmptyInput);
}
self.train_scores = scores.to_vec();
let mut sorted = scores.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let q_idx = ((self.threshold_quantile * n as f32).ceil() as usize).min(n - 1);
let threshold = sorted[q_idx];
let exceedances: Vec<f32> = sorted[q_idx + 1..].iter().map(|&v| v - threshold).collect();
if exceedances.len() < 5 {
if exceedances.is_empty() {
return Err(AnomalyError::InsufficientSamples { need: 5, got: 0 });
}
let mean_exc = exceedances.iter().sum::<f32>() / exceedances.len() as f32;
self.fit = Some(GpdFit {
threshold,
xi: 0.0,
sigma: mean_exc.max(1e-6),
n_exceedances: exceedances.len(),
});
return Ok(());
}
let n_exc = exceedances.len();
let l1 = exceedances.iter().sum::<f32>() / n_exc as f32;
let mut sorted_exc = exceedances.clone();
sorted_exc.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let l2: f32 = sorted_exc
.iter()
.enumerate()
.map(|(i, &x)| {
let w = (2 * i as i64 + 2 - n_exc as i64 - 1) as f32 / (n_exc as f32 - 1.0);
w * x
})
.sum::<f32>()
/ 2.0;
let denom = l1 - 2.0 * l2;
let (xi, sigma) = if denom.abs() < 1e-8 || l2 <= 0.0 {
(0.0_f32, l1.max(1e-6))
} else {
let xi_est = 2.0 - l1 / denom;
let sigma_est = (2.0 * l1 * l2 / denom).max(1e-6);
(xi_est.clamp(-2.0, 2.0), sigma_est)
};
self.fit = Some(GpdFit {
threshold,
xi,
sigma,
n_exceedances: n_exc,
});
Ok(())
}
pub fn p_value(&self, score: f32) -> AnomalyResult<f32> {
match &self.fit {
None => Err(AnomalyError::NotFitted),
Some(f) => Ok(f.p_value(score)),
}
}
pub fn predict(&self, score: f32) -> AnomalyResult<bool> {
Ok(self.p_value(score)? < self.alpha)
}
pub fn anomaly_scores(&self, scores: &[f32]) -> AnomalyResult<Vec<f32>> {
scores.iter().map(|&s| Ok(1.0 - self.p_value(s)?)).collect()
}
pub fn training_false_positive_rate(&self) -> AnomalyResult<f32> {
if self.fit.is_none() {
return Err(AnomalyError::NotFitted);
}
if self.train_scores.is_empty() {
return Ok(0.0);
}
let n = self.train_scores.len() as f32;
let flagged = self
.train_scores
.iter()
.filter(|&&s| self.predict(s).unwrap_or(false))
.count() as f32;
Ok(flagged / n)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_scores(n: usize) -> Vec<f32> {
(0..n).map(|i| i as f32 / (n - 1) as f32).collect()
}
#[test]
fn new_invalid_alpha_error() {
assert!(GpdDetector::new(0.0, 0.9).is_err());
assert!(GpdDetector::new(1.0, 0.9).is_err());
assert!(GpdDetector::new(0.05, 0.0).is_err());
}
#[test]
fn fit_empty_error() {
let mut det =
GpdDetector::new(0.05, 0.9).expect("GpdDetector::new should succeed with valid params");
assert!(det.fit(&[]).is_err());
}
#[test]
fn fit_succeeds() {
let scores = make_scores(100);
let mut det =
GpdDetector::new(0.05, 0.9).expect("GpdDetector::new should succeed with valid params");
det.fit(&scores).expect("GPD fit should succeed");
assert!(det.fit.is_some());
}
#[test]
fn p_value_inlier_not_flagged() {
let scores = make_scores(200);
let mut det =
GpdDetector::new(0.05, 0.9).expect("GpdDetector::new should succeed with valid params");
det.fit(&scores).expect("GPD fit should succeed");
let pv = det
.p_value(0.5)
.expect("p_value should succeed after fitting");
assert!(pv >= det.alpha, "p={pv} for median score");
}
#[test]
fn extreme_outlier_flagged() {
let scores = make_scores(200);
let mut det =
GpdDetector::new(0.05, 0.9).expect("GpdDetector::new should succeed with valid params");
det.fit(&scores).expect("GPD fit should succeed");
let pv = det
.p_value(1000.0)
.expect("p_value should succeed after fitting");
assert!(
pv < det.alpha,
"extreme score should have p < alpha, got {pv}"
);
}
#[test]
fn predict_below_threshold_not_anomaly() {
let scores = make_scores(200);
let mut det =
GpdDetector::new(0.05, 0.9).expect("GpdDetector::new should succeed with valid params");
det.fit(&scores).expect("GPD fit should succeed");
let thr = det
.fit
.as_ref()
.expect("fit should be Some after fitting")
.threshold;
let is_anom = det
.predict(thr * 0.5)
.expect("predict should succeed after fitting");
assert!(!is_anom, "score below threshold should not be anomaly");
}
#[test]
fn anomaly_scores_finite() {
let scores = make_scores(100);
let mut det =
GpdDetector::new(0.05, 0.9).expect("GpdDetector::new should succeed with valid params");
det.fit(&scores).expect("GPD fit should succeed");
let anom_scores = det
.anomaly_scores(&scores)
.expect("anomaly_scores should succeed after fitting");
assert!(anom_scores.iter().all(|s| s.is_finite()), "not all finite");
}
#[test]
fn gpd_survival_monotone() {
let fit = GpdFit {
threshold: 0.0,
xi: 0.2,
sigma: 1.0,
n_exceedances: 50,
};
let mut prev = 1.0_f32;
for i in 0..20 {
let x = i as f32 * 0.5;
let s = fit.survival(x);
assert!(s <= prev + 1e-6, "not monotone at x={x}");
assert!((0.0..=1.0).contains(&s), "out of range at x={x}");
prev = s;
}
}
#[test]
fn gpd_survival_exponential_case() {
let sigma = 2.0_f32;
let fit = GpdFit {
threshold: 0.0,
xi: 0.0,
sigma,
n_exceedances: 20,
};
for &x in &[0.5_f32, 1.0, 2.0, 4.0] {
let expected = (-x / sigma).exp();
let got = fit.survival(x);
assert!(
(got - expected).abs() < 0.01,
"x={x}: got={got} exp={expected}"
);
}
}
#[test]
fn training_fpr_near_alpha() {
let scores = make_scores(200);
let alpha = 0.1_f32;
let mut det = GpdDetector::new(alpha, 0.9)
.expect("GpdDetector::new should succeed with valid params");
det.fit(&scores).expect("GPD fit should succeed");
let fpr = det
.training_false_positive_rate()
.expect("training_false_positive_rate should succeed after fitting");
assert!(
fpr <= alpha + 0.15,
"FPR={fpr} should be ≤ alpha+0.15={}",
alpha + 0.15
);
}
#[test]
fn fit_single_exceedance_no_crash() {
let mut scores = vec![0.0_f32; 99];
scores.push(10.0_f32); let mut det = GpdDetector::new(0.05, 0.98)
.expect("GpdDetector::new should succeed with valid params");
let _ = det.fit(&scores);
}
#[test]
fn p_value_not_fitted_error() {
let det =
GpdDetector::new(0.05, 0.9).expect("GpdDetector::new should succeed with valid params");
assert!(det.p_value(0.5).is_err());
}
}