use crate::value::ColType;
use serde::{Deserialize, Serialize};
/// Category assigned to a detected anomaly.
///
/// Serialized by serde using snake_case variant names (see the
/// `rename_all` attribute below). The enum is fieldless and `Copy`, so it is
/// passed around by value.
// NOTE(review): the meaning of each variant is not spelled out in this file;
// the names look like common anomaly-taxonomy terms — confirm with the
// detectors that emit them before relying on a specific interpretation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AnomalyClass {
/// Serialized as `"point"`.
Point,
/// Serialized as `"contextual"`.
Contextual,
/// Serialized as `"collective"`.
Collective,
/// Serialized as `"distributional"`.
Distributional,
/// Serialized as `"structural"`.
Structural,
/// Serialized as `"multivariate"`.
Multivariate,
/// Serialized as `"cadence"`.
Cadence,
}
impl AnomalyClass {
pub fn token(self) -> &'static str {
match self {
AnomalyClass::Point => "point",
AnomalyClass::Contextual => "contextual",
AnomalyClass::Collective => "collective",
AnomalyClass::Distributional => "distributional",
AnomalyClass::Structural => "structural",
AnomalyClass::Multivariate => "multivariate",
AnomalyClass::Cadence => "cadence",
}
}
pub const ALL: [AnomalyClass; 7] = [
AnomalyClass::Point,
AnomalyClass::Contextual,
AnomalyClass::Collective,
AnomalyClass::Distributional,
AnomalyClass::Structural,
AnomalyClass::Multivariate,
AnomalyClass::Cadence,
];
}
/// Severity level of a finding.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so
/// `Info < Low < Medium < High < Critical`. Serialized by serde as the
/// lowercase variant name (see the `rename_all` attribute below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
/// Lowest level; serialized as `"info"`.
Info,
/// Serialized as `"low"`.
Low,
/// Serialized as `"medium"`.
Medium,
/// Serialized as `"high"`.
High,
/// Highest level; serialized as `"critical"`.
Critical,
}
impl Severity {
    /// Buckets a confidence value into a severity level.
    ///
    /// Each threshold is an inclusive lower bound, tried from highest to
    /// lowest: `>= 0.95` is `Critical`, `>= 0.85` is `High`, `>= 0.65` is
    /// `Medium`, `>= 0.40` is `Low`. A value that meets none of them —
    /// including NaN, for which every comparison is false — is `Info`.
    pub fn from_confidence(c: f64) -> Severity {
        // Ordered from the strictest floor down; the first floor met wins.
        const FLOORS: [(f64, Severity); 4] = [
            (0.95, Severity::Critical),
            (0.85, Severity::High),
            (0.65, Severity::Medium),
            (0.40, Severity::Low),
        ];

        FLOORS
            .iter()
            .find(|(floor, _)| c >= *floor)
            .map_or(Severity::Info, |&(_, level)| level)
    }
}
/// Identifies the part of the data that a finding refers to.
///
/// Serialized by serde as an internally tagged enum: the variant name goes
/// into a `"kind"` field in snake_case, alongside the variant's own fields
/// (see the `serde` attribute below).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Handle {
/// A whole column, referenced by name.
Column { name: String },
/// A single cell: a column name plus a row index.
Cell { column: String, row: usize },
/// A span of rows within one column.
// NOTE(review): whether `end` is inclusive or exclusive is not established
// anywhere in this file — confirm against the code that produces ranges.
Range {
column: String,
start: usize,
end: usize,
},
/// A column referenced under the `dist` kind.
// NOTE(review): presumably "distribution of the column's values" — confirm.
Dist { column: String },
/// A whole row, referenced by index.
Row { row: usize },
}
impl Handle {
pub fn canonical(&self) -> String {
match self {
Handle::Column { name } => format!("col:{name}"),
Handle::Cell { column, row } => format!("cell:{column}:{row}"),
Handle::Range { column, start, end } => format!("range:{column}:{start}:{end}"),
Handle::Dist { column } => format!("dist:{column}"),
Handle::Row { row } => format!("row:{row}"),
}
}
pub fn parse(s: &str) -> Option<Handle> {
let (kind, rest) = s.split_once(':')?;
match kind {
"col" => Some(Handle::Column {
name: rest.to_string(),
}),
"dist" => Some(Handle::Dist {
column: rest.to_string(),
}),
"row" => Some(Handle::Row {
row: rest.parse().ok()?,
}),
"cell" => {
let (column, row) = rest.rsplit_once(':')?;
Some(Handle::Cell {
column: column.to_string(),
row: row.parse().ok()?,
})
}
"range" => {
let mut it = rest.rsplitn(3, ':');
let end = it.next()?.parse().ok()?;
let start = it.next()?.parse().ok()?;
let column = it.next()?.to_string();
Some(Handle::Range { column, start, end })
}
_ => None,
}
}
}
/// One anomaly reported by a detector.
///
/// All fields are public. `Finding::new` is the constructor that normalizes
/// `confidence` and derives `severity` from it; building the struct by hand
/// bypasses that.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Finding {
/// Name of the detector that produced this finding.
pub detector: String,
/// Anomaly category.
pub class: AnomalyClass,
/// What the finding points at (column, cell, range, ...).
pub handle: Handle,
/// Confidence value; `Finding::new` clamps it into `[0.0, 1.0]`.
pub confidence: f64,
/// Severity level; `Finding::new` derives it from `confidence` via
/// `Severity::from_confidence`.
pub severity: Severity,
/// Score supplied by the caller; `Finding::new` stores it unchanged.
// NOTE(review): scale and meaning look detector-specific — confirm.
pub score: f64,
/// Free-text explanation supplied by the caller.
pub reason: String,
/// Optional column type; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub col_type: Option<ColType>,
}
impl Finding {
    /// Builds a finding, normalizing `confidence` and deriving `severity`.
    ///
    /// * `detector` – detector name, converted into an owned `String`.
    /// * `class` – anomaly category, stored as given.
    /// * `handle` – what the finding points at, stored as given.
    /// * `confidence` – clamped into `[0.0, 1.0]`; a NaN input is stored as
    ///   `0.0`.
    /// * `score` – stored unchanged.
    /// * `reason` – explanation text, converted into an owned `String`.
    ///
    /// `severity` is computed from the normalized confidence with
    /// `Severity::from_confidence`. `col_type` starts out as `None`; chain
    /// [`Finding::with_col_type`] to set it.
    pub fn new(
        detector: impl Into<String>,
        class: AnomalyClass,
        handle: Handle,
        confidence: f64,
        score: f64,
        reason: impl Into<String>,
    ) -> Self {
        // `f64::clamp` hands a NaN receiver straight back, which would store a
        // confidence outside [0, 1]. `Severity::from_confidence` already
        // buckets NaN as `Info` (the lowest level), so storing 0.0 keeps the
        // two fields consistent with each other.
        let confidence = if confidence.is_nan() {
            0.0
        } else {
            confidence.clamp(0.0, 1.0)
        };
        let severity = Severity::from_confidence(confidence);

        Finding {
            detector: detector.into(),
            class,
            handle,
            confidence,
            severity,
            score,
            reason: reason.into(),
            col_type: None,
        }
    }

    /// Returns the finding with `col_type` set to `Some(ty)`.
    ///
    /// Consumes and returns `self` so it can be chained after
    /// [`Finding::new`].
    pub fn with_col_type(mut self, ty: ColType) -> Self {
        self.col_type = Some(ty);
        self
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every handle shape must survive `canonical` -> `parse` unchanged.
    #[test]
    fn handle_roundtrips() {
        let cases = [
            Handle::Column {
                name: "status".into(),
            },
            Handle::Cell {
                column: "amount".into(),
                row: 42,
            },
            Handle::Range {
                column: "ts".into(),
                start: 3,
                end: 9,
            },
            Handle::Dist {
                column: "score".into(),
            },
            Handle::Row { row: 7 },
        ];
        for h in cases {
            let s = h.canonical();
            assert_eq!(Handle::parse(&s), Some(h), "roundtrip failed for {s}");
        }
    }

    /// Column names may contain the ':' separator themselves; the parser
    /// takes numeric fields from the right so these must still round-trip.
    #[test]
    fn handle_roundtrips_with_colons_in_column_names() {
        let cases = [
            Handle::Column {
                name: "a:b".into(),
            },
            Handle::Cell {
                column: "a:b".into(),
                row: 1,
            },
            Handle::Range {
                column: "a:b:c".into(),
                start: 0,
                end: 5,
            },
            Handle::Dist {
                column: "x:y".into(),
            },
        ];
        for h in cases {
            let s = h.canonical();
            assert_eq!(Handle::parse(&s), Some(h), "roundtrip failed for {s}");
        }
    }

    /// Malformed strings must be rejected rather than partially parsed.
    #[test]
    fn handle_rejects_garbage() {
        let bad = [
            "",
            "nope",
            "unknown:x",
            "row:",
            "row:-1",
            "cell:amount",
            "cell:amount:notanumber",
            "range:ts:3",
            "range:3:9",
            "range:ts:a:9",
        ];
        for s in bad {
            assert_eq!(Handle::parse(s), None, "accepted malformed handle {s:?}");
        }
    }

    /// Tokens are fixed strings and unique across all classes.
    #[test]
    fn class_tokens_are_exact() {
        assert_eq!(AnomalyClass::Point.token(), "point");
        assert_eq!(AnomalyClass::Distributional.token(), "distributional");
        assert_eq!(AnomalyClass::Cadence.token(), "cadence");
        let mut seen = std::collections::HashSet::new();
        for c in AnomalyClass::ALL {
            assert!(!c.token().is_empty());
            assert!(seen.insert(c.token()), "duplicate token {}", c.token());
        }
    }

    /// Thresholds are inclusive lower bounds: the boundary value lands in the
    /// higher bucket, a value just below it in the lower one.
    #[test]
    fn severity_buckets_are_exact_at_boundaries() {
        let cases = [
            (0.96, Severity::Critical),
            (0.95, Severity::Critical),
            (0.9499, Severity::High),
            (0.90, Severity::High),
            (0.85, Severity::High),
            (0.8499, Severity::Medium),
            (0.70, Severity::Medium),
            (0.65, Severity::Medium),
            (0.6499, Severity::Low),
            (0.50, Severity::Low),
            (0.40, Severity::Low),
            (0.3999, Severity::Info),
            (0.30, Severity::Info),
            (0.0, Severity::Info),
        ];
        for (c, want) in cases {
            assert_eq!(Severity::from_confidence(c), want, "confidence {c}");
        }
    }

    /// Inputs outside `[0, 1]` — and NaN — still map to a defined bucket.
    #[test]
    fn severity_handles_out_of_range_confidence() {
        assert_eq!(Severity::from_confidence(-1.0), Severity::Info);
        assert_eq!(Severity::from_confidence(2.0), Severity::Critical);
        assert_eq!(Severity::from_confidence(f64::NAN), Severity::Info);
    }

    #[test]
    fn severity_is_monotonic_in_confidence() {
        let mut prev = Severity::Info;
        for c in [0.0, 0.4, 0.65, 0.85, 0.95, 1.0] {
            let s = Severity::from_confidence(c);
            assert!(s >= prev);
            prev = s;
        }
    }

    /// `Finding::new` clamps confidence from both sides and derives severity
    /// from the clamped value; the score is passed through untouched.
    #[test]
    fn confidence_is_clamped() {
        let f = Finding::new(
            "d",
            AnomalyClass::Point,
            Handle::Column { name: "x".into() },
            5.0,
            9.0,
            "r",
        );
        assert_eq!(f.confidence, 1.0);
        assert_eq!(f.severity, Severity::Critical);
        assert_eq!(f.score, 9.0);

        let g = Finding::new(
            "d",
            AnomalyClass::Point,
            Handle::Column { name: "x".into() },
            -3.0,
            9.0,
            "r",
        );
        assert_eq!(g.confidence, 0.0);
        assert_eq!(g.severity, Severity::Info);
        assert_eq!(g.score, 9.0);
    }
}