use crate::dict::Dict;
use crate::finding::{AnomalyClass, Finding, Severity};
use serde::Serialize;
/// Protocol identifier copied into the `protocol` field of every built [`Envelope`].
pub const PROTOCOL: &str = "anomalyx/tq1";
/// Exit status reported alongside an envelope.
///
/// The numeric values are fixed by the explicit discriminants below and are
/// what [`ExitCode::code`] returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
pub enum ExitCode {
    /// No findings were detected.
    Clean = 0,
    /// At least one finding was detected.
    Anomalies = 1,
    /// Not produced anywhere in this file; presumably reserved for a failed
    /// run (confirm against the caller).
    Error = 2,
}

impl ExitCode {
    /// Returns the integer discriminant of this exit code.
    pub fn code(self) -> i32 {
        // `#[repr(i32)]` makes this cast yield the declared discriminant.
        self as i32
    }
}
/// One entry of an envelope's `absent` list: a detector name paired with a
/// free-text reason.
#[derive(Debug, Clone, Serialize)]
pub struct Absence {
    /// Name of the detector this entry refers to.
    pub detector: String,
    /// Explanation recorded for that detector.
    pub reason: String,
}
/// Column names for the cells of each finding row, in cell order.
///
/// The order here matches the order in which `EnvelopeBuilder::build` pushes
/// the cells of a row.
pub const FINDING_COLUMNS: [&str; 7] = [
    "detector",
    "class",
    "handle",
    "confidence",
    "severity",
    "score",
    "reason",
];
/// Aggregate view of everything that was detected.
///
/// `EnvelopeBuilder::build` computes these values before applying any
/// `min_severity` / `top` scoping, so they describe all detected findings
/// rather than only the emitted rows.
#[derive(Debug, Clone, Serialize)]
pub struct Summary {
    /// Number of detected findings.
    pub total: usize,
    /// Greatest severity among the detected findings; `None` when there are none.
    pub max_severity: Option<Severity>,
    /// Per-class tallies, one entry per element of `AnomalyClass::ALL`.
    pub by_class: Vec<ClassCount>,
}
/// Number of detected findings belonging to one anomaly class.
#[derive(Debug, Clone, Serialize)]
pub struct ClassCount {
    /// The class being counted.
    pub class: AnomalyClass,
    /// How many detected findings have this class (may be zero).
    pub count: usize,
}
/// Record of the output scoping that was applied to the emitted rows.
///
/// `EnvelopeBuilder::build` only attaches this block when a severity floor
/// and/or a row cap was requested.
#[derive(Debug, Clone, Serialize)]
pub struct Scope {
    /// Requested severity floor; left out of the serialized form when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_severity: Option<Severity>,
    /// Requested cap on emitted rows; left out of the serialized form when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top: Option<usize>,
    /// Number of findings before scoping.
    pub detected: usize,
    /// Number of findings that remained after scoping.
    pub emitted: usize,
    /// `detected - emitted`.
    pub dropped: usize,
}
/// The serializable result document produced by `EnvelopeBuilder::build`.
///
/// Fields are serialized in declaration order; do not reorder them without
/// intending to change the output.
#[derive(Debug, Clone, Serialize)]
pub struct Envelope {
    /// Always set to [`PROTOCOL`] by the builder.
    pub protocol: String,
    /// Configuration version string, passed through from the builder.
    pub config_version: String,
    /// Source label, passed through from the builder.
    pub source: String,
    /// Format label, passed through from the builder.
    pub format: String,
    /// Optional baseline label; left out of the serialized form when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub baseline: Option<String>,
    /// Row count supplied to the builder.
    pub rows_scanned: usize,
    /// String table that the index cells of `rows` refer into.
    pub dict: Dict,
    /// Column names for `rows`; the builder fills this from [`FINDING_COLUMNS`].
    pub columns: Vec<String>,
    /// One row per emitted finding, with cells ordered as in `columns`.
    pub rows: Vec<Vec<serde_json::Value>>,
    /// Detector/reason pairs registered through `EnvelopeBuilder::absent`.
    pub absent: Vec<Absence>,
    /// Aggregates over all detected findings.
    pub summary: Summary,
    /// Scoping details; left out of the serialized form when no scoping was requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scope: Option<Scope>,
    /// Integer value of the [`ExitCode`] chosen by the builder.
    pub exit: i32,
}
/// Accumulates run metadata, findings and scoping options, then turns them
/// into an [`Envelope`] via `build`.
pub struct EnvelopeBuilder {
    /// Copied verbatim into `Envelope::config_version`.
    config_version: String,
    /// Copied verbatim into `Envelope::source`.
    source: String,
    /// Copied verbatim into `Envelope::format`.
    format: String,
    /// Optional baseline label; `None` until `baseline` is called.
    baseline: Option<String>,
    /// Copied verbatim into `Envelope::rows_scanned`.
    rows_scanned: usize,
    /// Every finding added so far, in insertion order.
    findings: Vec<Finding>,
    /// Every detector/reason pair added so far, in insertion order.
    absent: Vec<Absence>,
    /// Severity floor for emitted rows, if one was requested.
    min_severity: Option<Severity>,
    /// Cap on the number of emitted rows, if one was requested.
    top: Option<usize>,
}
impl EnvelopeBuilder {
    /// Creates a builder with the mandatory run metadata.
    ///
    /// The builder starts with no baseline, no findings, no absent entries
    /// and no output scoping.
    #[must_use]
    pub fn new(
        config_version: impl Into<String>,
        source: impl Into<String>,
        format: impl Into<String>,
        rows_scanned: usize,
    ) -> Self {
        EnvelopeBuilder {
            config_version: config_version.into(),
            source: source.into(),
            format: format.into(),
            baseline: None,
            rows_scanned,
            findings: Vec::new(),
            absent: Vec::new(),
            min_severity: None,
            top: None,
        }
    }

    /// Emits only findings whose severity is at least `s`.
    ///
    /// This affects the emitted rows only; the summary and exit code still
    /// cover every detected finding.
    #[must_use]
    pub fn min_severity(mut self, s: Severity) -> Self {
        self.min_severity = Some(s);
        self
    }

    /// Emits at most `n` findings, taken from the front of the sorted list
    /// after any severity floor has been applied.
    #[must_use]
    pub fn top(mut self, n: usize) -> Self {
        self.top = Some(n);
        self
    }

    /// Records the baseline label to include in the envelope.
    #[must_use]
    pub fn baseline(mut self, source: impl Into<String>) -> Self {
        self.baseline = Some(source.into());
        self
    }

    /// Appends `findings` to those already collected; may be called repeatedly.
    #[must_use]
    pub fn findings(mut self, mut findings: Vec<Finding>) -> Self {
        self.findings.append(&mut findings);
        self
    }

    /// Appends one detector/reason pair to the envelope's `absent` list.
    #[must_use]
    pub fn absent(mut self, detector: impl Into<String>, reason: impl Into<String>) -> Self {
        self.absent.push(Absence {
            detector: detector.into(),
            reason: reason.into(),
        });
        self
    }

    /// Consumes the builder and produces the final [`Envelope`].
    ///
    /// Steps, in order:
    /// 1. Sort findings: severity descending, then class token, canonical
    ///    handle and detector ascending, then score and confidence
    ///    descending, then reason ascending.
    /// 2. Compute the summary and exit code over *all* detected findings.
    /// 3. Apply the optional severity floor, then the optional `top` cap.
    /// 4. Intern the string cells into a fresh [`Dict`] and encode the rows.
    #[must_use]
    pub fn build(mut self) -> Envelope {
        self.findings.sort_by(|a, b| {
            b.severity
                .cmp(&a.severity)
                .then_with(|| a.class.token().cmp(b.class.token()))
                .then_with(|| a.handle.canonical().cmp(&b.handle.canonical()))
                .then_with(|| a.detector.cmp(&b.detector))
                // The remaining keys break ties between findings that share
                // severity, class, handle and detector. Without them such
                // findings keep caller order, so the emitted rows (and which
                // ones survive `top`) would depend on insertion order.
                // `total_cmp` gives a total order even when a value is NaN.
                .then_with(|| b.score.total_cmp(&a.score))
                .then_with(|| b.confidence.total_cmp(&a.confidence))
                .then_with(|| a.reason.cmp(&b.reason))
        });

        // Summary and exit code are taken before scoping so they describe
        // everything that was detected, not just what is emitted.
        let detected = self.findings.len();
        let max_severity = self.findings.iter().map(|f| f.severity).max();
        let by_class = AnomalyClass::ALL
            .iter()
            .map(|&class| ClassCount {
                class,
                count: self.findings.iter().filter(|f| f.class == class).count(),
            })
            .collect();
        let exit = if detected == 0 {
            ExitCode::Clean
        } else {
            ExitCode::Anomalies
        };

        // Scoping: severity floor first, then the cap on the sorted remainder.
        if let Some(min) = self.min_severity {
            self.findings.retain(|f| f.severity >= min);
        }
        if let Some(n) = self.top {
            self.findings.truncate(n);
        }
        let emitted = self.findings.len();
        let scope = if self.min_severity.is_some() || self.top.is_some() {
            Some(Scope {
                min_severity: self.min_severity,
                top: self.top,
                detected,
                emitted,
                // Scoping only removes findings, so this cannot underflow.
                dropped: detected - emitted,
            })
        } else {
            None
        };

        // String cells are stored as indices into `dict`; the cell order
        // must stay in sync with `FINDING_COLUMNS`.
        let mut dict = Dict::new();
        let mut rows = Vec::with_capacity(emitted);
        for f in &self.findings {
            let detector = dict.intern(&f.detector);
            let class = dict.intern(f.class.token());
            let handle = dict.intern(&f.handle.canonical());
            let severity = dict.intern(severity_token(f.severity));
            let reason = dict.intern(&f.reason);
            rows.push(vec![
                json_u32(detector),
                json_u32(class),
                json_u32(handle),
                json_f64(f.confidence),
                json_u32(severity),
                json_f64(f.score),
                json_u32(reason),
            ]);
        }

        let summary = Summary {
            total: detected,
            max_severity,
            by_class,
        };
        Envelope {
            protocol: PROTOCOL.to_string(),
            config_version: self.config_version,
            source: self.source,
            format: self.format,
            baseline: self.baseline,
            rows_scanned: self.rows_scanned,
            dict,
            columns: FINDING_COLUMNS.iter().map(|s| s.to_string()).collect(),
            rows,
            absent: self.absent,
            summary,
            scope,
            exit: exit.code(),
        }
    }
}
fn severity_token(s: Severity) -> &'static str {
match s {
Severity::Info => "info",
Severity::Low => "low",
Severity::Medium => "medium",
Severity::High => "high",
Severity::Critical => "critical",
}
}
/// Wraps a `u32` as a JSON number value.
fn json_u32(v: u32) -> serde_json::Value {
    serde_json::Value::Number(serde_json::Number::from(v))
}
/// Wraps an `f64` as a JSON number value, or JSON `null` when
/// `serde_json::Number::from_f64` rejects it.
fn json_f64(v: f64) -> serde_json::Value {
    match serde_json::Number::from_f64(v) {
        Some(number) => serde_json::Value::Number(number),
        None => serde_json::Value::Null,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::finding::Handle;

    /// Builds a column-handle finding for detector `"d"` with reason `"r"`,
    /// passing `conf` for both numeric arguments of `Finding::new`.
    ///
    /// NOTE(review): the severity expectations below rely on how
    /// `Finding::new` (defined in `crate::finding`) derives severity from
    /// these numbers.
    fn finding(conf: f64, class: AnomalyClass, col: &str) -> Finding {
        let handle = Handle::Column { name: col.into() };
        Finding::new("d", class, handle, conf, conf, "r")
    }

    /// The numeric exit codes are part of the contract.
    #[test]
    fn exit_codes_are_committed() {
        assert_eq!(ExitCode::Clean.code(), 0);
        assert_eq!(ExitCode::Anomalies.code(), 1);
        assert_eq!(ExitCode::Error.code(), 2);
    }

    /// A builder with no findings yields a clean, empty summary.
    #[test]
    fn empty_is_clean() {
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 0).build();

        assert_eq!(envelope.exit, ExitCode::Clean.code());
        assert_eq!(envelope.summary.total, 0);
        assert_eq!(envelope.summary.max_severity, None);
    }

    /// Each class tally counts only findings of that class; unused classes report zero.
    #[test]
    fn by_class_counts_only_matching_class() {
        let detected = vec![
            finding(0.9, AnomalyClass::Point, "a"),
            finding(0.9, AnomalyClass::Point, "b"),
            finding(0.9, AnomalyClass::Structural, "c"),
        ];
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(detected)
            .build();

        let count_of = |class: AnomalyClass| {
            envelope
                .summary
                .by_class
                .iter()
                .find(|entry| entry.class == class)
                .unwrap()
                .count
        };

        assert_eq!(count_of(AnomalyClass::Point), 2);
        assert_eq!(count_of(AnomalyClass::Structural), 1);
        assert_eq!(count_of(AnomalyClass::Cadence), 0);
    }

    /// Without `min_severity` or `top`, no scope block is attached and every finding is emitted.
    #[test]
    fn no_scoping_omits_the_scope_block() {
        let detected = vec![
            finding(0.9, AnomalyClass::Point, "a"),
            finding(0.5, AnomalyClass::Point, "b"),
        ];
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(detected)
            .build();

        assert!(envelope.scope.is_none(), "no scoping ⇒ no scope block");
        assert_eq!(envelope.summary.total, 2);
        assert_eq!(envelope.rows.len(), 2, "all findings emitted");
    }

    /// `top` limits the emitted rows but leaves the summary and exit code untouched.
    #[test]
    fn top_caps_emitted_but_summary_and_exit_reflect_all_detected() {
        let detected = vec![
            finding(0.99, AnomalyClass::Point, "crit"),
            finding(0.50, AnomalyClass::Point, "lo1"),
            finding(0.50, AnomalyClass::Point, "lo2"),
        ];
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(detected)
            .top(1)
            .build();

        assert_eq!(envelope.rows.len(), 1, "only the top finding emitted");
        assert_eq!(
            envelope.summary.total, 3,
            "summary.total is the detected count"
        );
        assert_eq!(envelope.exit, ExitCode::Anomalies.code());

        let scope = envelope.scope.unwrap();
        assert_eq!(scope.top, Some(1));
        assert_eq!(scope.detected, 3);
        assert_eq!(scope.emitted, 1);
        assert_eq!(scope.dropped, 2);
    }

    /// `min_severity` keeps findings at or above the floor and drops the rest.
    #[test]
    fn min_severity_filters_at_or_above_the_floor() {
        let detected = vec![
            finding(0.99, AnomalyClass::Point, "crit"),
            finding(0.86, AnomalyClass::Point, "high"),
            finding(0.50, AnomalyClass::Point, "low"),
        ];
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(detected)
            .min_severity(Severity::High)
            .build();

        assert_eq!(envelope.rows.len(), 2);

        let scope = envelope.scope.unwrap();
        assert_eq!(scope.min_severity, Some(Severity::High));
        assert_eq!(scope.detected, 3);
        assert_eq!(scope.emitted, 2);
        assert_eq!(scope.dropped, 1);
    }

    /// Filtering away every row must not turn the exit code into `Clean`.
    #[test]
    fn scoping_to_zero_findings_still_exits_anomalies() {
        let detected = vec![
            finding(0.50, AnomalyClass::Point, "a"),
            finding(0.50, AnomalyClass::Point, "b"),
        ];
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(detected)
            .min_severity(Severity::Critical)
            .build();

        assert_eq!(envelope.rows.len(), 0, "nothing meets the critical floor");
        assert_eq!(
            envelope.exit,
            ExitCode::Anomalies.code(),
            "but anomalies WERE found"
        );
        assert_eq!(envelope.summary.total, 2);
        assert_eq!(envelope.summary.max_severity, Some(Severity::Low));

        let scope = envelope.scope.unwrap();
        assert_eq!(scope.detected, 2);
        assert_eq!(scope.emitted, 0);
        assert_eq!(scope.dropped, 2);
    }

    /// Cells 3 and 5 of a row carry JSON numbers rather than dictionary indices.
    #[test]
    fn row_encodes_confidence_and_score_as_numbers() {
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 1)
            .findings(vec![finding(0.77, AnomalyClass::Point, "a")])
            .build();

        let first_row = &envelope.rows[0];
        assert_eq!(first_row[3].as_f64(), Some(0.77));
        assert_eq!(first_row[5].as_f64(), Some(0.77));
    }

    /// With findings present: exit is `Anomalies`, and the first row is the most severe one.
    #[test]
    fn findings_set_anomalies_exit_and_max_severity() {
        let detected = vec![
            finding(0.99, AnomalyClass::Point, "a"),
            finding(0.50, AnomalyClass::Structural, "b"),
        ];
        let envelope = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(detected)
            .build();

        assert_eq!(envelope.exit, ExitCode::Anomalies.code());
        assert_eq!(envelope.summary.total, 2);
        assert_eq!(envelope.summary.max_severity, Some(Severity::Critical));
        assert_eq!(envelope.columns.len(), FINDING_COLUMNS.len());

        // Cell 4 is the severity column: a dictionary index to resolve.
        let first_sev_idx = envelope.rows[0][4].as_u64().unwrap() as u32;
        assert_eq!(envelope.dict.get(first_sev_idx), Some("critical"));
    }

    /// The serialized envelope does not depend on the order findings were supplied in.
    #[test]
    fn build_is_order_independent() {
        let forward = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.9, AnomalyClass::Point, "a"),
                finding(0.5, AnomalyClass::Point, "b"),
            ])
            .build();
        let reversed = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.5, AnomalyClass::Point, "b"),
                finding(0.9, AnomalyClass::Point, "a"),
            ])
            .build();

        let forward_json = serde_json::to_string(&forward).unwrap();
        let reversed_json = serde_json::to_string(&reversed).unwrap();
        assert_eq!(forward_json, reversed_json);
    }
}