use crate::config::DetectConfig;
use crate::{robustz, Detector, Report, ScanContext};
use ax_core::finding::Handle;
use ax_core::{AnomalyClass, Column, Finding, Value};
/// Point-anomaly detector: flags individual numeric cells whose modified
/// z-score exceeds the configured threshold.
///
/// The type carries no state; all tuning comes from the `DetectConfig`
/// passed to `detect`.
#[derive(Clone, Debug, Default)]
pub struct PointDetector;
impl Detector for PointDetector {
    /// Stable identifier attached to findings and absence notes.
    fn id(&self) -> &'static str {
        "point.modz"
    }

    /// This detector reports point anomalies.
    fn class(&self) -> AnomalyClass {
        AnomalyClass::Point
    }

    /// Scans every numeric column of the current record set that has at
    /// least `cfg.point_min_n` numeric values, pushing findings into `out`.
    ///
    /// If no column qualifies, the detector is marked absent in `out`
    /// instead of silently reporting a clean result.
    fn detect(&self, ctx: &ScanContext, cfg: &DetectConfig, out: &mut Report) {
        let mut assessed_any = false;

        for column in &ctx.current.columns {
            if column.ty.is_numeric() {
                // Presumably the finite numeric values of the column (the
                // absence message below speaks of "finite values") — confirm
                // against `Column::numeric`.
                let values = column.numeric();
                if values.len() >= cfg.point_min_n {
                    assessed_any = true;
                    self.scan_column(column, &values, cfg, out);
                }
            }
        }

        if !assessed_any {
            let why = format!(
                "no numeric column with at least {} finite values",
                cfg.point_min_n
            );
            out.mark_absent(self.id(), why);
        }
    }
}
impl PointDetector {
    /// Scores every finite numeric cell of `col` against the robust
    /// center/scale derived from `xs` and pushes one finding per cell whose
    /// score is above `cfg.point_threshold`.
    ///
    /// Returns without emitting anything when `robustz::center_scale` yields
    /// no estimate for `xs`.
    fn scan_column(&self, col: &Column, xs: &[f64], cfg: &DetectConfig, out: &mut Report) {
        let (center, scale, k) = match robustz::center_scale(xs) {
            Some(estimate) => estimate,
            None => return,
        };

        // Keep the original row index so the finding points at the right cell
        // even when non-numeric or non-finite cells are skipped.
        let finite_cells = col
            .cells
            .iter()
            .enumerate()
            .filter_map(|(row, cell)| numeric_cell(cell).map(|value| (row, value)));

        for (row, value) in finite_cells {
            let modz = robustz::score(value, center, scale, k);
            if modz <= cfg.point_threshold {
                continue;
            }

            let confidence = robustz::confidence(modz, cfg.point_threshold);
            let reason = format!(
                "{} = {:.6}: modified z-score {:.3} exceeds {:.3} (center={:.6}, scale={:.6})",
                col.name, value, modz, cfg.point_threshold, center, scale
            );
            let handle = Handle::Cell {
                column: col.name.clone(),
                row,
            };
            let finding = Finding::new(
                self.id(),
                AnomalyClass::Point,
                handle,
                confidence,
                modz,
                reason,
            )
            .with_col_type(col.ty);

            out.push(finding);
        }
    }
}
/// Returns the cell's value as `f64` when `Value::as_f64` yields one and it
/// is finite; `None` otherwise (including NaN and infinities).
fn numeric_cell(v: &Value) -> Option<f64> {
    let number = v.as_f64()?;
    number.is_finite().then_some(number)
}
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Builds a column named `name` holding `xs` as float cells.
    fn col(name: &str, xs: &[f64]) -> Column {
        Column::new(name, xs.iter().map(|&x| Value::Float(x)).collect())
    }

    /// Runs the detector with the default config over a record set that
    /// contains a single float column `x` built from `xs`.
    fn run(xs: &[f64]) -> Report {
        let rs = ax_core::RecordSet::new("-", "test", vec![col("x", xs)]);
        let mut out = Report::new();
        PointDetector.detect(
            &ScanContext::single(&rs),
            &DetectConfig::default(),
            &mut out,
        );
        out
    }

    /// Sorted bit patterns of the values that were flagged in `xs`.
    ///
    /// Comparing by value (not row) makes the result independent of where a
    /// flagged value sits in the column.
    fn flagged_values(xs: &[f64]) -> Vec<u64> {
        let report = run(xs);
        let mut v: Vec<u64> = report
            .findings
            .iter()
            .map(|f| match &f.handle {
                Handle::Cell { row, .. } => xs[*row].to_bits(),
                _ => unreachable!("point detector emits cell handles"),
            })
            .collect();
        v.sort_unstable();
        v
    }

    /// Sorted row indices of the cells that were flagged in `xs`.
    fn flagged_rows(xs: &[f64]) -> Vec<usize> {
        let mut rows: Vec<usize> = run(xs)
            .findings
            .iter()
            .map(|f| match &f.handle {
                Handle::Cell { row, .. } => *row,
                _ => unreachable!(),
            })
            .collect();
        rows.sort_unstable();
        rows
    }

    /// A single extreme value among identical values yields exactly one
    /// finding, on the outlier's row.
    #[test]
    fn obvious_outlier_is_flagged() {
        let mut xs = vec![10.0; 30];
        xs.push(1000.0);
        let report = run(&xs);
        assert_eq!(report.findings.len(), 1);
        assert!(matches!(
            report.findings[0].handle,
            Handle::Cell { row: 30, .. }
        ));
        assert!(report.findings[0].confidence > 0.5);
    }

    /// A constant column is assessed (not marked absent) and produces no
    /// findings.
    #[test]
    fn constant_column_has_no_findings() {
        let report = run(&[7.0; 20]);
        assert!(report.is_clean());
        assert!(report.absent.is_empty());
    }

    /// With only string columns the detector reports itself absent.
    #[test]
    fn non_numeric_corpus_marks_absent() {
        let rs = ax_core::RecordSet::new(
            "-",
            "test",
            vec![Column::new(
                "name",
                (0..20).map(|i| Value::Str(format!("u{i}"))).collect(),
            )],
        );
        let mut out = Report::new();
        PointDetector.detect(
            &ScanContext::single(&rs),
            &DetectConfig::default(),
            &mut out,
        );
        assert!(out.is_clean());
        assert_eq!(out.absent.len(), 1);
        assert_eq!(out.absent[0].detector, "point.modz");
    }

    /// Three values is below the default minimum sample size, so the
    /// detector is marked absent rather than flagging the 100.0.
    #[test]
    fn too_few_values_marks_absent() {
        let report = run(&[1.0, 2.0, 100.0]);
        assert!(report.is_clean());
        assert_eq!(report.absent.len(), 1);
    }

    /// Boundary case for the minimum sample size; assumes the default
    /// `point_min_n` is 8, as the assertion message states — confirm against
    /// `DetectConfig::default`.
    #[test]
    fn exactly_min_n_values_is_assessed() {
        let report = run(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]);
        assert_eq!(
            report.findings.len(),
            1,
            "the 100.0 outlier must be flagged"
        );
        assert!(report.absent.is_empty(), "8 values is enough to assess");
    }

    /// A huge outlier in a small sample is still flagged, with a very large
    /// score.
    #[test]
    fn robust_path_catches_what_sigma_path_misses() {
        let report = run(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1000.0]);
        assert_eq!(report.findings.len(), 1);
        assert!(matches!(
            report.findings[0].handle,
            Handle::Cell { row: 8, .. }
        ));
        assert!(
            report.findings[0].score > 100.0,
            "MAD-scaled score is large"
        );
    }

    proptest! {
        /// Adding a constant to every value must not change which rows are
        /// flagged.
        #[test]
        fn shift_invariant(shift in -1e6f64..1e6, base in 1.0f64..5.0) {
            let mut xs: Vec<f64> = (0..40).map(|i| base + (i % 5) as f64 * 0.01).collect();
            xs.push(base + 500.0);
            let shifted: Vec<f64> = xs.iter().map(|x| x + shift).collect();
            // Compare rows, not values: the flagged values move with the shift.
            prop_assert_eq!(flagged_rows(&shifted), flagged_rows(&xs));
        }

        /// Multiplying every value by a positive factor must not change which
        /// rows are flagged.
        #[test]
        fn scale_invariant(scale in 0.001f64..1000.0) {
            let mut xs: Vec<f64> = (0..40).map(|i| 100.0 + (i % 7) as f64).collect();
            xs.push(100_000.0);
            let base_rows = flagged_rows(&xs);
            let scaled: Vec<f64> = xs.iter().map(|x| x * scale).collect();
            prop_assert_eq!(flagged_rows(&scaled), base_rows);
        }

        /// Two runs over the same input serialize to identical findings.
        #[test]
        fn deterministic(seed in 0u64..1000) {
            let xs: Vec<f64> = (0..50).map(|i| ((i as u64).wrapping_mul(seed) % 97) as f64).collect();
            let a = run(&xs);
            let b = run(&xs);
            prop_assert_eq!(
                serde_json::to_string(&a.findings).unwrap(),
                serde_json::to_string(&b.findings).unwrap()
            );
        }

        /// Rotating the column must not change which values are flagged.
        #[test]
        fn permutation_invariant_values(rot in 1usize..39) {
            let mut xs: Vec<f64> = (0..40).map(|i| 50.0 + (i % 3) as f64).collect();
            xs.push(9999.0);
            let base = flagged_values(&xs);
            xs.rotate_left(rot);
            prop_assert_eq!(flagged_values(&xs), base);
        }
    }
}