use anyhow::Result;
use crate::detect::AnomalyDetector;
use crate::ingest::{Embedder, VectorCache};
/// Number of distinct templates whose vectors are collected before the detector is fitted.
const CALIBRATION_TEMPLATES: usize = 64;
/// Upper bound on the `k` argument handed to `AnomalyDetector::fit_auto`; the value actually
/// passed is additionally capped by the number of collected calibration samples.
/// NOTE(review): presumably the number of centroids — confirm against `detect`.
const CENTROID_K: usize = 8;
/// Passed as the final argument to `AnomalyDetector::fit_auto`.
/// NOTE(review): presumably a lower bound for the auto-derived threshold — confirm against `detect`.
const THRESHOLD_FLOOR: f32 = 0.10;
/// Passed to `AnomalyDetector::fit_auto` together with the flat calibration buffer.
/// NOTE(review): presumably the length of each embedding vector; nothing here verifies that the
/// embedder actually produces vectors of this length.
const DIM: usize = 384;
/// Outcome of feeding a single log line through [`LocalPipeline::process`].
#[derive(Debug, Clone, PartialEq)]
pub struct LineResult {
    /// Template string reported for the line by the parsing/embedding cache.
    pub template: String,
    /// Distance score from the fitted detector, or `None` while the pipeline is still
    /// calibrating (no detector exists yet).
    pub score: Option<f32>,
    /// `true` when `score` is strictly greater than the active threshold; always `false`
    /// while the pipeline is still calibrating.
    pub is_anomaly: bool,
}
/// Two-phase line processor: it first collects vectors for distinct templates (calibration),
/// then fits an `AnomalyDetector` and scores every subsequent line against it.
pub struct LocalPipeline {
/// Produces the `(parsed, vector)` pair for each incoming line via `get_or_embed`.
cache: VectorCache,
/// `None` while calibrating; set once enough distinct templates have been collected.
detector: Option<AnomalyDetector>,
/// Calibration vectors appended back-to-back into one flat buffer.
calibration_buf: Vec<f32>,
/// Number of vectors appended to `calibration_buf` so far.
calibration_count: usize,
/// When set, used as the anomaly threshold instead of the detector's own threshold.
threshold_override: Option<f32>,
/// Templates already observed during calibration, so each contributes at most one vector.
seen_templates: std::collections::HashSet<String>,
}
impl LocalPipeline {
    /// Creates an uncalibrated pipeline.
    ///
    /// `embedder` is handed to the internal `VectorCache`. When `threshold_override` is
    /// `Some`, it replaces the detector's own threshold when lines are scored.
    pub fn new(embedder: Embedder, threshold_override: Option<f32>) -> Self {
        Self {
            cache: VectorCache::new(embedder),
            detector: None,
            calibration_buf: Vec::new(),
            calibration_count: 0,
            threshold_override,
            seen_templates: std::collections::HashSet::new(),
        }
    }

    /// Processes one log line and reports its template, score and anomaly flag.
    ///
    /// While no detector exists, the line only contributes to calibration: the vector of each
    /// not-yet-seen template is appended to the calibration buffer, and once
    /// `CALIBRATION_TEMPLATES` distinct templates have been collected the detector is fitted.
    /// Every line handled in this phase — including the one that completes calibration — is
    /// returned with `score: None` and `is_anomaly: false`.
    ///
    /// Once a detector exists, the line is scored with `min_distance` and flagged when the
    /// score is strictly greater than the threshold (the override if one was supplied,
    /// otherwise the detector's own threshold). A NaN score therefore never flags a line.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `VectorCache::get_or_embed`.
    pub fn process(&mut self, line: &str) -> Result<LineResult> {
        let (parsed, vector) = self.cache.get_or_embed(line)?;

        // Scoring phase: handled first so the detector is bound by pattern matching rather
        // than through an `is_none()` check followed by `unwrap()`.
        if let Some(detector) = &self.detector {
            let score = detector.min_distance(&vector);
            let threshold = self
                .threshold_override
                .unwrap_or_else(|| detector.threshold());
            return Ok(LineResult {
                template: parsed.template,
                score: Some(score),
                is_anomaly: score > threshold,
            });
        }

        // Calibration phase. Membership is tested by reference first so that repeated
        // templates (the common case in log streams) do not allocate a throw-away clone.
        if !self.seen_templates.contains(&parsed.template)
            && self.calibration_count < CALIBRATION_TEMPLATES
        {
            self.seen_templates.insert(parsed.template.clone());
            self.calibration_buf.extend_from_slice(&vector);
            self.calibration_count += 1;
        }

        if self.calibration_count >= CALIBRATION_TEMPLATES {
            // `k` can never exceed the number of samples actually collected.
            let k = CENTROID_K.min(self.calibration_count);
            self.detector = Some(AnomalyDetector::fit_auto(
                &self.calibration_buf,
                DIM,
                k,
                THRESHOLD_FLOOR,
            ));
        }

        Ok(LineResult {
            template: parsed.template,
            score: None,
            is_anomaly: false,
        })
    }

    /// Returns `true` once the detector has been fitted.
    pub fn calibrated(&self) -> bool {
        self.detector.is_some()
    }

    /// Returns how many distinct templates have been collected for calibration so far.
    pub fn calibration_progress(&self) -> usize {
        self.calibration_count
    }
}