1use crate::dict::Dict;
10use crate::finding::{AnomalyClass, Finding, Severity};
11use crate::roles::ColumnRole;
12use serde::Serialize;
13
/// Protocol identifier written into the `protocol` field of every
/// [`Envelope`] produced by [`EnvelopeBuilder::build`].
pub const PROTOCOL: &str = "anomalyx/tq1";
16
/// Numeric exit status reported alongside a scan result.
///
/// The discriminants are explicit and the enum is `#[repr(i32)]`, so the
/// numeric values are part of the type's definition rather than an accident
/// of declaration order.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(i32)]
pub enum ExitCode {
    /// No findings were detected.
    Clean = 0,
    /// At least one finding was detected.
    Anomalies = 1,
    /// Reserved for failures; nothing in this module produces it.
    Error = 2,
}

impl ExitCode {
    /// Returns the numeric value of this exit code (its `i32` discriminant).
    pub fn code(self) -> i32 {
        self as i32
    }
}
35
/// A detector listed in the envelope's `absent` section, together with the
/// reason given for it.
///
/// Entries are created by [`EnvelopeBuilder::absent`].
// NOTE(review): presumably this records detectors that did not run or could
// not produce findings — confirm against the callers of `absent`.
#[derive(Debug, Clone, Serialize)]
pub struct Absence {
    /// Name of the detector.
    pub detector: String,
    /// Free-text explanation supplied by the caller.
    pub reason: String,
}
44
/// Column names for the envelope's `rows`, in cell order.
///
/// `EnvelopeBuilder::build` pushes the cells of every row in exactly this
/// order, so the index of a name here is the index of its cell in a row.
pub const FINDING_COLUMNS: [&str; 7] = [
    "detector",
    "class",
    "handle",
    "confidence",
    "severity",
    "score",
    "reason",
];
56
/// Aggregate view of everything that was detected.
///
/// [`EnvelopeBuilder::build`] computes these values before applying the
/// `min_severity` / `top` scoping, so they describe all detected findings,
/// not just the emitted rows.
#[derive(Debug, Clone, Serialize)]
pub struct Summary {
    /// Number of findings detected (before scoping).
    pub total: usize,
    /// Highest severity among the detected findings; `None` when there are none.
    pub max_severity: Option<Severity>,
    /// One entry per class in `AnomalyClass::ALL`, in that order, including
    /// classes whose count is zero.
    pub by_class: Vec<ClassCount>,
}
65
/// Number of detected findings for one anomaly class (an entry of
/// [`Summary::by_class`]).
#[derive(Debug, Clone, Serialize)]
pub struct ClassCount {
    /// The anomaly class being counted.
    pub class: AnomalyClass,
    /// How many detected findings carry that class.
    pub count: usize,
}
71
/// Describes how the emitted rows were narrowed relative to what was detected.
///
/// [`EnvelopeBuilder::build`] only attaches a `Scope` when a severity floor
/// or a row cap was requested.
#[derive(Debug, Clone, Serialize)]
pub struct Scope {
    /// Severity floor that was applied: findings below it were not emitted.
    /// Omitted from the serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_severity: Option<Severity>,
    /// Maximum number of rows that were emitted. Omitted from the serialized
    /// output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top: Option<usize>,
    /// Number of findings before scoping.
    pub detected: usize,
    /// Number of findings left after scoping (the number of rows).
    pub emitted: usize,
    /// `detected - emitted`.
    pub dropped: usize,
}
92
/// The serializable result document assembled by [`EnvelopeBuilder::build`].
///
/// Findings are stored column-wise: `columns` names the cells, `rows` holds
/// one array per emitted finding, and string-valued cells are stored as
/// indices into `dict`.
#[derive(Debug, Clone, Serialize)]
pub struct Envelope {
    /// Always the value of [`PROTOCOL`].
    pub protocol: String,
    /// Configuration version string, passed through from the builder.
    pub config_version: String,
    /// Input source label, passed through from the builder.
    pub source: String,
    /// Input format label, passed through from the builder (the tests in
    /// this file use `"csv"`).
    pub format: String,
    /// Baseline source, if one was set on the builder; omitted from the
    /// serialized output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub baseline: Option<String>,
    /// Row count passed through from the builder.
    pub rows_scanned: usize,
    /// String table holding the detector, class, handle, severity and reason
    /// strings that `rows` refers to by index.
    pub dict: Dict,
    /// Column names for `rows`; a copy of [`FINDING_COLUMNS`].
    pub columns: Vec<String>,
    /// One row per emitted finding, cells in `columns` order. Confidence and
    /// score are JSON numbers (or `null` when the float cannot be represented
    /// as a JSON number); every other cell is a `dict` index.
    pub rows: Vec<Vec<serde_json::Value>>,
    /// Detectors recorded through [`EnvelopeBuilder::absent`].
    pub absent: Vec<Absence>,
    /// Column roles, passed through from the builder.
    pub roles: Vec<ColumnRole>,
    /// Totals over all detected findings (computed before scoping).
    pub summary: Summary,
    /// Present only when a severity floor or row cap was requested; omitted
    /// from the serialized output otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scope: Option<Scope>,
    /// Numeric [`ExitCode`]: `Clean` when nothing was detected, otherwise
    /// `Anomalies` — regardless of how many rows survive scoping.
    pub exit: i32,
}
127
/// Accumulates run metadata, findings and scoping options, then produces an
/// [`Envelope`] via `build`.
///
/// All setters take and return the builder by value so calls can be chained.
pub struct EnvelopeBuilder {
    // Run metadata copied verbatim into the envelope.
    config_version: String,
    source: String,
    format: String,
    // Optional baseline source; `None` until `baseline()` is called.
    baseline: Option<String>,
    rows_scanned: usize,
    // Every finding handed to `findings()`, in insertion order until `build`
    // sorts them.
    findings: Vec<Finding>,
    // Entries pushed by `absent()`.
    absent: Vec<Absence>,
    roles: Vec<ColumnRole>,
    // Scoping options: severity floor and row cap for the emitted rows.
    min_severity: Option<Severity>,
    top: Option<usize>,
}
141
142impl EnvelopeBuilder {
143 pub fn new(
144 config_version: impl Into<String>,
145 source: impl Into<String>,
146 format: impl Into<String>,
147 rows_scanned: usize,
148 ) -> Self {
149 EnvelopeBuilder {
150 config_version: config_version.into(),
151 source: source.into(),
152 format: format.into(),
153 baseline: None,
154 rows_scanned,
155 findings: Vec::new(),
156 absent: Vec::new(),
157 roles: Vec::new(),
158 min_severity: None,
159 top: None,
160 }
161 }
162
163 pub fn roles(mut self, roles: Vec<ColumnRole>) -> Self {
165 self.roles = roles;
166 self
167 }
168
169 pub fn min_severity(mut self, s: Severity) -> Self {
172 self.min_severity = Some(s);
173 self
174 }
175
176 pub fn top(mut self, n: usize) -> Self {
179 self.top = Some(n);
180 self
181 }
182
183 pub fn baseline(mut self, source: impl Into<String>) -> Self {
185 self.baseline = Some(source.into());
186 self
187 }
188
189 pub fn findings(mut self, mut findings: Vec<Finding>) -> Self {
190 self.findings.append(&mut findings);
191 self
192 }
193
194 pub fn absent(mut self, detector: impl Into<String>, reason: impl Into<String>) -> Self {
195 self.absent.push(Absence {
196 detector: detector.into(),
197 reason: reason.into(),
198 });
199 self
200 }
201
202 pub fn build(mut self) -> Envelope {
206 self.findings.sort_by(|a, b| {
207 b.severity
208 .cmp(&a.severity)
209 .then_with(|| a.class.token().cmp(b.class.token()))
210 .then_with(|| a.handle.canonical().cmp(&b.handle.canonical()))
211 .then_with(|| a.detector.cmp(&b.detector))
212 });
213
214 let detected = self.findings.len();
218 let max_severity = self.findings.iter().map(|f| f.severity).max();
219 let by_class = AnomalyClass::ALL
220 .iter()
221 .map(|&class| ClassCount {
222 class,
223 count: self.findings.iter().filter(|f| f.class == class).count(),
224 })
225 .collect();
226 let exit = if detected == 0 {
227 ExitCode::Clean
228 } else {
229 ExitCode::Anomalies
230 };
231
232 if let Some(min) = self.min_severity {
235 self.findings.retain(|f| f.severity >= min);
236 }
237 if let Some(n) = self.top {
238 self.findings.truncate(n);
239 }
240 let scope = if self.min_severity.is_some() || self.top.is_some() {
241 Some(Scope {
242 min_severity: self.min_severity,
243 top: self.top,
244 detected,
245 emitted: self.findings.len(),
246 dropped: detected - self.findings.len(),
247 })
248 } else {
249 None
250 };
251
252 let mut dict = Dict::new();
253 let mut rows = Vec::with_capacity(self.findings.len());
254 for f in &self.findings {
255 let detector = dict.intern(&f.detector);
256 let class = dict.intern(f.class.token());
257 let handle = dict.intern(&f.handle.canonical());
258 let severity = dict.intern(severity_token(f.severity));
259 let reason = dict.intern(&f.reason);
260 rows.push(vec![
261 json_u32(detector),
262 json_u32(class),
263 json_u32(handle),
264 json_f64(f.confidence),
265 json_u32(severity),
266 json_f64(f.score),
267 json_u32(reason),
268 ]);
269 }
270
271 let summary = Summary {
272 total: detected,
273 max_severity,
274 by_class,
275 };
276
277 Envelope {
278 protocol: PROTOCOL.to_string(),
279 config_version: self.config_version,
280 source: self.source,
281 format: self.format,
282 baseline: self.baseline,
283 rows_scanned: self.rows_scanned,
284 dict,
285 columns: FINDING_COLUMNS.iter().map(|s| s.to_string()).collect(),
286 rows,
287 absent: self.absent,
288 roles: self.roles,
289 summary,
290 scope,
291 exit: exit.code(),
292 }
293 }
294}
295
296fn severity_token(s: Severity) -> &'static str {
297 match s {
298 Severity::Info => "info",
299 Severity::Low => "low",
300 Severity::Medium => "medium",
301 Severity::High => "high",
302 Severity::Critical => "critical",
303 }
304}
305
306fn json_u32(v: u32) -> serde_json::Value {
307 serde_json::Value::from(v)
308}
309
310fn json_f64(v: f64) -> serde_json::Value {
311 serde_json::Number::from_f64(v)
312 .map(serde_json::Value::Number)
313 .unwrap_or(serde_json::Value::Null)
314}
315
#[cfg(test)]
mod tests {
    use super::*;
    use crate::finding::Handle;

    // Builds a column-level finding from detector "d" with reason "r".
    // `conf` is passed twice; `row_encodes_confidence_and_score_as_numbers`
    // expects it to surface as both the confidence and the score cell.
    // NOTE(review): severity is not passed in — it is presumably derived from
    // `conf` inside `Finding::new`; the assertions below expect 0.99 →
    // Critical, 0.86 → High and 0.50 → Low.
    fn finding(conf: f64, class: AnomalyClass, col: &str) -> Finding {
        Finding::new(
            "d",
            class,
            Handle::Column { name: col.into() },
            conf,
            conf,
            "r",
        )
    }

    // Pins the numeric values of the exit codes.
    #[test]
    fn exit_codes_are_committed() {
        assert_eq!(ExitCode::Clean.code(), 0);
        assert_eq!(ExitCode::Anomalies.code(), 1);
        assert_eq!(ExitCode::Error.code(), 2);
    }

    // A builder with no findings yields a clean exit and an empty summary.
    #[test]
    fn empty_is_clean() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 0).build();
        assert_eq!(env.exit, ExitCode::Clean.code());
        assert_eq!(env.summary.total, 0);
        assert_eq!(env.summary.max_severity, None);
    }

    // `by_class` counts per class and still lists classes with zero findings.
    #[test]
    fn by_class_counts_only_matching_class() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(vec![
                finding(0.9, AnomalyClass::Point, "a"),
                finding(0.9, AnomalyClass::Point, "b"),
                finding(0.9, AnomalyClass::Structural, "c"),
            ])
            .build();
        let count = |class: AnomalyClass| {
            env.summary
                .by_class
                .iter()
                .find(|cc| cc.class == class)
                .map(|cc| cc.count)
                .unwrap()
        };
        assert_eq!(count(AnomalyClass::Point), 2);
        assert_eq!(count(AnomalyClass::Structural), 1);
        assert_eq!(count(AnomalyClass::Cadence), 0);
    }

    // Without `min_severity` or `top`, no scope block is attached and every
    // finding becomes a row.
    #[test]
    fn no_scoping_omits_the_scope_block() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.9, AnomalyClass::Point, "a"),
                finding(0.5, AnomalyClass::Point, "b"),
            ])
            .build();
        assert!(env.scope.is_none(), "no scoping ⇒ no scope block");
        assert_eq!(env.summary.total, 2);
        assert_eq!(env.rows.len(), 2, "all findings emitted");
    }

    // `top` limits the rows only: the summary, the exit code and
    // `scope.detected` still describe all three findings.
    #[test]
    fn top_caps_emitted_but_summary_and_exit_reflect_all_detected() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(vec![
                finding(0.99, AnomalyClass::Point, "crit"),
                finding(0.50, AnomalyClass::Point, "lo1"),
                finding(0.50, AnomalyClass::Point, "lo2"),
            ])
            .top(1)
            .build();
        assert_eq!(env.rows.len(), 1, "only the top finding emitted");
        assert_eq!(env.summary.total, 3, "summary.total is the detected count");
        assert_eq!(env.exit, ExitCode::Anomalies.code());
        let scope = env.scope.unwrap();
        assert_eq!(scope.top, Some(1));
        assert_eq!((scope.detected, scope.emitted, scope.dropped), (3, 1, 2));
    }

    // The floor is inclusive: with a floor of High, two of the three findings
    // are expected to remain.
    #[test]
    fn min_severity_filters_at_or_above_the_floor() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(vec![
                finding(0.99, AnomalyClass::Point, "crit"),
                finding(0.86, AnomalyClass::Point, "high"),
                finding(0.50, AnomalyClass::Point, "low"),
            ])
            .min_severity(Severity::High)
            .build();
        assert_eq!(env.rows.len(), 2);
        let scope = env.scope.unwrap();
        assert_eq!(scope.min_severity, Some(Severity::High));
        assert_eq!((scope.detected, scope.emitted, scope.dropped), (3, 2, 1));
    }

    // Scoping away every row must not turn the run into a clean exit: the
    // exit code and summary are based on what was detected.
    #[test]
    fn scoping_to_zero_findings_still_exits_anomalies() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.50, AnomalyClass::Point, "a"),
                finding(0.50, AnomalyClass::Point, "b"),
            ])
            .min_severity(Severity::Critical)
            .build();
        assert_eq!(env.rows.len(), 0, "nothing meets the critical floor");
        assert_eq!(
            env.exit,
            ExitCode::Anomalies.code(),
            "but anomalies WERE found"
        );
        assert_eq!(env.summary.total, 2);
        assert_eq!(env.summary.max_severity, Some(Severity::Low));
        let scope = env.scope.unwrap();
        assert_eq!((scope.detected, scope.emitted, scope.dropped), (2, 0, 2));
    }

    // Cells 3 and 5 (confidence and score in FINDING_COLUMNS) are plain JSON
    // numbers, not dict indices.
    #[test]
    fn row_encodes_confidence_and_score_as_numbers() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 1)
            .findings(vec![finding(0.77, AnomalyClass::Point, "a")])
            .build();
        assert_eq!(env.rows[0][3].as_f64(), Some(0.77));
        assert_eq!(env.rows[0][5].as_f64(), Some(0.77));
    }

    // With findings present: exit is Anomalies, the summary reports the
    // highest severity, and the first row (highest severity after sorting)
    // carries a severity cell that resolves through the dict.
    #[test]
    fn findings_set_anomalies_exit_and_max_severity() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(vec![
                finding(0.99, AnomalyClass::Point, "a"),
                finding(0.50, AnomalyClass::Structural, "b"),
            ])
            .build();
        assert_eq!(env.exit, ExitCode::Anomalies.code());
        assert_eq!(env.summary.total, 2);
        assert_eq!(env.summary.max_severity, Some(Severity::Critical));
        assert_eq!(env.columns.len(), FINDING_COLUMNS.len());
        // Cell 4 is the severity column; it holds a dict index.
        let first_sev_idx = env.rows[0][4].as_u64().unwrap() as u32;
        assert_eq!(env.dict.get(first_sev_idx), Some("critical"));
    }

    // The same findings supplied in a different order must serialize to the
    // same JSON, because `build` sorts before encoding.
    #[test]
    fn build_is_order_independent() {
        let a = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.9, AnomalyClass::Point, "a"),
                finding(0.5, AnomalyClass::Point, "b"),
            ])
            .build();
        let b = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.5, AnomalyClass::Point, "b"),
                finding(0.9, AnomalyClass::Point, "a"),
            ])
            .build();
        assert_eq!(
            serde_json::to_string(&a).unwrap(),
            serde_json::to_string(&b).unwrap()
        );
    }
}