Skip to main content

anomalyx_core/
envelope.rs

1//! The `tq1` output envelope — the wire contract.
2//!
3//! This is the article's "typed, dense output (not pretty text)": a versioned
4//! JSON envelope with a dictionary-pinned string table, an explicit column
5//! ordering for the dense finding rows, honest `absent` entries for detectors
6//! that could not run, and a committed exit code. Changing any field here is an
7//! API change and must break a contract test.
8
9use crate::dict::Dict;
10use crate::finding::{AnomalyClass, Finding, Severity};
11use crate::roles::ColumnRole;
12use serde::Serialize;
13
/// Wire-protocol identifier carried by every envelope.
///
/// Must be bumped whenever the envelope shape changes in a breaking way.
pub const PROTOCOL: &str = "anomalyx/tq1";
16
/// Committed process exit codes. These are part of the contract: weakening them
/// must break a test.
///
/// The enum is `#[repr(i32)]` with explicit discriminants, so each variant's
/// discriminant *is* its committed integer value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
pub enum ExitCode {
    /// No anomalies found.
    Clean = 0,
    /// Anomalies found.
    Anomalies = 1,
    /// The tool could not complete (bad input, unresolved handle, …).
    Error = 2,
}

impl ExitCode {
    /// Returns the committed integer value of this exit code.
    ///
    /// Declared `const` so the committed values can also be used in constant
    /// contexts (`const` items, compile-time assertions).
    pub const fn code(self) -> i32 {
        // Sound because of `#[repr(i32)]` and the explicit discriminants above.
        self as i32
    }
}
35
36/// A detector that declined to run, with a machine-readable reason. Recorded so
37/// absence is *explicit* — an unavailable detector contributes nothing and says
38/// so, rather than implying the data looked fine.
39#[derive(Debug, Clone, Serialize)]
40pub struct Absence {
41    pub detector: String,
42    pub reason: String,
43}
44
/// Column names of a dense finding row, in their fixed wire order.
///
/// Every row in [`Envelope::rows`] is an array whose entries line up with
/// these names position by position:
///
/// | # | name         | encoding                               |
/// |---|--------------|----------------------------------------|
/// | 0 | `detector`   | dict index                             |
/// | 1 | `class`      | dict index                             |
/// | 2 | `handle`     | dict index (canonical handle string)   |
/// | 3 | `confidence` | float                                  |
/// | 4 | `severity`   | dict index                             |
/// | 5 | `score`      | float                                  |
/// | 6 | `reason`     | dict index                             |
pub const FINDING_COLUMNS: [&str; 7] = [
    "detector",
    "class",
    "handle",
    "confidence",
    "severity",
    "score",
    "reason",
];
56
57/// Per-class and overall counts, for the compact summary an agent reads first.
58#[derive(Debug, Clone, Serialize)]
59pub struct Summary {
60    pub total: usize,
61    pub max_severity: Option<Severity>,
62    /// Counts keyed by class token, in [`AnomalyClass::ALL`] order.
63    pub by_class: Vec<ClassCount>,
64}
65
66#[derive(Debug, Clone, Serialize)]
67pub struct ClassCount {
68    pub class: AnomalyClass,
69    pub count: usize,
70}
71
72/// Records output scoping (`--top` / `--min-severity`) when it is applied, so a
73/// truncated finding list is never silently mistaken for the whole story. The
74/// `summary`, `max_severity`, and `exit` always describe *everything detected*;
75/// `rows` carries only the `emitted` subset, and this block says how many were
76/// withheld and why.
77#[derive(Debug, Clone, Serialize)]
78pub struct Scope {
79    /// Minimum severity retained, if `--min-severity` was set.
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub min_severity: Option<Severity>,
82    /// Cap on emitted findings, if `--top` was set.
83    #[serde(skip_serializing_if = "Option::is_none")]
84    pub top: Option<usize>,
85    /// Findings detected in total (before scoping).
86    pub detected: usize,
87    /// Findings emitted in `rows` (after scoping).
88    pub emitted: usize,
89    /// Findings withheld from `rows` (`detected − emitted`).
90    pub dropped: usize,
91}
92
93/// The full envelope. Build it with [`EnvelopeBuilder`].
94#[derive(Debug, Clone, Serialize)]
95pub struct Envelope {
96    /// Protocol id, e.g. `"anomalyx/tq1"`.
97    pub protocol: String,
98    /// Config/version fingerprint. Same inputs + same fingerprint ⇒ same bytes.
99    pub config_version: String,
100    pub source: String,
101    pub format: String,
102    /// Source of the baseline corpus when scanning in compare mode; absent for
103    /// a single-corpus scan.
104    #[serde(skip_serializing_if = "Option::is_none")]
105    pub baseline: Option<String>,
106    pub rows_scanned: usize,
107    /// Dictionary-pinned string table; all `*_idx` values index into this.
108    pub dict: Dict,
109    /// Names for the dense row columns (always [`FINDING_COLUMNS`]).
110    pub columns: Vec<String>,
111    /// Dense finding rows — arrays aligned to `columns`.
112    pub rows: Vec<Vec<serde_json::Value>>,
113    /// Detectors that could not run (honest absence).
114    pub absent: Vec<Absence>,
115    /// The classified [`Role`](crate::Role) of each scanned column. Always
116    /// reported (transparent): detectors may skip columns by role, and an agent
117    /// can see and override that. Empty only for a column-less corpus.
118    pub roles: Vec<ColumnRole>,
119    pub summary: Summary,
120    /// Output scoping applied to `rows`, present only when `--top`/`--min-severity`
121    /// withheld findings. Absent ⇒ `rows` is the complete detected set.
122    #[serde(skip_serializing_if = "Option::is_none")]
123    pub scope: Option<Scope>,
124    /// Committed exit code as an integer, mirrored into the envelope.
125    pub exit: i32,
126}
127
128/// Assembles an [`Envelope`] from findings, interning strings deterministically.
129pub struct EnvelopeBuilder {
130    config_version: String,
131    source: String,
132    format: String,
133    baseline: Option<String>,
134    rows_scanned: usize,
135    findings: Vec<Finding>,
136    absent: Vec<Absence>,
137    roles: Vec<ColumnRole>,
138    min_severity: Option<Severity>,
139    top: Option<usize>,
140}
141
142impl EnvelopeBuilder {
143    pub fn new(
144        config_version: impl Into<String>,
145        source: impl Into<String>,
146        format: impl Into<String>,
147        rows_scanned: usize,
148    ) -> Self {
149        EnvelopeBuilder {
150            config_version: config_version.into(),
151            source: source.into(),
152            format: format.into(),
153            baseline: None,
154            rows_scanned,
155            findings: Vec::new(),
156            absent: Vec::new(),
157            roles: Vec::new(),
158            min_severity: None,
159            top: None,
160        }
161    }
162
163    /// Records the classified role of each scanned column (transparency).
164    pub fn roles(mut self, roles: Vec<ColumnRole>) -> Self {
165        self.roles = roles;
166        self
167    }
168
169    /// Restricts emitted findings to those at or above severity `s` (the full
170    /// detected set still drives `summary`/`exit`). Output scoping, not detection.
171    pub fn min_severity(mut self, s: Severity) -> Self {
172        self.min_severity = Some(s);
173        self
174    }
175
176    /// Caps emitted findings to the `n` most severe (findings are sorted
177    /// severity-first, so this keeps the worst). Output scoping, not detection.
178    pub fn top(mut self, n: usize) -> Self {
179        self.top = Some(n);
180        self
181    }
182
183    /// Records the baseline source for a compare-mode scan.
184    pub fn baseline(mut self, source: impl Into<String>) -> Self {
185        self.baseline = Some(source.into());
186        self
187    }
188
189    pub fn findings(mut self, mut findings: Vec<Finding>) -> Self {
190        self.findings.append(&mut findings);
191        self
192    }
193
194    pub fn absent(mut self, detector: impl Into<String>, reason: impl Into<String>) -> Self {
195        self.absent.push(Absence {
196            detector: detector.into(),
197            reason: reason.into(),
198        });
199        self
200    }
201
202    /// Finalizes the envelope. Findings are sorted into a deterministic order
203    /// (severity desc, then class, then handle, then detector) so the output is
204    /// stable regardless of the order detectors ran or emitted in.
205    pub fn build(mut self) -> Envelope {
206        self.findings.sort_by(|a, b| {
207            b.severity
208                .cmp(&a.severity)
209                .then_with(|| a.class.token().cmp(b.class.token()))
210                .then_with(|| a.handle.canonical().cmp(&b.handle.canonical()))
211                .then_with(|| a.detector.cmp(&b.detector))
212        });
213
214        // `summary`, `max_severity`, and `exit` describe everything *detected*,
215        // so output scoping can never make anomalies look absent (or flip the
216        // exit code). Compute them before any scoping filter.
217        let detected = self.findings.len();
218        let max_severity = self.findings.iter().map(|f| f.severity).max();
219        let by_class = AnomalyClass::ALL
220            .iter()
221            .map(|&class| ClassCount {
222                class,
223                count: self.findings.iter().filter(|f| f.class == class).count(),
224            })
225            .collect();
226        let exit = if detected == 0 {
227            ExitCode::Clean
228        } else {
229            ExitCode::Anomalies
230        };
231
232        // Apply output scoping to the (already severity-sorted) findings. The
233        // most-severe survive `--top`; `--min-severity` keeps the floor and up.
234        if let Some(min) = self.min_severity {
235            self.findings.retain(|f| f.severity >= min);
236        }
237        if let Some(n) = self.top {
238            self.findings.truncate(n);
239        }
240        let scope = if self.min_severity.is_some() || self.top.is_some() {
241            Some(Scope {
242                min_severity: self.min_severity,
243                top: self.top,
244                detected,
245                emitted: self.findings.len(),
246                dropped: detected - self.findings.len(),
247            })
248        } else {
249            None
250        };
251
252        let mut dict = Dict::new();
253        let mut rows = Vec::with_capacity(self.findings.len());
254        for f in &self.findings {
255            let detector = dict.intern(&f.detector);
256            let class = dict.intern(f.class.token());
257            let handle = dict.intern(&f.handle.canonical());
258            let severity = dict.intern(severity_token(f.severity));
259            let reason = dict.intern(&f.reason);
260            rows.push(vec![
261                json_u32(detector),
262                json_u32(class),
263                json_u32(handle),
264                json_f64(f.confidence),
265                json_u32(severity),
266                json_f64(f.score),
267                json_u32(reason),
268            ]);
269        }
270
271        let summary = Summary {
272            total: detected,
273            max_severity,
274            by_class,
275        };
276
277        Envelope {
278            protocol: PROTOCOL.to_string(),
279            config_version: self.config_version,
280            source: self.source,
281            format: self.format,
282            baseline: self.baseline,
283            rows_scanned: self.rows_scanned,
284            dict,
285            columns: FINDING_COLUMNS.iter().map(|s| s.to_string()).collect(),
286            rows,
287            absent: self.absent,
288            roles: self.roles,
289            summary,
290            scope,
291            exit: exit.code(),
292        }
293    }
294}
295
296fn severity_token(s: Severity) -> &'static str {
297    match s {
298        Severity::Info => "info",
299        Severity::Low => "low",
300        Severity::Medium => "medium",
301        Severity::High => "high",
302        Severity::Critical => "critical",
303    }
304}
305
306fn json_u32(v: u32) -> serde_json::Value {
307    serde_json::Value::from(v)
308}
309
310fn json_f64(v: f64) -> serde_json::Value {
311    serde_json::Number::from_f64(v)
312        .map(serde_json::Value::Number)
313        .unwrap_or(serde_json::Value::Null)
314}
315
#[cfg(test)]
mod tests {
    use super::*;
    use crate::finding::Handle;

    /// Builds a column-scoped finding from detector `"d"` with reason `"r"`,
    /// passing `conf` for both numeric arguments of `Finding::new`.
    fn finding(conf: f64, class: AnomalyClass, col: &str) -> Finding {
        let handle = Handle::Column { name: col.into() };
        Finding::new("d", class, handle, conf, conf, "r")
    }

    /// Shorthand for a `Point`-class finding on column `col`.
    fn point(conf: f64, col: &str) -> Finding {
        finding(conf, AnomalyClass::Point, col)
    }

    /// A builder with the fixed config/source/format every test uses.
    fn builder(rows_scanned: usize) -> EnvelopeBuilder {
        EnvelopeBuilder::new("v", "-", "csv", rows_scanned)
    }

    /// The scope block's `(detected, emitted, dropped)` counters.
    fn counters(scope: &Scope) -> (usize, usize, usize) {
        (scope.detected, scope.emitted, scope.dropped)
    }

    /// Looks up the summary count for `class`; panics if the class is missing.
    fn class_count(env: &Envelope, class: AnomalyClass) -> usize {
        env.summary
            .by_class
            .iter()
            .find(|entry| entry.class == class)
            .map(|entry| entry.count)
            .unwrap()
    }

    #[test]
    fn exit_codes_are_committed() {
        assert_eq!(ExitCode::Clean.code(), 0);
        assert_eq!(ExitCode::Anomalies.code(), 1);
        assert_eq!(ExitCode::Error.code(), 2);
    }

    #[test]
    fn empty_is_clean() {
        let env = builder(0).build();
        assert_eq!(env.exit, ExitCode::Clean.code());
        assert_eq!(env.summary.total, 0);
        assert_eq!(env.summary.max_severity, None);
    }

    #[test]
    fn by_class_counts_only_matching_class() {
        let detected = vec![
            point(0.9, "a"),
            point(0.9, "b"),
            finding(0.9, AnomalyClass::Structural, "c"),
        ];
        let env = builder(3).findings(detected).build();
        assert_eq!(class_count(&env, AnomalyClass::Point), 2);
        assert_eq!(class_count(&env, AnomalyClass::Structural), 1);
        assert_eq!(class_count(&env, AnomalyClass::Cadence), 0);
    }

    #[test]
    fn no_scoping_omits_the_scope_block() {
        let detected = vec![point(0.9, "a"), point(0.5, "b")];
        let env = builder(2).findings(detected).build();
        assert!(env.scope.is_none(), "no scoping ⇒ no scope block");
        assert_eq!(env.summary.total, 2);
        assert_eq!(env.rows.len(), 2, "all findings emitted");
    }

    #[test]
    fn top_caps_emitted_but_summary_and_exit_reflect_all_detected() {
        // Keep only the single most severe of three findings. The summary and
        // exit code still describe all three; the scope block records the cut.
        let detected = vec![
            point(0.99, "crit"), // Critical
            point(0.50, "lo1"),  // Low
            point(0.50, "lo2"),  // Low
        ];
        let env = builder(3).findings(detected).top(1).build();
        assert_eq!(env.rows.len(), 1, "only the top finding emitted");
        assert_eq!(env.summary.total, 3, "summary.total is the detected count");
        assert_eq!(env.exit, ExitCode::Anomalies.code());
        let scope = env.scope.unwrap();
        assert_eq!(scope.top, Some(1));
        assert_eq!(counters(&scope), (3, 1, 2));
    }

    #[test]
    fn min_severity_filters_at_or_above_the_floor() {
        let detected = vec![
            point(0.99, "crit"), // Critical
            point(0.86, "high"), // High
            point(0.50, "low"),  // Low
        ];
        let env = builder(3)
            .findings(detected)
            .min_severity(Severity::High)
            .build();
        // Critical and High clear the floor; Low does not.
        assert_eq!(env.rows.len(), 2);
        let scope = env.scope.unwrap();
        assert_eq!(scope.min_severity, Some(Severity::High));
        assert_eq!(counters(&scope), (3, 2, 1));
    }

    #[test]
    fn scoping_to_zero_findings_still_exits_anomalies() {
        // Honesty guarantee: hiding every finding from view must not make
        // anomalies look absent. Exit stays 1 and the scope block shows that
        // two were detected and both were dropped.
        let detected = vec![
            point(0.50, "a"), // Low
            point(0.50, "b"), // Low
        ];
        let env = builder(2)
            .findings(detected)
            .min_severity(Severity::Critical)
            .build();
        assert_eq!(env.rows.len(), 0, "nothing meets the critical floor");
        assert_eq!(
            env.exit,
            ExitCode::Anomalies.code(),
            "but anomalies WERE found"
        );
        assert_eq!(env.summary.total, 2);
        assert_eq!(env.summary.max_severity, Some(Severity::Low));
        let scope = env.scope.unwrap();
        assert_eq!(counters(&scope), (2, 0, 2));
    }

    #[test]
    fn row_encodes_confidence_and_score_as_numbers() {
        let env = builder(1).findings(vec![point(0.77, "a")]).build();
        // Row layout: [detector, class, handle, confidence, severity, score, reason]
        let row = &env.rows[0];
        assert_eq!(row[3].as_f64(), Some(0.77));
        assert_eq!(row[5].as_f64(), Some(0.77));
    }

    #[test]
    fn findings_set_anomalies_exit_and_max_severity() {
        let detected = vec![
            point(0.99, "a"),
            finding(0.50, AnomalyClass::Structural, "b"),
        ];
        let env = builder(3).findings(detected).build();
        assert_eq!(env.exit, ExitCode::Anomalies.code());
        assert_eq!(env.summary.total, 2);
        assert_eq!(env.summary.max_severity, Some(Severity::Critical));
        assert_eq!(env.columns.len(), FINDING_COLUMNS.len());
        // The most severe finding must occupy the first row.
        let first_sev_idx = env.rows[0][4].as_u64().unwrap() as u32;
        assert_eq!(env.dict.get(first_sev_idx), Some("critical"));
    }

    #[test]
    fn build_is_order_independent() {
        let forward = builder(2)
            .findings(vec![point(0.9, "a"), point(0.5, "b")])
            .build();
        let reversed = builder(2)
            .findings(vec![point(0.5, "b"), point(0.9, "a")])
            .build();
        assert_eq!(
            serde_json::to_string(&forward).unwrap(),
            serde_json::to_string(&reversed).unwrap()
        );
    }
}