Skip to main content

anomalyx_core/
envelope.rs

1//! The `tq1` output envelope — the wire contract.
2//!
3//! This is the article's "typed, dense output (not pretty text)": a versioned
4//! JSON envelope with a dictionary-pinned string table, an explicit column
5//! ordering for the dense finding rows, honest `absent` entries for detectors
6//! that could not run, and a committed exit code. Changing any field here is an
7//! API change and must break a contract test.
8
9use crate::dict::Dict;
10use crate::finding::{AnomalyClass, Finding, Severity};
11use serde::Serialize;
12
/// Protocol identifier. Bump on any breaking change to the envelope shape.
///
/// [`EnvelopeBuilder::build`] copies this verbatim into [`Envelope::protocol`].
pub const PROTOCOL: &str = "anomalyx/tq1";
15
/// Committed process exit codes. These are part of the contract: weakening them
/// must break a test.
///
/// The enum is `#[repr(i32)]` with explicit discriminants, so the numeric value
/// of each variant is fixed by this definition and not by declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
pub enum ExitCode {
    /// No anomalies found.
    Clean = 0,
    /// Anomalies found.
    Anomalies = 1,
    /// The tool could not complete (bad input, unresolved handle, …).
    Error = 2,
}

impl ExitCode {
    /// Returns the numeric process exit code for this variant.
    ///
    /// This is the variant's explicit discriminant (`0`, `1` or `2`). The
    /// function is `const`, so the code can also be used to initialise
    /// constants and statics.
    pub const fn code(self) -> i32 {
        self as i32
    }
}
34
/// A detector that declined to run, with a machine-readable reason. Recorded so
/// absence is *explicit* — an unavailable detector contributes nothing and says
/// so, rather than implying the data looked fine.
#[derive(Debug, Clone, Serialize)]
pub struct Absence {
    /// Name of the detector that did not run.
    pub detector: String,
    /// Why the detector did not run.
    pub reason: String,
}
43
/// The fixed column order of a dense finding row. Each row in
/// [`Envelope::rows`] is an array whose entries align to these names.
///
/// Columns marked "dict index" hold an index into [`Envelope::dict`] rather
/// than the string itself; the two float columns are stored inline.
pub const FINDING_COLUMNS: [&str; 7] = [
    "detector",   // dict index
    "class",      // dict index
    "handle",     // dict index (canonical handle string)
    "confidence", // float
    "severity",   // dict index
    "score",      // float
    "reason",     // dict index
];
55
/// Per-class and overall counts, for the compact summary an agent reads first.
#[derive(Debug, Clone, Serialize)]
pub struct Summary {
    /// Total number of findings in the envelope.
    pub total: usize,
    /// Highest severity among the findings; `None` when there are no findings.
    pub max_severity: Option<Severity>,
    /// Counts keyed by class token, in [`AnomalyClass::ALL`] order. Every class
    /// in `ALL` gets an entry, including classes whose count is zero.
    pub by_class: Vec<ClassCount>,
}
64
/// Number of findings for a single [`AnomalyClass`]; one element of
/// [`Summary::by_class`].
#[derive(Debug, Clone, Serialize)]
pub struct ClassCount {
    /// The anomaly class being counted.
    pub class: AnomalyClass,
    /// How many findings carry that class (may be zero).
    pub count: usize,
}
70
/// The full envelope. Build it with [`EnvelopeBuilder`].
///
/// Field order here is the declaration order handed to `serde`, so reordering
/// fields is a change to the wire contract.
#[derive(Debug, Clone, Serialize)]
pub struct Envelope {
    /// Protocol id, e.g. `"anomalyx/tq1"`.
    pub protocol: String,
    /// Config/version fingerprint. Same inputs + same fingerprint ⇒ same bytes.
    pub config_version: String,
    /// Source of the scanned corpus, exactly as given to
    /// [`EnvelopeBuilder::new`]; it is not interpreted here.
    pub source: String,
    /// Input format label, exactly as given to [`EnvelopeBuilder::new`].
    pub format: String,
    /// Source of the baseline corpus when scanning in compare mode; absent for
    /// a single-corpus scan.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub baseline: Option<String>,
    /// Number of input rows scanned, as reported by the caller.
    pub rows_scanned: usize,
    /// Dictionary-pinned string table; the "dict index" columns of `rows`
    /// (see [`FINDING_COLUMNS`]) index into this.
    pub dict: Dict,
    /// Names for the dense row columns (always [`FINDING_COLUMNS`]).
    pub columns: Vec<String>,
    /// Dense finding rows — arrays aligned to `columns`.
    pub rows: Vec<Vec<serde_json::Value>>,
    /// Detectors that could not run (honest absence).
    pub absent: Vec<Absence>,
    /// Overall and per-class finding counts.
    pub summary: Summary,
    /// Committed exit code as an integer, mirrored into the envelope.
    pub exit: i32,
}
97
/// Assembles an [`Envelope`] from findings, interning strings deterministically.
///
/// The builder only accumulates inputs; sorting, interning and the summary are
/// all computed in [`EnvelopeBuilder::build`].
pub struct EnvelopeBuilder {
    // Passed through unchanged to the envelope fields of the same name.
    config_version: String,
    source: String,
    format: String,
    // `None` until `baseline()` is called.
    baseline: Option<String>,
    rows_scanned: usize,
    // Accumulated in call order; `build()` sorts them before emitting rows.
    findings: Vec<Finding>,
    // Accumulated in call order and emitted as-is.
    absent: Vec<Absence>,
}
108
109impl EnvelopeBuilder {
110    pub fn new(
111        config_version: impl Into<String>,
112        source: impl Into<String>,
113        format: impl Into<String>,
114        rows_scanned: usize,
115    ) -> Self {
116        EnvelopeBuilder {
117            config_version: config_version.into(),
118            source: source.into(),
119            format: format.into(),
120            baseline: None,
121            rows_scanned,
122            findings: Vec::new(),
123            absent: Vec::new(),
124        }
125    }
126
127    /// Records the baseline source for a compare-mode scan.
128    pub fn baseline(mut self, source: impl Into<String>) -> Self {
129        self.baseline = Some(source.into());
130        self
131    }
132
133    pub fn findings(mut self, mut findings: Vec<Finding>) -> Self {
134        self.findings.append(&mut findings);
135        self
136    }
137
138    pub fn absent(mut self, detector: impl Into<String>, reason: impl Into<String>) -> Self {
139        self.absent.push(Absence {
140            detector: detector.into(),
141            reason: reason.into(),
142        });
143        self
144    }
145
146    /// Finalizes the envelope. Findings are sorted into a deterministic order
147    /// (severity desc, then class, then handle, then detector) so the output is
148    /// stable regardless of the order detectors ran or emitted in.
149    pub fn build(mut self) -> Envelope {
150        self.findings.sort_by(|a, b| {
151            b.severity
152                .cmp(&a.severity)
153                .then_with(|| a.class.token().cmp(b.class.token()))
154                .then_with(|| a.handle.canonical().cmp(&b.handle.canonical()))
155                .then_with(|| a.detector.cmp(&b.detector))
156        });
157
158        let mut dict = Dict::new();
159        let mut rows = Vec::with_capacity(self.findings.len());
160        for f in &self.findings {
161            let detector = dict.intern(&f.detector);
162            let class = dict.intern(f.class.token());
163            let handle = dict.intern(&f.handle.canonical());
164            let severity = dict.intern(severity_token(f.severity));
165            let reason = dict.intern(&f.reason);
166            rows.push(vec![
167                json_u32(detector),
168                json_u32(class),
169                json_u32(handle),
170                json_f64(f.confidence),
171                json_u32(severity),
172                json_f64(f.score),
173                json_u32(reason),
174            ]);
175        }
176
177        let max_severity = self.findings.iter().map(|f| f.severity).max();
178        let by_class = AnomalyClass::ALL
179            .iter()
180            .map(|&class| ClassCount {
181                class,
182                count: self.findings.iter().filter(|f| f.class == class).count(),
183            })
184            .collect();
185        let summary = Summary {
186            total: self.findings.len(),
187            max_severity,
188            by_class,
189        };
190
191        let exit = if self.findings.is_empty() {
192            ExitCode::Clean
193        } else {
194            ExitCode::Anomalies
195        };
196
197        Envelope {
198            protocol: PROTOCOL.to_string(),
199            config_version: self.config_version,
200            source: self.source,
201            format: self.format,
202            baseline: self.baseline,
203            rows_scanned: self.rows_scanned,
204            dict,
205            columns: FINDING_COLUMNS.iter().map(|s| s.to_string()).collect(),
206            rows,
207            absent: self.absent,
208            summary,
209            exit: exit.code(),
210        }
211    }
212}
213
214fn severity_token(s: Severity) -> &'static str {
215    match s {
216        Severity::Info => "info",
217        Severity::Low => "low",
218        Severity::Medium => "medium",
219        Severity::High => "high",
220        Severity::Critical => "critical",
221    }
222}
223
224fn json_u32(v: u32) -> serde_json::Value {
225    serde_json::Value::from(v)
226}
227
228fn json_f64(v: f64) -> serde_json::Value {
229    serde_json::Number::from_f64(v)
230        .map(serde_json::Value::Number)
231        .unwrap_or(serde_json::Value::Null)
232}
233
// Contract tests for the envelope: they pin the exit codes, the summary
// counts, the row encoding and the order-independence of `build`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::finding::Handle;

    /// Builds a column-handle finding from detector `"d"` with reason `"r"`.
    /// `conf` is passed for both numeric arguments of `Finding::new`, so
    /// confidence and score are always equal in these tests.
    // NOTE(review): severity is not passed in; presumably `Finding::new`
    // derives it from these values — confirm against `finding.rs`.
    fn finding(conf: f64, class: AnomalyClass, col: &str) -> Finding {
        Finding::new(
            "d",
            class,
            Handle::Column { name: col.into() },
            conf,
            conf,
            "r",
        )
    }

    /// The numeric exit codes are part of the contract.
    #[test]
    fn exit_codes_are_committed() {
        assert_eq!(ExitCode::Clean.code(), 0);
        assert_eq!(ExitCode::Anomalies.code(), 1);
        assert_eq!(ExitCode::Error.code(), 2);
    }

    /// A builder with no findings yields a clean exit and an empty summary.
    #[test]
    fn empty_is_clean() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 0).build();
        assert_eq!(env.exit, ExitCode::Clean.code());
        assert_eq!(env.summary.total, 0);
        assert_eq!(env.summary.max_severity, None);
    }

    /// Each class count reflects only findings of that class, and a class with
    /// no findings still has an entry with count zero.
    #[test]
    fn by_class_counts_only_matching_class() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(vec![
                finding(0.9, AnomalyClass::Point, "a"),
                finding(0.9, AnomalyClass::Point, "b"),
                finding(0.9, AnomalyClass::Structural, "c"),
            ])
            .build();
        // Looks up the count for one class; panics if the class has no entry.
        let count = |class: AnomalyClass| {
            env.summary
                .by_class
                .iter()
                .find(|cc| cc.class == class)
                .map(|cc| cc.count)
                .unwrap()
        };
        assert_eq!(count(AnomalyClass::Point), 2);
        assert_eq!(count(AnomalyClass::Structural), 1);
        assert_eq!(count(AnomalyClass::Cadence), 0);
    }

    /// Confidence and score are stored inline as JSON numbers, not as dict
    /// indices.
    #[test]
    fn row_encodes_confidence_and_score_as_numbers() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 1)
            .findings(vec![finding(0.77, AnomalyClass::Point, "a")])
            .build();
        // columns: [detector, class, handle, confidence, severity, score, reason]
        assert_eq!(env.rows[0][3].as_f64(), Some(0.77));
        assert_eq!(env.rows[0][5].as_f64(), Some(0.77));
    }

    /// Any finding switches the exit code to `Anomalies`, and the summary and
    /// row order both reflect the highest severity.
    #[test]
    fn findings_set_anomalies_exit_and_max_severity() {
        let env = EnvelopeBuilder::new("v", "-", "csv", 3)
            .findings(vec![
                finding(0.99, AnomalyClass::Point, "a"),
                finding(0.50, AnomalyClass::Structural, "b"),
            ])
            .build();
        assert_eq!(env.exit, ExitCode::Anomalies.code());
        assert_eq!(env.summary.total, 2);
        assert_eq!(env.summary.max_severity, Some(Severity::Critical));
        assert_eq!(env.columns.len(), FINDING_COLUMNS.len());
        // highest severity sorts first
        // Column 4 is `severity`, stored as an index into the dict.
        let first_sev_idx = env.rows[0][4].as_u64().unwrap() as u32;
        assert_eq!(env.dict.get(first_sev_idx), Some("critical"));
    }

    /// The same findings supplied in a different order serialize to the same
    /// bytes.
    #[test]
    fn build_is_order_independent() {
        let a = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.9, AnomalyClass::Point, "a"),
                finding(0.5, AnomalyClass::Point, "b"),
            ])
            .build();
        let b = EnvelopeBuilder::new("v", "-", "csv", 2)
            .findings(vec![
                finding(0.5, AnomalyClass::Point, "b"),
                finding(0.9, AnomalyClass::Point, "a"),
            ])
            .build();
        assert_eq!(
            serde_json::to_string(&a).unwrap(),
            serde_json::to_string(&b).unwrap()
        );
    }
}