Skip to main content

anodizer_core/
determinism_report.rs

1//! Determinism harness report types.
2//!
3//! `DeterminismReport` is the canonical JSON shape emitted by
4//! `anodize check determinism` at
5//! `dist/run-<commit>/determinism.json`. The shape is fixed by the
6//! release-resilience spec ([determinism harness report]) — every
7//! field is consumed by downstream CI parsers, so the serde contract is
8//! load-bearing:
9//!
10//! - `schema_version: 1` (constant; bump only on a breaking shape change).
11//! - `#[serde(deny_unknown_fields)]` enforced on every struct so a typo'd
12//!   field in a downstream-edited report fails loudly instead of being
13//!   silently dropped.
14//!
15//! These types live in `anodizer-core` (not the CLI crate) so future CI
16//! parsers can deserialize the report without pulling in the entire CLI
17//! dependency tree.
18
19use serde::{Deserialize, Serialize};
20
/// Schema version stamped into every report the harness writes.
///
/// Readers should compare against this value first and only then consume
/// the remaining fields. It is bumped solely for breaking changes to the
/// JSON shape, i.e. when a field is renamed or removed.
pub const CURRENT_SCHEMA_VERSION: u32 = 1;
25
26/// Top-level determinism report shape.
27///
28/// Emitted at `dist/run-<commit>/determinism.json` after every
29/// `anodize check determinism` run. Non-zero exit accompanies a non-empty
30/// `drift` list.
31#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
32#[serde(deny_unknown_fields)]
33pub struct DeterminismReport {
34    /// Schema version — currently `1`. See [`CURRENT_SCHEMA_VERSION`].
35    pub schema_version: u32,
36    /// `anodize` crate version that produced the report.
37    pub anodize_version: String,
38    /// Full commit SHA of HEAD at harness invocation time.
39    pub commit: String,
40    /// Committer timestamp (seconds since UNIX epoch) of `commit`. In
41    /// `--snapshot` mode this is the resolved snapshot-SDE, which may
42    /// differ from the raw commit timestamp when the tree is dirty.
43    pub commit_timestamp: i64,
44    /// Number of from-clean rebuilds the harness performed.
45    pub runs: u32,
46    /// Ordered list of stage names actually exercised (e.g.
47    /// `["build", "archive", "sbom", "sign", "checksum"]`).
48    pub stages_under_test: Vec<String>,
49    /// Compile-time and runtime allow-lists carried through from
50    /// [`crate::DeterminismState`].
51    pub allowlist: AllowList,
52    /// Per-artifact row, one entry per distinct artifact name seen across
53    /// any run. Includes both deterministic and drifting artifacts.
54    pub artifacts: Vec<ArtifactRow>,
55    /// Drift rows — one entry per artifact whose SHA256 differed across
56    /// runs AND was NOT covered by `allowlist`. Empty when the harness
57    /// passes.
58    pub drift: Vec<DriftRow>,
59    /// `drift.len() as u32`, hoisted to a top-level field so CI parsers
60    /// can short-circuit on the integer without walking the array.
61    pub drift_count: u32,
62}
63
64/// Compile-time + runtime allow-list pair, mirroring
65/// [`crate::DeterminismState::compile_time_allowlist`] /
66/// [`crate::DeterminismState::runtime_allowlist`].
67///
68/// `#[serde(default)]` so an absent `allowlist` field deserializes to an
69/// empty pair instead of erroring; harness emits the field always.
70#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
71#[serde(default, deny_unknown_fields)]
72pub struct AllowList {
73    /// Compile-time entries seeded by [`crate::DeterminismState::seed_from_commit`].
74    pub compile_time: Vec<AllowListEntry>,
75    /// Runtime entries added via `anodize release --allow-nondeterministic`.
76    pub runtime: Vec<AllowListEntry>,
77}
78
79/// One allow-list entry: an artifact name (or `*.ext` glob) and the
80/// operator-facing reason it is exempt from drift counting.
81#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
82#[serde(deny_unknown_fields)]
83pub struct AllowListEntry {
84    /// Artifact name or `*.ext` glob (see
85    /// [`crate::DeterminismState`] for pattern semantics).
86    pub artifact: String,
87    /// Human-readable reason surfaced into the report so consumers can
88    /// audit the rationale alongside the SHA256SUMS file.
89    pub reason: String,
90}
91
92/// One row per emitted artifact.
93///
94/// `deterministic=true` artifacts carry a single `hash`; drifting
95/// artifacts carry the per-run array under `hashes` (and may still have
96/// `nondeterministic_reason` set when allow-listed).
97#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
98#[serde(deny_unknown_fields)]
99pub struct ArtifactRow {
100    /// Dist-root-relative path of the artifact (forward-slash-normalized,
101    /// `dist/` prefix stripped). Multi-arch artifacts sharing a basename
102    /// (e.g. per-target makeself scratch dirs) get distinct entries here.
103    /// Raw cargo binaries discovered under `<worktree>/.det-tmp/target/`
104    /// instead get a `target/<triple>/release/<bin>` key so they are not
105    /// confused with same-basename `dist/` artifacts.
106    pub name: String,
107    /// Path as seen by the harness — workspace-relative when possible,
108    /// absolute otherwise.
109    pub path: String,
110    /// Size in bytes, taken from the last run that produced the artifact.
111    pub size_bytes: u64,
112    /// Stage name responsible for the artifact (e.g. `archive`, `sbom`).
113    /// Best-effort — the harness infers from output path conventions and
114    /// falls back to `"unknown"` when it cannot attribute.
115    pub stage: String,
116    /// `true` when every run produced an identical SHA256.
117    pub deterministic: bool,
118    /// Set when the artifact is on the allow-list. Drives the
119    /// "allowlist excluded this from drift_count" UX.
120    #[serde(skip_serializing_if = "Option::is_none")]
121    pub nondeterministic_reason: Option<String>,
122    /// Single hash when the artifact is deterministic; `None` otherwise.
123    /// Mutually exclusive with `hashes`.
124    #[serde(skip_serializing_if = "Option::is_none")]
125    pub hash: Option<String>,
126    /// Per-run hash array when the artifact drifted (length == runs).
127    /// `skip_serializing_if = "Vec::is_empty"` keeps the JSON compact for
128    /// deterministic rows.
129    #[serde(default, skip_serializing_if = "Vec::is_empty")]
130    pub hashes: Vec<String>,
131}
132
133/// One drift entry. Mirrors the spec's example shape:
134/// `{ artifact, hashes, differing_bytes_summary? }`.
135#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
136#[serde(deny_unknown_fields)]
137pub struct DriftRow {
138    /// Artifact name (matches the corresponding `ArtifactRow.name`).
139    pub artifact: String,
140    /// Per-run SHA256 hashes that differed.
141    pub hashes: Vec<String>,
142    /// Optional human-readable summary of where the bytes diverge (e.g.
143    /// `"tar entry mtimes differ at offset 0x1234"`). Heuristic; the
144    /// harness emits `None` when it cannot localize the drift.
145    #[serde(skip_serializing_if = "Option::is_none")]
146    pub differing_bytes_summary: Option<String>,
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a report with two artifacts: a deterministic archive and a
    /// drifting, allow-listed `.crate`. The `drift` list is empty.
    fn sample_report() -> DeterminismReport {
        DeterminismReport {
            schema_version: CURRENT_SCHEMA_VERSION,
            anodize_version: "0.2.1".into(),
            commit: "abc123".into(),
            commit_timestamp: 1_715_000_000,
            runs: 2,
            stages_under_test: vec!["archive".into(), "checksum".into()],
            allowlist: AllowList {
                compile_time: vec![AllowListEntry {
                    artifact: "anodizer-0.2.1.crate".into(),
                    reason: "cargo package non-determinism".into(),
                }],
                runtime: vec![],
            },
            artifacts: vec![
                ArtifactRow {
                    name: "anodizer_0.2.1_linux_amd64.tar.gz".into(),
                    path: "dist/anodizer_0.2.1_linux_amd64.tar.gz".into(),
                    size_bytes: 5_242_880,
                    stage: "archive".into(),
                    deterministic: true,
                    nondeterministic_reason: None,
                    hash: Some("sha256:abc".into()),
                    hashes: vec![],
                },
                ArtifactRow {
                    name: "anodizer-0.2.1.crate".into(),
                    path: "dist/anodizer-0.2.1.crate".into(),
                    size_bytes: 1_048_576,
                    stage: "cargo-package".into(),
                    deterministic: false,
                    nondeterministic_reason: Some("cargo package non-determinism".into()),
                    hash: None,
                    hashes: vec!["sha256:a".into(), "sha256:b".into()],
                },
            ],
            drift: vec![],
            drift_count: 0,
        }
    }

    #[test]
    fn report_roundtrips_through_json() {
        let r = sample_report();
        let s = serde_json::to_string(&r).unwrap();
        let back: DeterminismReport = serde_json::from_str(&s).unwrap();
        assert_eq!(back, r);
    }

    #[test]
    fn schema_version_constant_is_one() {
        assert_eq!(CURRENT_SCHEMA_VERSION, 1);
    }

    #[test]
    fn deterministic_row_skips_hashes_array_in_json() {
        let r = sample_report();
        // First artifact is deterministic — should NOT serialize a
        // `hashes` array (the array would imply per-run drift).
        let first = &r.artifacts[0];
        assert!(first.hashes.is_empty());
        // Serialize the row on its own so no other part of the report
        // (allow-list, sibling rows, drift list) can affect the check.
        let s = serde_json::to_string(first).unwrap();
        assert!(
            !s.contains("\"hashes\""),
            "deterministic rows must omit empty hashes array, got: {}",
            s
        );
        assert!(
            s.contains("\"hash\":\"sha256:abc\""),
            "deterministic rows must carry the single hash, got: {}",
            s
        );
    }

    #[test]
    fn nondeterministic_row_skips_singular_hash_field_in_json() {
        let r = sample_report();
        // Second artifact (nondeterministic) has `hash: None`.
        let second = &r.artifacts[1];
        assert!(second.hash.is_none());
        // Serialize just this row. Splitting the whole report on the
        // artifact name is unreliable: the same name also appears in the
        // allow-list entry, so the segment after the first occurrence is
        // not the second artifact's row.
        let s = serde_json::to_string(second).unwrap();
        // `"hash":` (with the closing quote) cannot match `"hashes":`.
        assert!(
            !s.contains("\"hash\":"),
            "nondeterministic rows must omit null hash field, got: {}",
            s
        );
        assert!(
            s.contains("\"hashes\":[\"sha256:a\",\"sha256:b\"]"),
            "nondeterministic rows must carry the per-run hashes, got: {}",
            s
        );
    }

    #[test]
    fn allowlist_missing_inner_fields_default_to_empty() {
        // Container-level `#[serde(default)]` on `AllowList` fills in
        // whichever of its own fields is absent.
        let empty: AllowList = serde_json::from_str("{}").unwrap();
        assert_eq!(empty, AllowList::default());

        let partial: AllowList =
            serde_json::from_str(r#"{"runtime": [{"artifact": "*.crate", "reason": "r"}]}"#)
                .unwrap();
        assert!(partial.compile_time.is_empty());
        assert_eq!(partial.runtime.len(), 1);
    }

    #[test]
    fn unknown_fields_are_rejected() {
        let s = r#"{
            "schema_version": 1,
            "anodize_version": "0.2.1",
            "commit": "abc",
            "commit_timestamp": 0,
            "runs": 1,
            "stages_under_test": [],
            "allowlist": { "compile_time": [], "runtime": [] },
            "artifacts": [],
            "drift": [],
            "drift_count": 0,
            "bogus_field": "should reject"
        }"#;
        let res: Result<DeterminismReport, _> = serde_json::from_str(s);
        assert!(
            res.is_err(),
            "deny_unknown_fields must reject the bogus_field"
        );
    }

    #[test]
    fn unknown_fields_rejected_on_allowlist_entry() {
        let s = r#"{
            "schema_version": 1,
            "anodize_version": "0.2.1",
            "commit": "abc",
            "commit_timestamp": 0,
            "runs": 1,
            "stages_under_test": [],
            "allowlist": {
                "compile_time": [
                    {"artifact": "x", "reason": "y", "extra": "boom"}
                ],
                "runtime": []
            },
            "artifacts": [],
            "drift": [],
            "drift_count": 0
        }"#;
        let res: Result<DeterminismReport, _> = serde_json::from_str(s);
        assert!(res.is_err(), "AllowListEntry must reject unknown fields");
    }

    #[test]
    fn drift_row_with_optional_summary_serializes() {
        let d = DriftRow {
            artifact: "foo.tar.gz".into(),
            hashes: vec!["sha256:1".into(), "sha256:2".into()],
            differing_bytes_summary: Some("tar mtime offset 0x100".into()),
        };
        let s = serde_json::to_string(&d).unwrap();
        assert!(s.contains("differing_bytes_summary"));
        let back: DriftRow = serde_json::from_str(&s).unwrap();
        assert_eq!(back, d);
    }

    #[test]
    fn drift_row_omits_summary_when_none() {
        let d = DriftRow {
            artifact: "foo.tar.gz".into(),
            hashes: vec!["sha256:1".into(), "sha256:2".into()],
            differing_bytes_summary: None,
        };
        let s = serde_json::to_string(&d).unwrap();
        assert!(!s.contains("differing_bytes_summary"));
    }
}