anodizer_core/determinism_report.rs
1//! Determinism harness report types.
2//!
3//! `DeterminismReport` is the canonical JSON shape emitted by
4//! `anodize check determinism` at
5//! `dist/run-<commit>/determinism.json`. The shape is fixed by the
6//! release-resilience spec ([determinism harness report]) — every
7//! field is consumed by downstream CI parsers, so the serde contract is
8//! load-bearing:
9//!
10//! - `schema_version: 1` (constant; bump only on a breaking shape change).
11//! - `#[serde(deny_unknown_fields)]` enforced on every struct so a typo'd
12//! field in a downstream-edited report fails loudly instead of being
13//! silently dropped.
14//!
15//! These types live in `anodizer-core` (not the CLI crate) so future CI
16//! parsers can deserialize the report without pulling in the entire CLI
17//! dependency tree.
18
19use serde::{Deserialize, Serialize};
20
/// Schema version the harness currently writes into every report.
///
/// Increment it whenever a field is renamed or removed in a breaking way.
/// Callers that deserialize a report should match on this value first and
/// only then consume the rest of the payload.
pub const CURRENT_SCHEMA_VERSION: u32 = 1;
25
26/// Top-level determinism report shape.
27///
28/// Emitted at `dist/run-<commit>/determinism.json` after every
29/// `anodize check determinism` run. Non-zero exit accompanies a non-empty
30/// `drift` list.
31#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
32#[serde(deny_unknown_fields)]
33pub struct DeterminismReport {
34 /// Schema version — currently `1`. See [`CURRENT_SCHEMA_VERSION`].
35 pub schema_version: u32,
36 /// `anodize` crate version that produced the report.
37 pub anodize_version: String,
38 /// Full commit SHA of HEAD at harness invocation time.
39 pub commit: String,
40 /// Committer timestamp (seconds since UNIX epoch) of `commit`. In
41 /// `--snapshot` mode this is the resolved snapshot-SDE, which may
42 /// differ from the raw commit timestamp when the tree is dirty.
43 pub commit_timestamp: i64,
44 /// Number of from-clean rebuilds the harness performed.
45 pub runs: u32,
46 /// Ordered list of stage names actually exercised (e.g.
47 /// `["build", "archive", "sbom", "sign", "checksum"]`).
48 pub stages_under_test: Vec<String>,
49 /// Compile-time and runtime allow-lists carried through from
50 /// [`crate::DeterminismState`].
51 pub allowlist: AllowList,
52 /// Per-artifact row, one entry per distinct artifact name seen across
53 /// any run. Includes both deterministic and drifting artifacts.
54 pub artifacts: Vec<ArtifactRow>,
55 /// Drift rows — one entry per artifact whose SHA256 differed across
56 /// runs AND was NOT covered by `allowlist`. Empty when the harness
57 /// passes.
58 pub drift: Vec<DriftRow>,
59 /// `drift.len() as u32`, hoisted to a top-level field so CI parsers
60 /// can short-circuit on the integer without walking the array.
61 pub drift_count: u32,
62}
63
64/// Compile-time + runtime allow-list pair, mirroring
65/// [`crate::DeterminismState::compile_time_allowlist`] /
66/// [`crate::DeterminismState::runtime_allowlist`].
67///
68/// `#[serde(default)]` so an absent `allowlist` field deserializes to an
69/// empty pair instead of erroring; harness emits the field always.
70#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
71#[serde(default, deny_unknown_fields)]
72pub struct AllowList {
73 /// Compile-time entries seeded by [`crate::DeterminismState::seed_from_commit`].
74 pub compile_time: Vec<AllowListEntry>,
75 /// Runtime entries added via `anodize release --allow-nondeterministic`.
76 pub runtime: Vec<AllowListEntry>,
77}
78
79/// One allow-list entry: an artifact name (or `*.ext` glob) and the
80/// operator-facing reason it is exempt from drift counting.
81#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
82#[serde(deny_unknown_fields)]
83pub struct AllowListEntry {
84 /// Artifact name or `*.ext` glob (see
85 /// [`crate::DeterminismState`] for pattern semantics).
86 pub artifact: String,
87 /// Human-readable reason surfaced into the report so consumers can
88 /// audit the rationale alongside the SHA256SUMS file.
89 pub reason: String,
90}
91
92/// One row per emitted artifact.
93///
94/// `deterministic=true` artifacts carry a single `hash`; drifting
95/// artifacts carry the per-run array under `hashes` (and may still have
96/// `nondeterministic_reason` set when allow-listed).
97#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
98#[serde(deny_unknown_fields)]
99pub struct ArtifactRow {
100 /// Dist-root-relative path of the artifact (forward-slash-normalized,
101 /// `dist/` prefix stripped). Multi-arch artifacts sharing a basename
102 /// (e.g. per-target makeself scratch dirs) get distinct entries here.
103 /// Raw cargo binaries discovered under `<worktree>/.det-tmp/target/`
104 /// instead get a `target/<triple>/release/<bin>` key so they are not
105 /// confused with same-basename `dist/` artifacts.
106 pub name: String,
107 /// Path as seen by the harness — workspace-relative when possible,
108 /// absolute otherwise.
109 pub path: String,
110 /// Size in bytes, taken from the last run that produced the artifact.
111 pub size_bytes: u64,
112 /// Stage name responsible for the artifact (e.g. `archive`, `sbom`).
113 /// Best-effort — the harness infers from output path conventions and
114 /// falls back to `"unknown"` when it cannot attribute.
115 pub stage: String,
116 /// `true` when every run produced an identical SHA256.
117 pub deterministic: bool,
118 /// Set when the artifact is on the allow-list. Drives the
119 /// "allowlist excluded this from drift_count" UX.
120 #[serde(skip_serializing_if = "Option::is_none")]
121 pub nondeterministic_reason: Option<String>,
122 /// Single hash when the artifact is deterministic; `None` otherwise.
123 /// Mutually exclusive with `hashes`.
124 #[serde(skip_serializing_if = "Option::is_none")]
125 pub hash: Option<String>,
126 /// Per-run hash array when the artifact drifted (length == runs).
127 /// `skip_serializing_if = "Vec::is_empty"` keeps the JSON compact for
128 /// deterministic rows.
129 #[serde(default, skip_serializing_if = "Vec::is_empty")]
130 pub hashes: Vec<String>,
131}
132
133/// One drift entry. Mirrors the spec's example shape:
134/// `{ artifact, hashes, differing_bytes_summary? }`.
135#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
136#[serde(deny_unknown_fields)]
137pub struct DriftRow {
138 /// Artifact name (matches the corresponding `ArtifactRow.name`).
139 pub artifact: String,
140 /// Per-run SHA256 hashes that differed.
141 pub hashes: Vec<String>,
142 /// Optional human-readable summary of where the bytes diverge (e.g.
143 /// `"tar entry mtimes differ at offset 0x1234"`). Heuristic; the
144 /// harness emits `None` when it cannot localize the drift.
145 #[serde(skip_serializing_if = "Option::is_none")]
146 pub differing_bytes_summary: Option<String>,
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Two-run fixture with one deterministic archive and one allow-listed,
    /// drifting `.crate` artifact; `drift` is empty because the only
    /// drifting artifact is covered by the compile-time allow-list.
    fn sample_report() -> DeterminismReport {
        DeterminismReport {
            schema_version: CURRENT_SCHEMA_VERSION,
            anodize_version: "0.2.1".into(),
            commit: "abc123".into(),
            commit_timestamp: 1_715_000_000,
            runs: 2,
            stages_under_test: vec!["archive".into(), "checksum".into()],
            allowlist: AllowList {
                compile_time: vec![AllowListEntry {
                    artifact: "anodizer-0.2.1.crate".into(),
                    reason: "cargo package non-determinism".into(),
                }],
                runtime: vec![],
            },
            artifacts: vec![
                ArtifactRow {
                    name: "anodizer_0.2.1_linux_amd64.tar.gz".into(),
                    path: "dist/anodizer_0.2.1_linux_amd64.tar.gz".into(),
                    size_bytes: 5_242_880,
                    stage: "archive".into(),
                    deterministic: true,
                    nondeterministic_reason: None,
                    hash: Some("sha256:abc".into()),
                    hashes: vec![],
                },
                ArtifactRow {
                    name: "anodizer-0.2.1.crate".into(),
                    path: "dist/anodizer-0.2.1.crate".into(),
                    size_bytes: 1_048_576,
                    stage: "cargo-package".into(),
                    deterministic: false,
                    nondeterministic_reason: Some("cargo package non-determinism".into()),
                    hash: None,
                    hashes: vec!["sha256:a".into(), "sha256:b".into()],
                },
            ],
            drift: vec![],
            drift_count: 0,
        }
    }

    #[test]
    fn report_roundtrips_through_json() {
        let r = sample_report();
        let s = serde_json::to_string(&r).unwrap();
        let back: DeterminismReport = serde_json::from_str(&s).unwrap();
        assert_eq!(back, r);
    }

    #[test]
    fn schema_version_constant_is_one() {
        assert_eq!(CURRENT_SCHEMA_VERSION, 1);
    }

    #[test]
    fn deterministic_row_skips_hashes_array_in_json() {
        let r = sample_report();
        // First artifact is deterministic — should NOT serialize a
        // `hashes` array (the array would imply per-run drift).
        let first = &r.artifacts[0];
        assert!(first.hashes.is_empty());

        // Check the row itself: the key must be absent, not merely empty.
        let row = serde_json::to_value(first).unwrap();
        assert!(
            row.get("hashes").is_none(),
            "deterministic rows must omit empty hashes array, got: {}",
            row
        );

        // And no empty array may appear anywhere in the full report.
        let s = serde_json::to_string(&r).unwrap();
        assert!(
            !s.contains("\"hashes\":[]"),
            "deterministic rows must omit empty hashes array, got: {}",
            s
        );
    }

    #[test]
    fn nondeterministic_row_skips_singular_hash_field_in_json() {
        let r = sample_report();
        // Second artifact (nondeterministic) has `hash: None`.
        let second = &r.artifacts[1];
        assert!(second.hash.is_none());

        // Serialize the row on its own. Splitting the full report on the
        // artifact name is unreliable: the same name also appears in the
        // allow-list entry and inside `path`, so a positional split ends up
        // inspecting text that is not part of this row.
        let row = serde_json::to_value(second).unwrap();
        assert!(
            row.get("hash").is_none(),
            "nondeterministic rows must omit null hash field, got: {}",
            row
        );
        // The per-run array is what a drifting row carries instead.
        assert!(
            row.get("hashes").is_some(),
            "nondeterministic rows must carry the per-run hashes array, got: {}",
            row
        );
    }

    #[test]
    fn unknown_fields_are_rejected() {
        let s = r#"{
            "schema_version": 1,
            "anodize_version": "0.2.1",
            "commit": "abc",
            "commit_timestamp": 0,
            "runs": 1,
            "stages_under_test": [],
            "allowlist": { "compile_time": [], "runtime": [] },
            "artifacts": [],
            "drift": [],
            "drift_count": 0,
            "bogus_field": "should reject"
        }"#;
        let res: Result<DeterminismReport, _> = serde_json::from_str(s);
        assert!(
            res.is_err(),
            "deny_unknown_fields must reject the bogus_field"
        );
    }

    #[test]
    fn unknown_fields_rejected_on_allowlist_entry() {
        let s = r#"{
            "schema_version": 1,
            "anodize_version": "0.2.1",
            "commit": "abc",
            "commit_timestamp": 0,
            "runs": 1,
            "stages_under_test": [],
            "allowlist": {
                "compile_time": [
                    {"artifact": "x", "reason": "y", "extra": "boom"}
                ],
                "runtime": []
            },
            "artifacts": [],
            "drift": [],
            "drift_count": 0
        }"#;
        let res: Result<DeterminismReport, _> = serde_json::from_str(s);
        assert!(res.is_err(), "AllowListEntry must reject unknown fields");
    }

    #[test]
    fn allowlist_object_with_missing_keys_defaults_to_empty() {
        // Container-level `#[serde(default)]` on `AllowList` covers keys
        // missing *inside* the allow-list object.
        let s = r#"{
            "schema_version": 1,
            "anodize_version": "0.2.1",
            "commit": "abc",
            "commit_timestamp": 0,
            "runs": 1,
            "stages_under_test": [],
            "allowlist": {},
            "artifacts": [],
            "drift": [],
            "drift_count": 0
        }"#;
        let report: DeterminismReport = serde_json::from_str(s).unwrap();
        assert_eq!(report.allowlist, AllowList::default());
    }

    #[test]
    fn drift_row_with_optional_summary_serializes() {
        let d = DriftRow {
            artifact: "foo.tar.gz".into(),
            hashes: vec!["sha256:1".into(), "sha256:2".into()],
            differing_bytes_summary: Some("tar mtime offset 0x100".into()),
        };
        let s = serde_json::to_string(&d).unwrap();
        assert!(s.contains("differing_bytes_summary"));
        let back: DriftRow = serde_json::from_str(&s).unwrap();
        assert_eq!(back, d);
    }

    #[test]
    fn drift_row_omits_summary_when_none() {
        let d = DriftRow {
            artifact: "foo.tar.gz".into(),
            hashes: vec!["sha256:1".into(), "sha256:2".into()],
            differing_bytes_summary: None,
        };
        let s = serde_json::to_string(&d).unwrap();
        assert!(!s.contains("differing_bytes_summary"));
    }
}
308}