anodizer_core/determinism.rs
1//! SOURCE_DATE_EPOCH seeding + compile-time / runtime allow-list state.
2//!
3//! `DeterminismState` is the per-run home for:
4//! - `sde`: the SOURCE_DATE_EPOCH value (seconds since epoch) that every
5//! stage exports into subprocess env so artifacts have deterministic
6//! timestamps.
//! - `compile_time_allowlist`: artifact-pattern -> reason pairs known at
//!   build time (tool-bug allow-lists for cargo `.crate`, installer
//!   formats such as `.rpm` / `.msi` / `.dmg`, etc.).
10//! - `runtime_allowlist`: operator-supplied opt-outs via the
11//! `--allow-nondeterministic <name>=<reason>` CLI flag.
12//!
13//! Both lists are surfaced into the run-summary JSON
14//! (`determinism_allowlist.compile_time` and `.runtime`) and the
15//! per-artifact `PublishEvidence.nondeterministic` field. On collision
16//! between the two lists, the compile-time reason wins on the per-
17//! artifact field; both entries still appear in the report so the
18//! audit trail is complete.
19
20use anyhow::Result;
21use serde::{Deserialize, Serialize};
22use std::process::Command;
23
/// Per-run determinism state: the SOURCE_DATE_EPOCH seed plus the two
/// allow-lists (built-in and operator-supplied) of artifacts that are
/// permitted to be non-deterministic.
///
/// The derived `Default` yields an epoch of `0` and two empty lists; use
/// [`DeterminismState::seed_from_commit`] to obtain a state carrying the
/// built-in compile-time entries.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct DeterminismState {
    /// SOURCE_DATE_EPOCH value in seconds since the UNIX epoch; written
    /// into subprocess environments by [`DeterminismState::export_env`].
    pub sde: i64,
    /// `(pattern, reason)` pairs known at build time. Consulted before
    /// `runtime_allowlist` when resolving a reason for an artifact.
    pub compile_time_allowlist: Vec<(String, String)>,
    /// `(name-or-pattern, reason)` pairs supplied by the operator and
    /// appended through [`DeterminismState::append_runtime`].
    pub runtime_allowlist: Vec<(String, String)>,
}
30
31impl DeterminismState {
32 /// Seed from a commit timestamp (seconds since UNIX epoch). All built-
33 /// in compile-time allow-list entries listed in the spec's contract
34 /// table are added here.
35 ///
36 /// Returns `Err` when `commit_ts` is negative — a negative epoch would
37 /// propagate a bogus `SOURCE_DATE_EPOCH` into child processes (where
38 /// shells / build tools may misinterpret it) and almost always
39 /// indicates a corrupted commit graph or a test passing a sentinel
40 /// like `-1`. Fail-fast is the correct UX for a determinism API.
41 ///
42 /// ## Compile-time allow-list scope
43 ///
44 /// Each entry below corresponds to an artifact pattern the
45 /// [`crate::determinism_report`] verification harness will actually
46 /// see in `dist/`. Entries are matched by `*.ext` suffix or exact
47 /// filename against the basename of every file the harness walks
48 /// under the per-run worktree's `dist/` tree. Pattern names that do
49 /// not match any real emitter output are dead code (silently never
50 /// resolve) — keep this list aligned with what stages actually drop
51 /// into `dist/`.
52 ///
53 /// Notably absent (and intentionally so):
54 ///
55 /// - `docker-manifest-descriptor` / `docker-image-blob`: the docker
56 /// stage is in [`crate::determinism_runner::SIDE_EFFECT_STAGES`]
57 /// and skipped by the harness; the only docker file that lands in
58 /// `dist/` is a `.digest` text file written by buildx (a
59 /// deterministic sha256). No need for an allow-list entry.
60 /// - `apple-notarization-receipt`: the notarize stage mutates
61 /// existing artifacts in-place (staples) rather than emitting new
62 /// files; no separate "receipt" artifact lands in `dist/`.
63 /// - `*.exe-nsis`: makensis writes plain `.exe` files into
64 /// `dist/windows/`; the suffix `.exe-nsis` matches nothing the
65 /// harness ever sees. NSIS-built `.exe` files only appear when
66 /// running on Windows (or under Wine), and operators can use the
67 /// runtime `--allow-nondeterministic <name>=<reason>` flag on
68 /// those releases rather than hard-coding a dead sentinel here.
69 pub fn seed_from_commit(commit_ts: i64) -> Result<Self> {
70 if commit_ts < 0 {
71 anyhow::bail!(
72 "commit_ts must be non-negative (got {}); a corrupted commit graph or future-bug? \
73 Negative SOURCE_DATE_EPOCH would propagate to child processes and be \
74 misinterpreted by shells/build tools.",
75 commit_ts
76 );
77 }
78 // Per spec contract table: these are the artifacts whose
79 // deeper reproducibility work is deferred. Listed up-front so
80 // every stage that consumes them sees the same allow-list.
81 // Allow-listed installer formats AND their `.sha256` sidecars —
82 // the sidecar hashes a non-deterministic source so the sidecar
83 // itself is non-deterministic, but it's not an independent
84 // determinism finding worth surfacing.
85 let installer_allow: &[(&str, &str)] = &[
86 (
87 "*.crate",
88 "cargo package non-determinism, tracked in determinism-followups",
89 ),
90 (
91 "*.rpm",
92 "rpmbuild reproducibility deferred to determinism-installers follow-up",
93 ),
94 (
95 "*.msi",
96 "wix/candle/light reproducibility deferred to determinism-installers follow-up",
97 ),
98 (
99 "*.dmg",
100 "hdiutil reproducibility deferred to determinism-installers follow-up",
101 ),
102 (
103 "*.pkg",
104 "pkgbuild reproducibility deferred to determinism-installers follow-up",
105 ),
106 (
107 "*.deb",
108 "dpkg-deb reproducibility varies by version; tracked in determinism-installers",
109 ),
110 (
111 "*.snap",
112 "snapcraft pack runs deterministically when SOURCE_DATE_EPOCH propagates (harness env exports it; mksquashfs respects it via craft-parts); allowlisted as defense-in-depth in case snapcraft introduces non-mtime variance",
113 ),
114 ];
115 let mut compile_time_allowlist: Vec<(String, String)> = Vec::new();
116 for (pattern, reason) in installer_allow {
117 compile_time_allowlist.push(((*pattern).into(), (*reason).into()));
118 compile_time_allowlist.push((
119 format!("{}.sha256", pattern),
120 format!("derivative of {pattern}: {reason}"),
121 ));
122 }
123
124 Ok(Self {
125 sde: commit_ts,
126 compile_time_allowlist,
127 runtime_allowlist: Vec::new(),
128 })
129 }
130
131 /// Export SOURCE_DATE_EPOCH onto a `std::process::Command` so
132 /// child subprocesses (cargo, tar, sbom tools, etc.) see the
133 /// reproducible epoch.
134 pub fn export_env(&self, cmd: &mut Command) {
135 cmd.env("SOURCE_DATE_EPOCH", self.sde.to_string());
136 }
137
138 /// Resolve the allow-list reason for an artifact name. Compile-time
139 /// entries win on collision per the spec's "Operator escape /
140 /// Precedence on collision" section. Returns None when the artifact
141 /// is not in either list.
142 pub fn resolve_reason(&self, artifact: &str) -> Option<&str> {
143 // Compile-time first
144 for (name, reason) in &self.compile_time_allowlist {
145 if matches_artifact_pattern(name, artifact) {
146 return Some(reason.as_str());
147 }
148 }
149 // Then runtime
150 for (name, reason) in &self.runtime_allowlist {
151 if matches_artifact_pattern(name, artifact) {
152 return Some(reason.as_str());
153 }
154 }
155 None
156 }
157
158 /// Append a runtime allow-list entry. Caller is the CLI flag
159 /// handler for `--allow-nondeterministic <name>=<reason>`.
160 pub fn append_runtime(&mut self, artifact: String, reason: String) {
161 self.runtime_allowlist.push((artifact, reason));
162 }
163}
164
/// Minimal glob matcher for allow-list patterns.
///
/// A pattern starting with `*` (for example `*.ext`) matches every
/// artifact whose name ends with the text after that `*`; any other
/// pattern must equal the artifact name exactly. This narrow case is
/// handled by hand so no globbing crate has to be pulled in.
fn matches_artifact_pattern(pattern: &str, artifact: &str) -> bool {
    match pattern.strip_prefix('*') {
        // Leading wildcard: only the remainder has to match, as a suffix.
        Some(tail) => artifact.ends_with(tail),
        // No wildcard: the whole name must be identical.
        None => artifact == pattern,
    }
}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Realistic positive commit timestamp shared by several tests.
    const SAMPLE_TS: i64 = 1_715_000_000;

    /// Seed a state from `ts`, panicking if seeding is rejected.
    fn seeded(ts: i64) -> DeterminismState {
        DeterminismState::seed_from_commit(ts).expect("non-negative")
    }

    #[test]
    fn sde_from_commit_timestamp_is_idempotent() {
        // Seeding twice from the same timestamp must give equal states.
        let first = seeded(SAMPLE_TS);
        assert_eq!(first.sde, SAMPLE_TS);
        let second = seeded(SAMPLE_TS);
        assert_eq!(first, second);
    }

    #[test]
    fn compile_time_allowlist_resolves_for_cargo_crate() {
        let state = seeded(0);
        let reason = state
            .resolve_reason("anodizer-0.2.1.crate")
            .expect("matches *.crate");
        assert!(reason.contains("cargo package"));
    }

    #[test]
    fn compile_time_allowlist_resolves_for_rpm() {
        assert!(seeded(0).resolve_reason("foo-1.0.rpm").is_some());
    }

    #[test]
    fn nondeterministic_allowlist_compile_time_wins_on_collision() {
        let mut state = seeded(0);
        // The operator entry shadows a built-in pattern; the built-in
        // reason must still be the one reported, so the report carries
        // the deeper rationale.
        let shadowing_pattern = String::from("*.crate");
        let shadowing_reason = String::from("operator escape (wrong runtime reason)");
        state.append_runtime(shadowing_pattern, shadowing_reason);

        let reason = state.resolve_reason("anodizer-0.2.1.crate").unwrap();
        assert!(
            reason.contains("cargo package"),
            "compile-time reason takes precedence"
        );
    }

    #[test]
    fn nondeterministic_allowlist_serializes_with_both_categories() {
        let mut state = seeded(0);
        state.append_runtime("foo.bin".into(), "tool-bug-1234".into());

        let json = serde_json::to_string(&state).unwrap();
        assert!(json.contains("compile_time_allowlist"));
        assert!(json.contains("runtime_allowlist"));
        assert!(json.contains("foo.bin"));
    }

    #[test]
    fn export_env_sets_source_date_epoch() {
        let mut cmd = Command::new("true");
        seeded(SAMPLE_TS).export_env(&mut cmd);

        // An entry whose value is `None` is an explicit removal and
        // does not count as an exported variable.
        let epoch = cmd
            .get_envs()
            .find(|(key, _)| *key == "SOURCE_DATE_EPOCH")
            .and_then(|(_, value)| value);
        assert!(epoch.is_some());
        assert_eq!(epoch.unwrap(), "1715000000");
    }

    #[test]
    fn resolve_reason_returns_none_for_unrecognized() {
        let state = seeded(0);
        assert!(state.resolve_reason("unrelated.txt").is_none());
    }

    #[test]
    fn seed_from_commit_accepts_zero() {
        // Epoch zero (1970-01-01) is a legitimate sentinel: some
        // determinism modes anchor SDE to the UNIX epoch when the commit
        // graph is unusable, so it must be accepted.
        let state = DeterminismState::seed_from_commit(0).expect("zero is non-negative");
        assert_eq!(state.sde, 0);
    }

    #[test]
    fn seed_from_commit_accepts_positive() {
        // A typical real-world commit timestamp.
        assert_eq!(seeded(SAMPLE_TS).sde, SAMPLE_TS);
    }

    #[test]
    fn seed_from_commit_rejects_negative() {
        let err = DeterminismState::seed_from_commit(-1).expect_err("negative must error");
        let msg = format!("{err:#}");
        let names_constraint = msg.contains("non-negative");
        let names_input = msg.contains("-1");
        assert!(
            names_constraint && names_input,
            "error must name the bad input and the constraint: {msg}"
        );
    }
}