Skip to main content

anodizer_core/
determinism.rs

1//! SOURCE_DATE_EPOCH seeding + compile-time / runtime allow-list state.
2//!
3//! `DeterminismState` is the per-run home for:
4//! - `sde`: the SOURCE_DATE_EPOCH value (seconds since epoch) that every
5//!   stage exports into subprocess env so artifacts have deterministic
6//!   timestamps.
7//! - `compile_time_allowlist`: artifact-name -> reason pairs known at
8//!   build time (tool-bug allow-lists for cargo .crate, docker manifest
9//!   descriptors, etc.).
10//! - `runtime_allowlist`: operator-supplied opt-outs via the
11//!   `--allow-nondeterministic <name>=<reason>` CLI flag.
12//!
13//! Both lists are surfaced into the run-summary JSON
14//! (`determinism_allowlist.compile_time` and `.runtime`) and the
15//! per-artifact `PublishEvidence.nondeterministic` field. On collision
16//! between the two lists, the compile-time reason wins on the per-
17//! artifact field; both entries still appear in the report so the
18//! audit trail is complete.
19
20use anyhow::Result;
21use serde::{Deserialize, Serialize};
22use std::process::Command;
23
/// Per-run determinism state: the SOURCE_DATE_EPOCH seed plus the two
/// allow-lists of artifacts permitted to be non-deterministic.
///
/// Both lists hold `(artifact-name-or-pattern, reason)` pairs. The
/// whole struct derives `Serialize` / `Deserialize`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct DeterminismState {
    /// SOURCE_DATE_EPOCH value, in seconds since the UNIX epoch, that
    /// is exported into subprocess environments.
    pub sde: i64,
    /// Allow-list entries known at build time (tool-bug allow-lists).
    pub compile_time_allowlist: Vec<(String, String)>,
    /// Operator-supplied opt-outs from the
    /// `--allow-nondeterministic <name>=<reason>` CLI flag.
    pub runtime_allowlist: Vec<(String, String)>,
}
30
31impl DeterminismState {
32    /// Seed from a commit timestamp (seconds since UNIX epoch). All built-
33    /// in compile-time allow-list entries listed in the spec's contract
34    /// table are added here.
35    ///
36    /// Returns `Err` when `commit_ts` is negative — a negative epoch would
37    /// propagate a bogus `SOURCE_DATE_EPOCH` into child processes (where
38    /// shells / build tools may misinterpret it) and almost always
39    /// indicates a corrupted commit graph or a test passing a sentinel
40    /// like `-1`. Fail-fast is the correct UX for a determinism API.
41    ///
42    /// ## Compile-time allow-list scope
43    ///
44    /// Each entry below corresponds to an artifact pattern the
45    /// [`crate::determinism_report`] verification harness will actually
46    /// see in `dist/`. Entries are matched by `*.ext` suffix or exact
47    /// filename against the basename of every file the harness walks
48    /// under the per-run worktree's `dist/` tree. Pattern names that do
49    /// not match any real emitter output are dead code (silently never
50    /// resolve) — keep this list aligned with what stages actually drop
51    /// into `dist/`.
52    ///
53    /// Notably absent (and intentionally so):
54    ///
55    /// - `docker-manifest-descriptor` / `docker-image-blob`: the docker
56    ///   stage is in [`crate::determinism_runner::SIDE_EFFECT_STAGES`]
57    ///   and skipped by the harness; the only docker file that lands in
58    ///   `dist/` is a `.digest` text file written by buildx (a
59    ///   deterministic sha256). No need for an allow-list entry.
60    /// - `apple-notarization-receipt`: the notarize stage mutates
61    ///   existing artifacts in-place (staples) rather than emitting new
62    ///   files; no separate "receipt" artifact lands in `dist/`.
63    /// - `*.exe-nsis`: makensis writes plain `.exe` files into
64    ///   `dist/windows/`; the suffix `.exe-nsis` matches nothing the
65    ///   harness ever sees. NSIS-built `.exe` files only appear when
66    ///   running on Windows (or under Wine), and operators can use the
67    ///   runtime `--allow-nondeterministic <name>=<reason>` flag on
68    ///   those releases rather than hard-coding a dead sentinel here.
69    pub fn seed_from_commit(commit_ts: i64) -> Result<Self> {
70        if commit_ts < 0 {
71            anyhow::bail!(
72                "commit_ts must be non-negative (got {}); a corrupted commit graph or future-bug? \
73                 Negative SOURCE_DATE_EPOCH would propagate to child processes and be \
74                 misinterpreted by shells/build tools.",
75                commit_ts
76            );
77        }
78        // Per spec contract table: these are the artifacts whose
79        // deeper reproducibility work is deferred. Listed up-front so
80        // every stage that consumes them sees the same allow-list.
81        // Allow-listed installer formats AND their `.sha256` sidecars —
82        // the sidecar hashes a non-deterministic source so the sidecar
83        // itself is non-deterministic, but it's not an independent
84        // determinism finding worth surfacing.
85        let installer_allow: &[(&str, &str)] = &[
86            (
87                "*.crate",
88                "cargo package non-determinism, tracked in determinism-followups",
89            ),
90            (
91                "*.rpm",
92                "rpmbuild reproducibility deferred to determinism-installers follow-up",
93            ),
94            (
95                "*.msi",
96                "wix/candle/light reproducibility deferred to determinism-installers follow-up",
97            ),
98            (
99                "*.dmg",
100                "hdiutil reproducibility deferred to determinism-installers follow-up",
101            ),
102            (
103                "*.pkg",
104                "pkgbuild reproducibility deferred to determinism-installers follow-up",
105            ),
106            (
107                "*.deb",
108                "dpkg-deb reproducibility varies by version; tracked in determinism-installers",
109            ),
110            (
111                "*.snap",
112                "snapcraft pack runs deterministically when SOURCE_DATE_EPOCH propagates (harness env exports it; mksquashfs respects it via craft-parts); allowlisted as defense-in-depth in case snapcraft introduces non-mtime variance",
113            ),
114        ];
115        let mut compile_time_allowlist: Vec<(String, String)> = Vec::new();
116        for (pattern, reason) in installer_allow {
117            compile_time_allowlist.push(((*pattern).into(), (*reason).into()));
118            compile_time_allowlist.push((
119                format!("{}.sha256", pattern),
120                format!("derivative of {pattern}: {reason}"),
121            ));
122        }
123
124        Ok(Self {
125            sde: commit_ts,
126            compile_time_allowlist,
127            runtime_allowlist: Vec::new(),
128        })
129    }
130
131    /// Export SOURCE_DATE_EPOCH onto a `std::process::Command` so
132    /// child subprocesses (cargo, tar, sbom tools, etc.) see the
133    /// reproducible epoch.
134    pub fn export_env(&self, cmd: &mut Command) {
135        cmd.env("SOURCE_DATE_EPOCH", self.sde.to_string());
136    }
137
138    /// Resolve the allow-list reason for an artifact name. Compile-time
139    /// entries win on collision per the spec's "Operator escape /
140    /// Precedence on collision" section. Returns None when the artifact
141    /// is not in either list.
142    pub fn resolve_reason(&self, artifact: &str) -> Option<&str> {
143        // Compile-time first
144        for (name, reason) in &self.compile_time_allowlist {
145            if matches_artifact_pattern(name, artifact) {
146                return Some(reason.as_str());
147            }
148        }
149        // Then runtime
150        for (name, reason) in &self.runtime_allowlist {
151            if matches_artifact_pattern(name, artifact) {
152                return Some(reason.as_str());
153            }
154        }
155        None
156    }
157
158    /// Append a runtime allow-list entry. Caller is the CLI flag
159    /// handler for `--allow-nondeterministic <name>=<reason>`.
160    pub fn append_runtime(&mut self, artifact: String, reason: String) {
161        self.runtime_allowlist.push((artifact, reason));
162    }
163}
164
/// Minimal glob matcher for allow-list patterns.
///
/// A pattern beginning with `*` (e.g. `*.ext`) matches any artifact
/// whose name ends with the text after the `*`; any other pattern must
/// equal the artifact name exactly. Kept hand-rolled so this narrow
/// case does not need a globbing crate.
fn matches_artifact_pattern(pattern: &str, artifact: &str) -> bool {
    match pattern.strip_prefix('*') {
        Some(tail) => artifact.ends_with(tail),
        None => artifact == pattern,
    }
}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Timestamp of a typical real-world commit, shared by several tests.
    const SAMPLE_TS: i64 = 1_715_000_000;

    #[test]
    fn sde_from_commit_timestamp_is_idempotent() {
        let first = DeterminismState::seed_from_commit(SAMPLE_TS).expect("non-negative");
        assert_eq!(first.sde, 1_715_000_000);
        let second = DeterminismState::seed_from_commit(SAMPLE_TS).expect("non-negative");
        assert_eq!(first, second);
    }

    #[test]
    fn compile_time_allowlist_resolves_for_cargo_crate() {
        let state = DeterminismState::seed_from_commit(0).expect("non-negative");
        let why = state
            .resolve_reason("anodizer-0.2.1.crate")
            .expect("matches *.crate");
        assert!(why.contains("cargo package"));
    }

    #[test]
    fn compile_time_allowlist_resolves_for_rpm() {
        let state = DeterminismState::seed_from_commit(0).expect("non-negative");
        let why = state.resolve_reason("foo-1.0.rpm");
        assert!(why.is_some());
    }

    #[test]
    fn nondeterministic_allowlist_compile_time_wins_on_collision() {
        let mut state = DeterminismState::seed_from_commit(0).expect("non-negative");
        // The operator adds a runtime entry under the same pattern as a
        // built-in one; the built-in reason must still be the one
        // reported, since it carries the deeper rationale.
        let runtime_pattern = String::from("*.crate");
        let runtime_reason = String::from("operator escape (wrong runtime reason)");
        state.append_runtime(runtime_pattern, runtime_reason);

        let why = state.resolve_reason("anodizer-0.2.1.crate").unwrap();
        assert!(
            why.contains("cargo package"),
            "compile-time reason takes precedence"
        );
    }

    #[test]
    fn nondeterministic_allowlist_serializes_with_both_categories() {
        let mut state = DeterminismState::seed_from_commit(0).expect("non-negative");
        state.append_runtime("foo.bin".into(), "tool-bug-1234".into());
        let json = serde_json::to_string(&state).unwrap();
        // Both list names and the runtime entry must appear in the output.
        for needle in ["compile_time_allowlist", "runtime_allowlist", "foo.bin"] {
            assert!(json.contains(needle));
        }
    }

    #[test]
    fn export_env_sets_source_date_epoch() {
        let state = DeterminismState::seed_from_commit(SAMPLE_TS).expect("non-negative");
        let mut cmd = Command::new("true");
        state.export_env(&mut cmd);
        // Only explicitly *set* variables carry a value; a removed
        // variable would surface here as `None`.
        let exported = cmd
            .get_envs()
            .find(|(key, _)| key.to_str() == Some("SOURCE_DATE_EPOCH"))
            .and_then(|(_, value)| value);
        assert!(exported.is_some());
        assert_eq!(exported.unwrap(), "1715000000");
    }

    #[test]
    fn resolve_reason_returns_none_for_unrecognized() {
        let state = DeterminismState::seed_from_commit(0).expect("non-negative");
        let why = state.resolve_reason("unrelated.txt");
        assert!(why.is_none());
    }

    #[test]
    fn seed_from_commit_accepts_zero() {
        // 1970-01-01 is a legitimate anchor: some determinism modes pin
        // SDE to the UNIX epoch when the commit graph is unusable, so
        // zero must be accepted.
        let state = DeterminismState::seed_from_commit(0).expect("zero is non-negative");
        assert_eq!(state.sde, 0);
    }

    #[test]
    fn seed_from_commit_accepts_positive() {
        // Ordinary commit timestamp.
        let state = DeterminismState::seed_from_commit(SAMPLE_TS).expect("non-negative");
        assert_eq!(state.sde, 1_715_000_000);
    }

    #[test]
    fn seed_from_commit_rejects_negative() {
        let err = DeterminismState::seed_from_commit(-1).expect_err("negative must error");
        let msg = format!("{err:#}");
        let names_constraint = msg.contains("non-negative");
        let names_input = msg.contains("-1");
        assert!(
            names_constraint && names_input,
            "error must name the bad input and the constraint: {msg}"
        );
    }
}