rosalind_receipt/
lib.rs

1//! A minimal, deterministic reproducibility receipt for a run: tool version,
2//! subcommand, BLAKE3 content hashes of inputs + outputs, and the parameters.
3//! Serialized as canonical JSON (sorted keys, no timestamps) so two identical
4//! runs produce a byte-identical manifest.
5//!
6//! The receipt is split into a deterministic **claim** (inputs, outputs, params,
7//! subcommand, versions) and a machine-/run-dependent **measurement** block (peak
8//! RSS, working set, verdict, …). The self-hash (`manifest_blake3`) covers the
9//! claim only — so the machine-dependent measured *cost* no longer perturbs it —
10//! while a second `measurement_blake3` keeps that cost locally tamper-evident.
11//!
12//! The claim hash is a cross-machine **content-address**: for schema-3 receipts the
13//! claim hashes inputs/outputs by their sorted `blake3` digests (recorded paths are
14//! dropped from the claim form, though the on-disk receipt keeps them for humans and
15//! for `verify` to re-hash files), so the same data at different paths hashes
16//! identically. Pre-3 receipts hashed paths into the claim; the version gate
17//! reproduces their form so they still self-verify.
18
19use std::collections::BTreeMap;
20use std::io::{self, Read, Write};
21use std::path::{Path, PathBuf};
22use std::sync::OnceLock;
23
24mod command;
25pub use command::CommandCapture;
26
27mod repro;
28pub use repro::{ReproOutput, ReproReceipt};
29
30mod badge;
31pub use badge::{badge_json, badge_svg};
32
/// Current receipt/feature schema version. Bump on any breaking schema change.
///
/// Version history:
/// - v2: split into a deterministic *claim* and a machine-dependent *measurement*
///   block; the self-hash (`manifest_blake3`) covers the claim only.
/// - v3: the claim hashes inputs/outputs by their sorted `blake3` digests (paths
///   dropped), so it is a cross-machine content-address — the same data at different
///   paths hashes identically.
/// - v4: the claim records build-identity (`code_git_sha`/`code_dirty`/
///   `rustc_version`/`target_triple`/`deps_lock_blake3`), committing to exactly which
///   code, toolchain, and dependencies produced the run.
/// - v5: the claim records a normalized, replayable `command` recipe (via
///   [`CommandCapture`]) plus the discrete output-affecting params and `mode`, so
///   `reproduce` can re-derive the exact invocation.
///
/// `RunManifest::finalize` stamps this value into the claim as
/// `params["schema_version"]`.
pub const MANIFEST_SCHEMA_VERSION: u32 = 5;
44
/// Build-identity stamped into every finalized claim (code / toolchain / deps — the
/// reproduction key). The binary installs the real values once at startup via
/// [`set_build_identity`]; left unset (unit tests, or a pure in-browser verifier that
/// only *checks* receipts) every field degrades to `"unknown"`, exactly as a non-git
/// build would. Keeping this crate free of `build.rs`/`env!` is what lets it compile to
/// wasm.
///
/// All five fields are plain strings; `RunManifest::finalize` copies each one into
/// `params` under a key with the same name as the field.
#[derive(Clone, Debug)]
pub struct BuildIdentity {
    /// `git rev-parse HEAD`, or `"unknown"`.
    pub code_git_sha: String,
    /// `"true"` / `"false"` / `"unknown"` — whether the tree had uncommitted changes.
    pub code_dirty: String,
    /// `rustc --version`, or `"unknown"`.
    pub rustc_version: String,
    /// The compilation target triple, or `"unknown"`.
    pub target_triple: String,
    /// BLAKE3 of `Cargo.lock`, or `"unknown"`.
    pub deps_lock_blake3: String,
}
64
/// Process-wide slot holding the installed [`BuildIdentity`]. Written at most once
/// (by [`set_build_identity`]) and read by `build_identity_pairs`; empty until then.
static BUILD_IDENTITY: OnceLock<BuildIdentity> = OnceLock::new();
66
67/// Install the build-identity once (first call wins; later calls are ignored). The
68/// `rosalind` binary calls this at startup with values its `build.rs` baked in.
69pub fn set_build_identity(identity: BuildIdentity) {
70    let _ = BUILD_IDENTITY.set(identity);
71}
72
73/// The five claim key/value pairs for the installed identity, or `"unknown"` if unset.
74fn build_identity_pairs() -> [(&'static str, String); 5] {
75    match BUILD_IDENTITY.get() {
76        Some(i) => [
77            ("code_git_sha", i.code_git_sha.clone()),
78            ("code_dirty", i.code_dirty.clone()),
79            ("rustc_version", i.rustc_version.clone()),
80            ("target_triple", i.target_triple.clone()),
81            ("deps_lock_blake3", i.deps_lock_blake3.clone()),
82        ],
83        None => {
84            let u = || "unknown".to_string();
85            [
86                ("code_git_sha", u()),
87                ("code_dirty", u()),
88                ("rustc_version", u()),
89                ("target_triple", u()),
90                ("deps_lock_blake3", u()),
91            ]
92        }
93    }
94}
95
/// Keys whose values are machine-/run-dependent measurements, not part of the
/// deterministic claim. `finalize` relocates these out of `params` into the
/// `measurements` block so the measured cost is excluded from the claim hash. The
/// single audited source of truth for the claim/measurement split.
///
/// A key listed here that is present in `params` when `finalize` runs is removed
/// from `params` and inserted into `measurements` under the same name; keys not
/// listed here stay in the claim.
pub const MEASUREMENT_KEYS: &[&str] = &[
    "peak_rss_bytes",
    "max_working_set_bytes",
    "predicted_peak_rss_bytes",
    "baseline_rss_bytes",
    "rss_residual_bytes",
    "governor",
    "contract_verdict",
];
109
/// Error returned when text is not a well-formed canonical run manifest. The
/// wrapped string is the human-readable reason.
#[derive(Debug)]
pub struct ManifestError(pub String);

impl std::fmt::Display for ManifestError {
    /// Renders as `malformed manifest: <reason>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(reason) = self;
        f.write_str("malformed manifest: ")?;
        f.write_str(reason)
    }
}

impl std::error::Error for ManifestError {}
121
/// A file referenced by a run, with its content hash.
///
/// In the on-disk canonical form each entry renders as
/// `{"blake3":"…","path":"…"}`; in the content-only claim form only the `blake3`
/// digest is rendered.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Path as recorded (normalized to a string by the caller).
    pub path: String,
    /// BLAKE3 hex digest of the file's contents.
    pub blake3: String,
}
130
/// How input/output file entries render in a canonical form. The on-disk receipt
/// keeps full `{path, blake3}`; the schema-3 claim drops the path and hashes only the
/// content digest, so the claim hash does not depend on where files live.
#[derive(Clone, Copy)]
enum FileRender {
    /// Render each file as a `{"blake3","path"}` object, entries sorted by path.
    WithPath,
    /// Render each file as its bare `blake3` digest string, entries sorted by digest.
    ContentOnly,
}
139
/// A reproducibility receipt for a single run.
///
/// `params` is the deterministic claim (together with `inputs`, `outputs`,
/// `subcommand` and `tool_version`); `measurements` holds the machine-dependent
/// block. `finalize` moves any [`MEASUREMENT_KEYS`] entries from `params` into
/// `measurements` and stamps the self-hashes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RunManifest {
    /// Rosalind version (`CARGO_PKG_VERSION`).
    pub tool_version: String,
    /// Subcommand that produced the run (e.g. `variants`, `somatic`).
    pub subcommand: String,
    /// Input files and their content hashes.
    pub inputs: Vec<FileHash>,
    /// Run parameters (sorted by key in the canonical form).
    pub params: BTreeMap<String, String>,
    /// Output files and their content hashes.
    pub outputs: Vec<FileHash>,
    /// Machine-/run-dependent measured cost (peak RSS, working set, verdict, …),
    /// excluded from the claim hash. Carries its own `measurement_blake3`.
    pub measurements: BTreeMap<String, String>,
}
157
158impl RunManifest {
159    /// A new manifest stamped with the current tool version.
160    pub fn new(subcommand: impl Into<String>) -> Self {
161        Self {
162            tool_version: env!("CARGO_PKG_VERSION").to_string(),
163            subcommand: subcommand.into(),
164            inputs: Vec::new(),
165            params: BTreeMap::new(),
166            outputs: Vec::new(),
167            measurements: BTreeMap::new(),
168        }
169    }
170
171    /// Record a machine-/run-dependent measurement (excluded from the claim hash).
172    pub fn record_measurement(&mut self, key: impl Into<String>, value: impl Into<String>) {
173        self.measurements.insert(key.into(), value.into());
174    }
175
176    /// Serialize to canonical JSON: keys sorted, arrays sorted by path, no
177    /// timestamps — so identical runs render identically. Files render as full
178    /// `{path, blake3}`; includes the measurement block (when non-empty). This is the
179    /// on-disk receipt — humans and `verify` read files from its recorded paths.
180    pub fn to_canonical_json(&self) -> String {
181        self.push_canonical(true, FileRender::WithPath)
182    }
183
184    /// The claim-only canonical JSON: never emits the measurement block. This is the
185    /// portion the self-hash commits to. For schema-3 receipts, files render as their
186    /// sorted `blake3` digests (paths dropped) so the claim hash is a cross-machine
187    /// content-address.
188    pub fn to_canonical_claim_json(&self) -> String {
189        self.push_canonical(false, self.claim_file_render())
190    }
191
192    /// Schema >= 3 → content-only claim (paths dropped, cross-machine). Older receipts
193    /// hashed paths into the claim; reproduce their form so they still self-verify.
194    fn claim_file_render(&self) -> FileRender {
195        match self
196            .params
197            .get("schema_version")
198            .and_then(|v| v.parse::<u32>().ok())
199        {
200            Some(v) if v >= 3 => FileRender::ContentOnly,
201            _ => FileRender::WithPath,
202        }
203    }
204
205    /// Render the canonical JSON, optionally including the measurement block, with
206    /// files in the requested form. Canonical key order is alphabetical, so
207    /// `measurements` sits between `inputs` and `outputs`; it is emitted only when
208    /// non-empty (a pre-v2 receipt and a measurement-free receipt are byte-identical).
209    fn push_canonical(&self, include_measurements: bool, files: FileRender) -> String {
210        let mut out = String::new();
211        out.push('{');
212        out.push_str("\"inputs\":");
213        push_files(&mut out, &self.inputs, files);
214        if include_measurements && !self.measurements.is_empty() {
215            out.push_str(",\"measurements\":");
216            push_string_map(&mut out, &self.measurements);
217        }
218        out.push_str(",\"outputs\":");
219        push_files(&mut out, &self.outputs, files);
220        out.push_str(",\"params\":");
221        push_string_map(&mut out, &self.params);
222        out.push_str(",\"subcommand\":\"");
223        out.push_str(&json_escape(&self.subcommand));
224        out.push_str("\",\"tool_version\":\"");
225        out.push_str(&json_escape(&self.tool_version));
226        out.push_str("\"}");
227        out
228    }
229
230    /// Parse a manifest from its canonical JSON form (the exact shape
231    /// `to_canonical_json` emits; all values are strings). A small hand-parser —
232    /// no general JSON dependency. Round-trips with `to_canonical_json`.
233    pub fn from_canonical_json(s: &str) -> Result<RunManifest, ManifestError> {
234        let mut p = Parser {
235            b: s.as_bytes(),
236            i: 0,
237        };
238        p.parse_manifest()
239    }
240
241    /// BLAKE3 hex of the **claim** canonical JSON with the self-hash excluded — the
242    /// content this manifest commits to. Excludes the machine-dependent measurement
243    /// block (so the measured cost does not perturb it) and, for schema-3 receipts,
244    /// recorded paths (so it is a cross-machine content-address). Deterministic;
245    /// `verify` re-derives it.
246    pub fn content_hash(&self) -> String {
247        let mut m = self.clone();
248        m.params.remove("manifest_blake3");
249        blake3_hex(m.to_canonical_claim_json().as_bytes())
250    }
251
252    /// BLAKE3 hex of the measurement block with `measurement_blake3` excluded — a
253    /// LOCAL attestation of the measured cost. Not cross-machine stable by design
254    /// (it hashes machine-dependent numbers), so it lives inside the measurement
255    /// block rather than the claim.
256    pub fn measurement_hash(&self) -> String {
257        let mut m = self.measurements.clone();
258        m.remove("measurement_blake3");
259        let mut s = String::new();
260        push_string_map(&mut s, &m);
261        blake3_hex(s.as_bytes())
262    }
263
264    /// Look up a recorded value by key, checking `measurements` then `params`. Lets
265    /// `verify` read v2 receipts (measured fields in `measurements`) and pre-v2
266    /// receipts (everything in `params`) uniformly.
267    pub fn get_recorded(&self, key: &str) -> Option<&String> {
268        self.measurements.get(key).or_else(|| self.params.get(key))
269    }
270
271    /// Whether the (hash-protected) claim records that a measurement block exists.
272    /// `verify` uses this to detect a measurement block stripped after the run — a
273    /// claim that says `has_measurements` paired with a receipt that has none.
274    pub fn claims_measurements(&self) -> bool {
275        self.params
276            .get("has_measurements")
277            .map(|v| v == "true")
278            .unwrap_or(false)
279    }
280
281    /// Check the recorded `code_git_sha` against an expected commit (prefix match, so
282    /// short SHAs work). Returns the problems found: a mismatch, a clean match from a
283    /// DIRTY tree (not reproducible from a SHA alone), or an inability to check (no /
284    /// `unknown` SHA). An empty vec means a clean, matching build.
285    pub fn check_expected_code(&self, expected: &str) -> Vec<String> {
286        // Reject a degenerate expected SHA up front: an empty / too-short / non-hex
287        // value would match loosely (or vacuously) via `starts_with` and give false
288        // confidence — a scripted `--expect-code "$MAYBE_EMPTY"` must fail, not pass.
289        if expected.len() < 7 || !expected.chars().all(|c| c.is_ascii_hexdigit()) {
290            return vec![format!(
291                "invalid --expect-code {expected:?}: expected a hex commit SHA of at least 7 chars"
292            )];
293        }
294        match self.params.get("code_git_sha").map(String::as_str) {
295            None => vec![
296                "cannot check --expect-code: the receipt records no code_git_sha (a pre-P0.3 receipt)"
297                    .to_string(),
298            ],
299            Some("unknown") => vec![
300                "cannot check --expect-code: the receipt's code_git_sha is 'unknown' (a non-git build)"
301                    .to_string(),
302            ],
303            Some(sha) if !sha.starts_with(expected) => vec![format!(
304                "code mismatch: receipt was built from {sha}, expected {expected}"
305            )],
306            Some(_) => {
307                if self.params.get("code_dirty").map(String::as_str) == Some("true") {
308                    vec![format!(
309                        "code matches {expected} but the receipt was built from a DIRTY tree \
310                         (uncommitted changes) — not reproducible from a commit SHA alone"
311                    )]
312                } else {
313                    Vec::new()
314                }
315            }
316        }
317    }
318
319    /// Seal the receipt: partition measured fields out of the claim, stamp the
320    /// measurement hash, the schema version, then the claim self-hash LAST (so it
321    /// covers every other claim field, including the version). Idempotent.
322    pub fn finalize(&mut self) {
323        // 1. Partition: relocate machine-dependent fields OUT of the claim so the
324        //    measured cost no longer perturbs the claim hash.
325        for key in MEASUREMENT_KEYS {
326            if let Some(v) = self.params.remove(*key) {
327                self.measurements.insert((*key).to_string(), v);
328            }
329        }
330        // 2. Local measurement attestation (only when a measurement exists), plus a
331        //    DETERMINISTIC marker in the claim recording that a block exists. The
332        //    marker is covered by the claim self-hash and is identical for the same
333        //    logical run on any machine (unlike `measurement_blake3`, which hashes
334        //    machine-dependent numbers and so cannot live in the claim). It lets
335        //    `verify` catch a measurement block stripped to evade the cost checks:
336        //    dropping the block leaves the claim asserting one must exist.
337        if !self.measurements.is_empty() {
338            let mh = self.measurement_hash();
339            self.measurements
340                .insert("measurement_blake3".to_string(), mh);
341            self.params
342                .insert("has_measurements".to_string(), "true".to_string());
343        }
344        // 3. Build-identity (baked at compile time by build.rs) — part of the claim,
345        //    so it is committed to by the self-hash and forms the reproduction key:
346        //    exactly which code, toolchain, and deps produced this run.
347        for (k, v) in build_identity_pairs() {
348            self.params.insert(k.to_string(), v);
349        }
350        // 4. Stamp the version into the claim, then the claim self-hash last.
351        self.params.insert(
352            "schema_version".to_string(),
353            MANIFEST_SCHEMA_VERSION.to_string(),
354        );
355        let h = self.content_hash();
356        self.params.insert("manifest_blake3".to_string(), h);
357    }
358
359    /// `Some(true)`/`Some(false)` if a claim self-hash is recorded and matches /
360    /// mismatches; `None` if none is recorded (a pre-1.2 receipt).
361    pub fn self_hash_ok(&self) -> Option<bool> {
362        self.params
363            .get("manifest_blake3")
364            .map(|recorded| *recorded == self.content_hash())
365    }
366
367    /// `Some(true)`/`Some(false)` if a measurement self-hash is recorded and matches /
368    /// mismatches; `None` if none is recorded (no measurement block, or a pre-v2 receipt).
369    pub fn measurement_hash_ok(&self) -> Option<bool> {
370        self.measurements
371            .get("measurement_blake3")
372            .map(|recorded| *recorded == self.measurement_hash())
373    }
374}
375
/// Minimal recursive parser for the fixed canonical-manifest shape. Every value
/// is a JSON string (inputs/outputs are arrays of `{blake3, path}` objects;
/// params is an object of string→string), so the parser only needs strings,
/// arrays, and objects — no numbers/bools/null.
///
/// The parser does not skip whitespace between tokens: each `expect` call checks
/// the byte directly at the cursor.
struct Parser<'a> {
    /// The input text as raw bytes.
    b: &'a [u8],
    /// Cursor: index into `b` of the next unread byte.
    i: usize,
}
384
385impl Parser<'_> {
386    fn err(&self, m: &str) -> ManifestError {
387        ManifestError(format!("{m} at byte {}", self.i))
388    }
389
390    fn expect(&mut self, c: u8) -> Result<(), ManifestError> {
391        if self.i < self.b.len() && self.b[self.i] == c {
392            self.i += 1;
393            Ok(())
394        } else {
395            Err(self.err(&format!("expected '{}'", c as char)))
396        }
397    }
398
399    fn parse_string(&mut self) -> Result<String, ManifestError> {
400        self.expect(b'"')?;
401        let mut buf: Vec<u8> = Vec::new();
402        while self.i < self.b.len() {
403            let c = self.b[self.i];
404            self.i += 1;
405            match c {
406                b'"' => {
407                    return String::from_utf8(buf).map_err(|_| self.err("invalid utf-8"));
408                }
409                b'\\' => {
410                    let e = *self
411                        .b
412                        .get(self.i)
413                        .ok_or_else(|| self.err("trailing escape"))?;
414                    self.i += 1;
415                    match e {
416                        b'"' => buf.push(b'"'),
417                        b'\\' => buf.push(b'\\'),
418                        b'n' => buf.push(b'\n'),
419                        b'r' => buf.push(b'\r'),
420                        b't' => buf.push(b'\t'),
421                        b'u' => {
422                            let hex = self
423                                .b
424                                .get(self.i..self.i + 4)
425                                .ok_or_else(|| self.err("short \\u"))?;
426                            let cp = u32::from_str_radix(
427                                std::str::from_utf8(hex).map_err(|_| self.err("bad \\u"))?,
428                                16,
429                            )
430                            .map_err(|_| self.err("bad \\u"))?;
431                            let ch = char::from_u32(cp).ok_or_else(|| self.err("bad codepoint"))?;
432                            let mut tmp = [0u8; 4];
433                            buf.extend_from_slice(ch.encode_utf8(&mut tmp).as_bytes());
434                            self.i += 4;
435                        }
436                        _ => return Err(self.err("bad escape")),
437                    }
438                }
439                _ => buf.push(c),
440            }
441        }
442        Err(self.err("unterminated string"))
443    }
444
445    fn expect_key(&mut self, key: &str) -> Result<(), ManifestError> {
446        let k = self.parse_string()?;
447        if k != key {
448            return Err(self.err(&format!("expected key \"{key}\", got \"{k}\"")));
449        }
450        self.expect(b':')
451    }
452
453    fn parse_file_array(&mut self) -> Result<Vec<FileHash>, ManifestError> {
454        self.expect(b'[')?;
455        let mut out = Vec::new();
456        if self.i < self.b.len() && self.b[self.i] == b']' {
457            self.i += 1;
458            return Ok(out);
459        }
460        loop {
461            self.expect(b'{')?;
462            self.expect_key("blake3")?;
463            let blake3 = self.parse_string()?;
464            self.expect(b',')?;
465            self.expect_key("path")?;
466            let path = self.parse_string()?;
467            self.expect(b'}')?;
468            out.push(FileHash { path, blake3 });
469            match self.b.get(self.i) {
470                Some(b',') => self.i += 1,
471                Some(b']') => {
472                    self.i += 1;
473                    break;
474                }
475                _ => return Err(self.err("expected ',' or ']' in array")),
476            }
477        }
478        Ok(out)
479    }
480
481    fn parse_params(&mut self) -> Result<BTreeMap<String, String>, ManifestError> {
482        self.expect(b'{')?;
483        let mut map = BTreeMap::new();
484        if self.i < self.b.len() && self.b[self.i] == b'}' {
485            self.i += 1;
486            return Ok(map);
487        }
488        loop {
489            let k = self.parse_string()?;
490            self.expect(b':')?;
491            let v = self.parse_string()?;
492            // Reject duplicate keys: a crafted receipt with two values for one key
493            // could otherwise show one value to a human reader while `verify` (and
494            // `get_recorded`) act on the other.
495            if map.insert(k.clone(), v).is_some() {
496                return Err(self.err(&format!("duplicate key \"{k}\"")));
497            }
498            match self.b.get(self.i) {
499                Some(b',') => self.i += 1,
500                Some(b'}') => {
501                    self.i += 1;
502                    break;
503                }
504                _ => return Err(self.err("expected ',' or '}' in object")),
505            }
506        }
507        Ok(map)
508    }
509
510    fn parse_manifest(&mut self) -> Result<RunManifest, ManifestError> {
511        self.expect(b'{')?;
512        self.expect_key("inputs")?;
513        let inputs = self.parse_file_array()?;
514        self.expect(b',')?;
515        // `measurements` is optional: absent in pre-v2 receipts and measurement-free
516        // runs. Read the next key and branch on whether it is the measurement block.
517        let key = self.parse_string()?;
518        self.expect(b':')?;
519        let (measurements, outputs) = if key == "measurements" {
520            let m = self.parse_params()?;
521            self.expect(b',')?;
522            self.expect_key("outputs")?;
523            (m, self.parse_file_array()?)
524        } else if key == "outputs" {
525            (BTreeMap::new(), self.parse_file_array()?)
526        } else {
527            return Err(self.err(&format!(
528                "expected \"measurements\" or \"outputs\", got \"{key}\""
529            )));
530        };
531        self.expect(b',')?;
532        self.expect_key("params")?;
533        let params = self.parse_params()?;
534        self.expect(b',')?;
535        self.expect_key("subcommand")?;
536        let subcommand = self.parse_string()?;
537        self.expect(b',')?;
538        self.expect_key("tool_version")?;
539        let tool_version = self.parse_string()?;
540        self.expect(b'}')?;
541        Ok(RunManifest {
542            tool_version,
543            subcommand,
544            inputs,
545            params,
546            outputs,
547            measurements,
548        })
549    }
550}
551
552/// Render a file array in the requested form: `[{"blake3","path"}]` sorted by path
553/// (on-disk), or `["<blake3>",…]` sorted by blake3 (the content-only claim).
554fn push_files(out: &mut String, files: &[FileHash], render: FileRender) {
555    match render {
556        FileRender::WithPath => push_file_hashes(out, files),
557        FileRender::ContentOnly => push_blake3_list(out, files),
558    }
559}
560
561/// Render `["<blake3>",…]`, sorted by digest (a content multiset — duplicates kept).
562fn push_blake3_list(out: &mut String, files: &[FileHash]) {
563    let mut digests: Vec<&str> = files.iter().map(|f| f.blake3.as_str()).collect();
564    digests.sort_unstable();
565    out.push('[');
566    for (i, d) in digests.iter().enumerate() {
567        if i > 0 {
568            out.push(',');
569        }
570        out.push('"');
571        out.push_str(&json_escape(d));
572        out.push('"');
573    }
574    out.push(']');
575}
576
577/// Render a `[{"blake3":..,"path":..}, ..]` array, entries sorted by path.
578fn push_file_hashes(out: &mut String, files: &[FileHash]) {
579    let mut sorted: Vec<&FileHash> = files.iter().collect();
580    sorted.sort_by(|a, b| a.path.cmp(&b.path));
581    out.push('[');
582    for (i, f) in sorted.iter().enumerate() {
583        if i > 0 {
584            out.push(',');
585        }
586        out.push_str("{\"blake3\":\"");
587        out.push_str(&json_escape(&f.blake3));
588        out.push_str("\",\"path\":\"");
589        out.push_str(&json_escape(&f.path));
590        out.push_str("\"}");
591    }
592    out.push(']');
593}
594
595/// Render a `{"k":"v",…}` object, entries in the map's (sorted) key order. Shared by
596/// the `params` and `measurements` blocks (both are `string → string`).
597fn push_string_map(out: &mut String, map: &BTreeMap<String, String>) {
598    out.push('{');
599    for (i, (k, v)) in map.iter().enumerate() {
600        if i > 0 {
601            out.push(',');
602        }
603        out.push('"');
604        out.push_str(&json_escape(k));
605        out.push_str("\":\"");
606        out.push_str(&json_escape(v));
607        out.push('"');
608    }
609    out.push('}');
610}
611
/// Escape `s` for use inside a JSON string literal (minimal RFC-8259 escaping).
/// `"`, `\`, newline, carriage return and tab get their short escapes; any other
/// character below U+0020 becomes `\u00xx`; everything else passes through as-is.
fn json_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let short_form = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(sequence) = short_form {
            escaped.push_str(sequence);
        } else if u32::from(ch) < 0x20 {
            escaped.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
628
629/// BLAKE3 hex digest of a byte slice.
630pub fn blake3_hex(bytes: &[u8]) -> String {
631    blake3::hash(bytes).to_hex().to_string()
632}
633
634/// BLAKE3 hex digest of a file's contents, streamed in fixed-size chunks
635/// (bounded memory regardless of file size).
636pub fn blake3_file(path: &Path) -> io::Result<String> {
637    let mut hasher = blake3::Hasher::new();
638    let mut file = std::fs::File::open(path)?;
639    let mut buf = [0u8; 64 * 1024];
640    loop {
641        let n = file.read(&mut buf)?;
642        if n == 0 {
643            break;
644        }
645        hasher.update(&buf[..n]);
646    }
647    Ok(hasher.finalize().to_hex().to_string())
648}
649
/// Options for [`verify_receipt`] beyond the receipt text itself.
///
/// Derives `Default`, so `VerifyOpts::default()` supplies no budget, no expected
/// commit, and does not re-hash files.
#[derive(Debug, Default)]
pub struct VerifyOpts {
    /// Budget (MiB) to check the recorded peak against (overrides the recorded one).
    pub budget_mb: Option<u64>,
    /// Assert the receipt was built from exactly this commit SHA (prefix match).
    pub expect_code: Option<String>,
    /// Re-hash the files at the recorded input/output paths and check their digests.
    pub rehash_files: bool,
}
660
/// The outcome of [`verify_receipt`]: failures (empty == ok), informational notes, and
/// the parsed manifest (when parsing succeeded).
///
/// When the receipt text fails to parse, `ok` is `false`, `problems` holds the single
/// parse error, and `manifest` is `None`.
#[derive(Debug)]
pub struct VerifyReport {
    /// Whether the receipt passed every check.
    pub ok: bool,
    /// Human-readable failures (empty when `ok`).
    pub problems: Vec<String>,
    /// Informational, non-failing notes (e.g. "peak within budget").
    pub notes: Vec<String>,
    /// The parsed manifest, when parsing succeeded.
    pub manifest: Option<RunManifest>,
}
674
675/// Check a receipt's internal integrity — self-hashes, cross-field consistency, optional
676/// budget + expected-code — and, when `opts.rehash_files`, re-hash recorded files. The
677/// single source of truth shared by the `verify` CLI and `reproduce` (and a future WASM
678/// verifier) so they cannot drift.
679pub fn verify_receipt(text: &str, opts: &VerifyOpts) -> VerifyReport {
680    let manifest = match RunManifest::from_canonical_json(text) {
681        Ok(m) => m,
682        Err(e) => {
683            return VerifyReport {
684                ok: false,
685                problems: vec![format!("parse error: {e}")],
686                notes: Vec::new(),
687                manifest: None,
688            }
689        }
690    };
691    let mut problems: Vec<String> = Vec::new();
692    let mut notes: Vec<String> = Vec::new();
693
694    // Re-hash inputs + outputs against the recorded digests (CLI sets this).
695    if opts.rehash_files {
696        for (kind, files) in [("input", &manifest.inputs), ("output", &manifest.outputs)] {
697            for f in files {
698                match blake3_file(Path::new(&f.path)) {
699                    Ok(h) if h == f.blake3 => {}
700                    Ok(h) => problems.push(format!(
701                        "{kind} {} hash mismatch: recorded {}, now {}",
702                        f.path, f.blake3, h
703                    )),
704                    Err(e) => problems.push(format!("{kind} {} unreadable: {e}", f.path)),
705                }
706            }
707        }
708    }
709
710    // Strictly parse the numeric fields. A PRESENT-but-unparseable field is corruption,
711    // not absence. `get_recorded` reads measurements (v2) or params (pre-v2) uniformly.
712    let parse_num = |k: &str, problems: &mut Vec<String>| -> Option<u64> {
713        match manifest.get_recorded(k) {
714            None => None,
715            Some(v) => match v.parse::<u64>() {
716                Ok(n) => Some(n),
717                Err(_) => {
718                    problems.push(format!("malformed numeric field {k}: {v:?}"));
719                    None
720                }
721            },
722        }
723    };
724    let recorded_peak = parse_num("peak_rss_bytes", &mut problems);
725    let recorded_ws = parse_num("max_working_set_bytes", &mut problems);
726    let recorded_budget = parse_num("memory_budget_mb", &mut problems);
727
728    // Re-check the recorded realized peak against the budget (CLI overrides manifest).
729    let budget_mb = opts.budget_mb.or(recorded_budget);
730    match (budget_mb, recorded_peak) {
731        (Some(mb), Some(peak)) => {
732            if peak <= mb.saturating_mul(1024 * 1024) {
733                notes.push(format!(
734                    "peak {} MiB within budget {mb} MiB",
735                    peak / (1 << 20)
736                ));
737            } else {
738                problems.push(format!(
739                    "recorded peak {} MiB exceeded budget {mb} MiB",
740                    peak / (1 << 20)
741                ));
742            }
743        }
744        (None, _) => notes.push("no budget to check (none supplied or recorded)".to_string()),
745        (Some(_), None) => problems.push("manifest has no recorded peak_rss_bytes".to_string()),
746    }
747
748    // Internal-consistency cross-checks: the working set cannot exceed peak RSS.
749    if let (Some(ws), Some(peak)) = (recorded_ws, recorded_peak) {
750        if ws > peak {
751            problems.push(format!(
752                "internally inconsistent: max_working_set_bytes ({ws}) exceeds peak_rss_bytes ({peak})"
753            ));
754        }
755    }
756    // A recorded verdict must agree with the recorded peak vs the recorded budget.
757    if let (Some(verdict), Some(mb), Some(peak)) = (
758        manifest
759            .get_recorded("contract_verdict")
760            .map(String::as_str),
761        recorded_budget,
762        recorded_peak,
763    ) {
764        let actually_within = peak <= mb.saturating_mul(1024 * 1024);
765        if verdict == "within" && !actually_within {
766            problems.push(format!(
767                "internally inconsistent: contract_verdict='within' but recorded peak {} MiB \
768                 exceeds recorded budget {mb} MiB",
769                peak / (1 << 20)
770            ));
771        }
772        if verdict == "over" && actually_within {
773            problems.push(format!(
774                "internally inconsistent: contract_verdict='over' but recorded peak {} MiB \
775                 is within recorded budget {mb} MiB",
776                peak / (1 << 20)
777            ));
778        }
779    }
780
781    // Claim self-hash: catches any post-write edit to the claim. A pre-1.2 receipt has none.
782    match manifest.self_hash_ok() {
783        Some(true) => {}
784        Some(false) => problems.push(
785            "manifest_blake3 mismatch: the receipt was modified after it was written".to_string(),
786        ),
787        None => {
788            notes.push("no manifest_blake3 (a pre-1.2 receipt); skipping self-hash".to_string())
789        }
790    }
791
792    // Measurement self-hash: catches an edit to a measured field the claim hash can't see.
793    match manifest.measurement_hash_ok() {
794        Some(true) => {}
795        Some(false) => problems.push(
796            "measurement_blake3 mismatch: a measured field was modified after the run".to_string(),
797        ),
798        None => {
799            if manifest.claims_measurements() {
800                problems.push(
801                    "measurement block missing: the claim records a measurement block but \
802                     the receipt has none (stripped after the run?)"
803                        .to_string(),
804                );
805            }
806        }
807    }
808
809    // Build-identity: assert the receipt came from exactly the expected commit.
810    if let Some(expected) = &opts.expect_code {
811        let code_problems = manifest.check_expected_code(expected);
812        if code_problems.is_empty() {
813            notes.push(format!("code matches {expected} (clean build)"));
814        } else {
815            problems.extend(code_problems);
816        }
817    }
818
819    VerifyReport {
820        ok: problems.is_empty(),
821        notes,
822        manifest: Some(manifest),
823        problems,
824    }
825}
826
827/// Write `<output_path>.manifest.json` next to the output, returning its path.
828pub fn write_manifest(output_path: &Path, manifest: &RunManifest) -> io::Result<PathBuf> {
829    let mut name = output_path.as_os_str().to_os_string();
830    name.push(".manifest.json");
831    let manifest_path = PathBuf::from(name);
832    let mut file = std::fs::File::create(&manifest_path)?;
833    file.write_all(manifest.to_canonical_json().as_bytes())?;
834    file.flush()?;
835    Ok(manifest_path)
836}
837
/// Verdict of a pure, file-free receipt check (the kind a browser verifier runs).
///
/// The check re-derives the claim self-hash and the measurement hash from the JSON
/// alone; the original inputs and outputs are never consulted.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ReceiptVerdict {
    /// The claim self-hash re-derives and matches, and so does the measurement hash
    /// when one is present.
    Verified,
    /// A recorded hash no longer re-derives: the receipt was edited after it was
    /// written.
    Tampered,
    /// The receipt parsed, but it carries no `manifest_blake3` to check (a
    /// pre-self-hash receipt).
    Unverifiable,
    /// The text is not a parseable canonical run manifest.
    Unparseable,
}
852
853/// A file-free verification of a receipt's own integrity (self-hash + measurement hash).
854#[derive(Debug, Clone)]
855pub struct ReceiptCheck {
856    /// The overall verdict.
857    pub verdict: ReceiptVerdict,
858    /// `Some(true/false)` once a `manifest_blake3` is present; `None` if absent.
859    pub self_hash_ok: Option<bool>,
860    /// `Some(true/false)` if a measurement block + `measurement_blake3` are present.
861    pub measurement_hash_ok: Option<bool>,
862    /// The recorded schema version, if parseable.
863    pub schema_version: Option<u32>,
864    /// The recorded subcommand, if present.
865    pub subcommand: Option<String>,
866    /// A short human-readable explanation of the verdict.
867    pub detail: String,
868}
869
870/// Verify a receipt's integrity from its JSON text alone — no inputs/outputs needed.
871///
872/// This is the tamper-evidence check (`manifest_blake3` over the canonical claim, plus
873/// the independent `measurement_blake3`) — what `rosalind verify` runs over a receipt's
874/// recorded hashes, minus the file re-hashing. It is what the in-browser verifier calls.
875pub fn verify_manifest_str(json: &str) -> ReceiptCheck {
876    let m = match RunManifest::from_canonical_json(json) {
877        Ok(m) => m,
878        Err(e) => {
879            return ReceiptCheck {
880                verdict: ReceiptVerdict::Unparseable,
881                self_hash_ok: None,
882                measurement_hash_ok: None,
883                schema_version: None,
884                subcommand: None,
885                detail: format!("not a parseable canonical run manifest: {}", e.0),
886            }
887        }
888    };
889
890    let self_hash_ok = m.self_hash_ok();
891    let measurement_hash_ok = m.measurement_hash_ok();
892    let schema_version = m
893        .params
894        .get("schema_version")
895        .and_then(|s| s.parse::<u32>().ok());
896    let subcommand = Some(m.subcommand.clone());
897
898    let (verdict, detail) = if self_hash_ok == Some(false) {
899        (
900            ReceiptVerdict::Tampered,
901            "the claim self-hash (manifest_blake3) does not re-derive — the receipt was edited after it was written".to_string(),
902        )
903    } else if measurement_hash_ok == Some(false) {
904        (
905            ReceiptVerdict::Tampered,
906            "the measurement block's measurement_blake3 does not re-derive — the recorded cost was altered".to_string(),
907        )
908    } else if self_hash_ok == Some(true) {
909        (
910            ReceiptVerdict::Verified,
911            "the claim self-hash re-derives and matches — the receipt is intact".to_string(),
912        )
913    } else {
914        (
915            ReceiptVerdict::Unverifiable,
916            "the receipt carries no manifest_blake3 to check (a pre-self-hash receipt)".to_string(),
917        )
918    };
919
920    ReceiptCheck {
921        verdict,
922        self_hash_ok,
923        measurement_hash_ok,
924        schema_version,
925        subcommand,
926        detail,
927    }
928}
929
930#[cfg(test)]
931mod tests {
932    use super::*;
933
934    #[test]
935    fn verify_manifest_str_reports_verified_for_a_finalized_receipt() {
936        let mut m = RunManifest::new("variants");
937        m.finalize();
938        let json = m.to_canonical_json();
939        let check = verify_manifest_str(&json);
940        assert_eq!(
941            check.verdict,
942            ReceiptVerdict::Verified,
943            "a fresh finalized receipt should verify; detail: {}",
944            check.detail
945        );
946        assert_eq!(check.self_hash_ok, Some(true));
947    }
948
949    #[test]
950    fn verify_manifest_str_reports_tampered_when_a_claim_byte_is_flipped() {
951        let mut m = RunManifest::new("variants");
952        m.finalize();
953        let json = m.to_canonical_json();
954        // Edit a claim field (the subcommand) without recomputing the self-hash —
955        // exactly what a human editing the .manifest.json by hand does.
956        let tampered = json.replace("\"variants\"", "\"variantz\"");
957        assert_ne!(tampered, json, "the tamper must change the JSON");
958        let check = verify_manifest_str(&tampered);
959        assert_eq!(
960            check.verdict,
961            ReceiptVerdict::Tampered,
962            "an edited claim must be caught; detail: {}",
963            check.detail
964        );
965        assert_eq!(check.self_hash_ok, Some(false));
966    }
967
968    #[test]
969    fn verify_manifest_str_reports_unparseable_for_non_manifest_json() {
970        let check = verify_manifest_str("this is not a receipt");
971        assert_eq!(check.verdict, ReceiptVerdict::Unparseable);
972    }
973
974    #[test]
975    fn verify_manifest_str_reports_tampered_when_the_measurement_block_is_edited() {
976        let mut m = RunManifest::new("variants");
977        m.record_measurement("peak_rss_bytes", "12345");
978        m.finalize();
979        let json = m.to_canonical_json();
980        // Edit the recorded peak inside the measurement block (not the claim).
981        let tampered = json.replace("12345", "99999");
982        assert_ne!(tampered, json, "the tamper must change the JSON");
983        let check = verify_manifest_str(&tampered);
984        assert_eq!(
985            check.verdict,
986            ReceiptVerdict::Tampered,
987            "an edited measurement must be caught; detail: {}",
988            check.detail
989        );
990        assert_eq!(check.measurement_hash_ok, Some(false));
991        // The claim self-hash is untouched — measurements are excluded from the claim.
992        assert_eq!(check.self_hash_ok, Some(true));
993    }
994
995    #[test]
996    fn verify_manifest_str_reports_unverifiable_without_a_self_hash() {
997        // A manifest serialized without finalize() carries no manifest_blake3.
998        let m = RunManifest::new("variants");
999        let json = m.to_canonical_json();
1000        let check = verify_manifest_str(&json);
1001        assert_eq!(check.verdict, ReceiptVerdict::Unverifiable);
1002        assert_eq!(check.self_hash_ok, None);
1003    }
1004
1005    #[test]
1006    fn verify_receipt_reports_a_tampered_claim() {
1007        let mut m = RunManifest::new("variants");
1008        m.inputs.push(FileHash {
1009            path: "a".into(),
1010            blake3: "aa".into(),
1011        });
1012        m.finalize();
1013        // Flip a byte of a recorded input digest without re-sealing — the claim
1014        // self-hash must catch it.
1015        let text = m.to_canonical_json().replace("\"aa\"", "\"ab\"");
1016        let report = verify_receipt(&text, &VerifyOpts::default());
1017        assert!(!report.ok, "tampered claim must not verify");
1018        assert!(
1019            report
1020                .problems
1021                .iter()
1022                .any(|p| p.contains("manifest_blake3")),
1023            "{:?}",
1024            report.problems
1025        );
1026    }
1027
1028    #[test]
1029    fn verify_receipt_passes_a_clean_receipt() {
1030        let mut m = RunManifest::new("variants");
1031        m.finalize();
1032        let report = verify_receipt(&m.to_canonical_json(), &VerifyOpts::default());
1033        assert!(report.ok, "{:?}", report.problems);
1034    }
1035
1036    #[test]
1037    fn blake3_is_deterministic_and_sensitive() {
1038        assert_eq!(blake3_hex(b"abc"), blake3_hex(b"abc"));
1039        assert_ne!(blake3_hex(b"abc"), blake3_hex(b"abd"));
1040        // Known BLAKE3 vector for the empty input.
1041        assert_eq!(
1042            blake3_hex(b""),
1043            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
1044        );
1045        assert_eq!(blake3_hex(b"abc").len(), 64);
1046    }
1047
1048    #[test]
1049    fn canonical_json_has_sorted_keys_and_is_exact() {
1050        let mut m = RunManifest::new("variants");
1051        m.tool_version = "0.1.0".to_string();
1052        m.inputs.push(FileHash {
1053            path: "ref.fa".to_string(),
1054            blake3: "aa".to_string(),
1055        });
1056        m.outputs.push(FileHash {
1057            path: "out.vcf".to_string(),
1058            blake3: "bb".to_string(),
1059        });
1060        m.params.insert("min_qual".to_string(), "30".to_string());
1061        m.params.insert("min_depth".to_string(), "8".to_string());
1062
1063        let json = m.to_canonical_json();
1064        assert_eq!(
1065            json,
1066            r#"{"inputs":[{"blake3":"aa","path":"ref.fa"}],"outputs":[{"blake3":"bb","path":"out.vcf"}],"params":{"min_depth":"8","min_qual":"30"},"subcommand":"variants","tool_version":"0.1.0"}"#
1067        );
1068    }
1069
1070    #[test]
1071    fn canonical_json_is_order_independent() {
1072        let mk = |order_swapped: bool| {
1073            let mut m = RunManifest::new("variants");
1074            m.tool_version = "0.1.0".to_string();
1075            let a = FileHash {
1076                path: "a.fa".to_string(),
1077                blake3: "1".to_string(),
1078            };
1079            let b = FileHash {
1080                path: "b.fa".to_string(),
1081                blake3: "2".to_string(),
1082            };
1083            if order_swapped {
1084                m.inputs.push(b);
1085                m.inputs.push(a);
1086            } else {
1087                m.inputs.push(a);
1088                m.inputs.push(b);
1089            }
1090            m.to_canonical_json()
1091        };
1092        // Inputs are sorted by path in the canonical form → order-independent.
1093        assert_eq!(mk(false), mk(true));
1094    }
1095
1096    #[test]
1097    fn json_escapes_special_characters() {
1098        let mut m = RunManifest::new("variants");
1099        m.tool_version = "0.1.0".to_string();
1100        m.params.insert("note".to_string(), "a\"b\\c".to_string());
1101        let json = m.to_canonical_json();
1102        assert!(json.contains(r#""note":"a\"b\\c""#));
1103    }
1104
1105    #[test]
1106    fn write_manifest_emits_sidecar_file() {
1107        let dir =
1108            std::env::temp_dir().join(format!("rosalind_manifest_test_{}", std::process::id()));
1109        std::fs::create_dir_all(&dir).unwrap();
1110        let out = dir.join("calls.vcf");
1111        std::fs::write(&out, b"##fileformat=VCFv4.2\n").unwrap();
1112
1113        let mut m = RunManifest::new("variants");
1114        m.tool_version = "0.1.0".to_string();
1115        m.outputs.push(FileHash {
1116            path: out.display().to_string(),
1117            blake3: blake3_file(&out).unwrap(),
1118        });
1119
1120        let manifest_path = write_manifest(&out, &m).unwrap();
1121        assert_eq!(manifest_path, dir.join("calls.vcf.manifest.json"));
1122        let written = std::fs::read_to_string(&manifest_path).unwrap();
1123        assert_eq!(written, m.to_canonical_json());
1124
1125        std::fs::remove_dir_all(&dir).ok();
1126    }
1127
1128    #[test]
1129    fn parse_round_trips_canonical_json_including_escapes() {
1130        let mut m = RunManifest::new("variants");
1131        m.tool_version = "9.9.9".to_string();
1132        m.inputs.push(FileHash {
1133            path: "weird \"path\"\twith\\escapes/和.fa".to_string(),
1134            blake3: "aa".to_string(),
1135        });
1136        m.inputs.push(FileHash {
1137            path: "a.idx".to_string(),
1138            blake3: "bb".to_string(),
1139        });
1140        m.outputs.push(FileHash {
1141            path: "out.vcf".to_string(),
1142            blake3: "cc".to_string(),
1143        });
1144        m.params
1145            .insert("contract_verdict".to_string(), "within".to_string());
1146        m.params
1147            .insert("peak_rss_bytes".to_string(), "12345".to_string());
1148        m.params
1149            .insert("note".to_string(), "line1\nline2".to_string());
1150
1151        let json = m.to_canonical_json();
1152        let parsed = RunManifest::from_canonical_json(&json).expect("parse");
1153        // serialize → parse → serialize is the identity on the canonical form.
1154        assert_eq!(parsed.to_canonical_json(), json);
1155        assert_eq!(parsed.tool_version, "9.9.9");
1156        assert_eq!(parsed.subcommand, "variants");
1157        assert_eq!(parsed.params.get("note").unwrap(), "line1\nline2");
1158        assert_eq!(parsed.params.get("contract_verdict").unwrap(), "within");
1159    }
1160
1161    #[test]
1162    fn parse_rejects_malformed() {
1163        assert!(RunManifest::from_canonical_json("not json").is_err());
1164        assert!(RunManifest::from_canonical_json("{\"inputs\":[}").is_err());
1165    }
1166
1167    #[test]
1168    fn finalize_stamps_schema_version_and_a_matching_self_hash() {
1169        let mut m = RunManifest::new("variants");
1170        m.tool_version = "0.1.0".to_string();
1171        m.params
1172            .insert("peak_rss_bytes".to_string(), "123".to_string());
1173        m.finalize();
1174        assert_eq!(
1175            m.params.get("schema_version").map(String::as_str),
1176            Some(MANIFEST_SCHEMA_VERSION.to_string().as_str())
1177        );
1178        assert!(m.params.contains_key("manifest_blake3"));
1179        assert_eq!(m.self_hash_ok(), Some(true), "fresh finalize must verify");
1180    }
1181
1182    #[test]
1183    fn tampering_any_field_breaks_the_self_hash() {
1184        let mut m = RunManifest::new("variants");
1185        m.tool_version = "0.1.0".to_string();
1186        m.params
1187            .insert("peak_rss_bytes".to_string(), "123".to_string());
1188        m.finalize();
1189        // Flip a field WITHOUT re-finalizing — the recorded hash no longer matches.
1190        m.params
1191            .insert("peak_rss_bytes".to_string(), "999".to_string());
1192        assert_eq!(m.self_hash_ok(), Some(false));
1193    }
1194
1195    #[test]
1196    fn a_manifest_without_a_self_hash_returns_none() {
1197        let mut m = RunManifest::new("variants");
1198        m.tool_version = "0.1.0".to_string();
1199        assert_eq!(m.self_hash_ok(), None);
1200    }
1201
1202    #[test]
1203    fn finalize_is_idempotent() {
1204        let mut m = RunManifest::new("variants");
1205        m.tool_version = "0.1.0".to_string();
1206        m.params
1207            .insert("peak_rss_bytes".to_string(), "123".to_string());
1208        m.finalize();
1209        let first = m.params.get("manifest_blake3").cloned();
1210        m.finalize();
1211        assert_eq!(m.params.get("manifest_blake3").cloned(), first);
1212        assert_eq!(m.self_hash_ok(), Some(true));
1213    }
1214
1215    #[test]
1216    fn claim_hash_is_stable_across_machine_dependent_measurements() {
1217        // Same logical run on two machines: identical claim, different measured cost.
1218        // The claim self-hash must match; the measurement is excluded from it.
1219        let mk = |peak: &str, ws: &str| {
1220            let mut m = RunManifest::new("variants");
1221            m.tool_version = "0.1.0".to_string();
1222            m.inputs.push(FileHash {
1223                path: "ref.fa".to_string(),
1224                blake3: "aa".to_string(),
1225            });
1226            m.outputs.push(FileHash {
1227                path: "out.vcf".to_string(),
1228                blake3: "bb".to_string(),
1229            });
1230            m.params.insert("min_qual".to_string(), "30".to_string());
1231            m.record_measurement("peak_rss_bytes", peak);
1232            m.record_measurement("max_working_set_bytes", ws);
1233            m.finalize();
1234            m
1235        };
1236        let a = mk("1000000", "4096");
1237        let b = mk("9999999", "8192");
1238        assert_eq!(
1239            a.content_hash(),
1240            b.content_hash(),
1241            "measured cost must not change the claim hash"
1242        );
1243        assert_eq!(a.self_hash_ok(), Some(true));
1244        assert_eq!(b.self_hash_ok(), Some(true));
1245        // Differing measurements DO change the measurement hash.
1246        assert_ne!(
1247            a.measurements.get("measurement_blake3"),
1248            b.measurements.get("measurement_blake3")
1249        );
1250    }
1251
1252    #[test]
1253    fn claim_excludes_but_full_form_includes_measurements() {
1254        let mut m = RunManifest::new("variants");
1255        m.tool_version = "0.1.0".to_string();
1256        m.record_measurement("peak_rss_bytes", "123");
1257        m.finalize();
1258        assert!(
1259            !m.to_canonical_claim_json().contains("peak_rss_bytes"),
1260            "claim form must not carry the measurement"
1261        );
1262        assert!(
1263            m.to_canonical_json().contains("peak_rss_bytes"),
1264            "full form must record the measurement"
1265        );
1266    }
1267
1268    #[test]
1269    fn editing_a_measurement_breaks_only_the_measurement_hash() {
1270        let mut m = RunManifest::new("variants");
1271        m.tool_version = "0.1.0".to_string();
1272        m.record_measurement("peak_rss_bytes", "123");
1273        m.finalize();
1274        assert_eq!(m.self_hash_ok(), Some(true));
1275        assert_eq!(m.measurement_hash_ok(), Some(true));
1276        // Lower the recorded peak WITHOUT re-finalizing (a tampered receipt).
1277        m.measurements
1278            .insert("peak_rss_bytes".to_string(), "1".to_string());
1279        assert_eq!(
1280            m.self_hash_ok(),
1281            Some(true),
1282            "claim hash is unaffected by the measurement edit"
1283        );
1284        assert_eq!(
1285            m.measurement_hash_ok(),
1286            Some(false),
1287            "measurement hash must catch the edit"
1288        );
1289    }
1290
1291    #[test]
1292    fn finalize_relocates_measured_keys_out_of_the_claim() {
1293        let mut m = RunManifest::new("variants");
1294        m.tool_version = "0.1.0".to_string();
1295        // Insert measured fields the legacy way (into params); finalize must relocate.
1296        m.params
1297            .insert("peak_rss_bytes".to_string(), "555".to_string());
1298        m.params
1299            .insert("contract_verdict".to_string(), "within".to_string());
1300        m.params.insert("min_qual".to_string(), "30".to_string());
1301        m.finalize();
1302        for k in ["peak_rss_bytes", "contract_verdict"] {
1303            assert!(!m.params.contains_key(k), "{k} must leave the claim");
1304            assert!(
1305                m.measurements.contains_key(k),
1306                "{k} must enter measurements"
1307            );
1308        }
1309        assert!(m.params.contains_key("min_qual"), "claim params stay put");
1310    }
1311
1312    #[test]
1313    fn pre_v2_receipt_with_measurements_in_params_still_verifies() {
1314        // A pre-v2 receipt: peak in params, no measurements key, schema 1, self-hash
1315        // over the params-inclusive claim. It must still self-verify (graceful degrade).
1316        let mut m = RunManifest::new("variants");
1317        m.tool_version = "0.1.0".to_string();
1318        m.params
1319            .insert("peak_rss_bytes".to_string(), "123".to_string());
1320        m.params
1321            .insert("schema_version".to_string(), "1".to_string());
1322        let h = m.content_hash();
1323        m.params.insert("manifest_blake3".to_string(), h);
1324
1325        assert_eq!(m.self_hash_ok(), Some(true));
1326        assert_eq!(m.measurement_hash_ok(), None, "no measurement block in v1");
1327        let json = m.to_canonical_json();
1328        assert!(
1329            !json.contains("\"measurements\""),
1330            "v1 emits no measurements key"
1331        );
1332        let parsed = RunManifest::from_canonical_json(&json).expect("parse v1");
1333        assert!(parsed.measurements.is_empty());
1334        assert_eq!(parsed.self_hash_ok(), Some(true));
1335    }
1336
1337    #[test]
1338    fn v2_receipt_round_trips_through_the_parser() {
1339        let mut m = RunManifest::new("features");
1340        m.tool_version = "9.9.9".to_string();
1341        m.inputs.push(FileHash {
1342            path: "a.idx".to_string(),
1343            blake3: "aa".to_string(),
1344        });
1345        m.outputs.push(FileHash {
1346            path: "out.tsv".to_string(),
1347            blake3: "bb".to_string(),
1348        });
1349        m.params
1350            .insert("feature_rows".to_string(), "42".to_string());
1351        m.record_measurement("peak_rss_bytes", "1000");
1352        m.record_measurement("governor", "enforced");
1353        m.finalize();
1354
1355        let json = m.to_canonical_json();
1356        let parsed = RunManifest::from_canonical_json(&json).expect("parse v2");
1357        assert_eq!(
1358            parsed.to_canonical_json(),
1359            json,
1360            "round-trip is the identity"
1361        );
1362        assert_eq!(
1363            parsed
1364                .measurements
1365                .get("peak_rss_bytes")
1366                .map(String::as_str),
1367            Some("1000")
1368        );
1369        assert_eq!(
1370            parsed.params.get("feature_rows").map(String::as_str),
1371            Some("42")
1372        );
1373        assert_eq!(parsed.self_hash_ok(), Some(true));
1374        assert_eq!(parsed.measurement_hash_ok(), Some(true));
1375    }
1376
1377    #[test]
1378    fn finalize_records_a_claim_marker_only_when_a_measurement_exists() {
1379        // With a measurement: the claim records has_measurements (so stripping the
1380        // block is detectable), and the marker is covered by the claim self-hash.
1381        let mut with = RunManifest::new("variants");
1382        with.tool_version = "0.1.0".to_string();
1383        with.record_measurement("peak_rss_bytes", "123");
1384        with.finalize();
1385        assert!(with.claims_measurements());
1386        assert_eq!(
1387            with.params.get("has_measurements").map(String::as_str),
1388            Some("true")
1389        );
1390        assert_eq!(with.self_hash_ok(), Some(true));
1391
1392        // Without a measurement (e.g. a somatic run): no marker, nothing to strip.
1393        let mut without = RunManifest::new("somatic");
1394        without.tool_version = "0.1.0".to_string();
1395        without.finalize();
1396        assert!(!without.claims_measurements());
1397        assert!(!without.params.contains_key("has_measurements"));
1398    }
1399
1400    #[test]
1401    fn stripping_the_measurement_block_leaves_the_claim_asserting_one_exists() {
1402        // The deletion-bypass guard: clearing the measurement block keeps the claim
1403        // self-hash valid (the claim never carried the block), but the claim still
1404        // records has_measurements while measurement_hash_ok drops to None — the
1405        // exact signal `verify` keys on.
1406        let mut m = RunManifest::new("variants");
1407        m.tool_version = "0.1.0".to_string();
1408        m.record_measurement("peak_rss_bytes", "900000000");
1409        m.finalize();
1410        m.measurements.clear();
1411        assert_eq!(
1412            m.self_hash_ok(),
1413            Some(true),
1414            "claim is intact after stripping"
1415        );
1416        assert_eq!(m.measurement_hash_ok(), None, "no block to hash");
1417        assert!(
1418            m.claims_measurements(),
1419            "the claim still asserts a measurement block must exist"
1420        );
1421    }
1422
1423    #[test]
1424    fn parser_rejects_a_duplicate_key() {
1425        // A crafted receipt cannot carry two values for one key (reader/verifier
1426        // shadowing). `params` and `measurements` both go through `parse_params`.
1427        let dup = r#"{"inputs":[],"outputs":[],"params":{"k":"1","k":"2"},"subcommand":"x","tool_version":"0.1.0"}"#;
1428        assert!(RunManifest::from_canonical_json(dup).is_err());
1429    }
1430
1431    #[test]
1432    fn claim_hash_is_stable_across_machine_dependent_paths() {
1433        // Same content (blake3) at DIFFERENT paths on two machines → SAME claim hash.
1434        // The keystone of P0.2b: paths are dropped from the claim form.
1435        let mk = |idx_path: &str, out_path: &str| {
1436            let mut m = RunManifest::new("variants");
1437            m.tool_version = "0.1.0".to_string();
1438            m.inputs.push(FileHash {
1439                path: idx_path.to_string(),
1440                blake3: "aa".to_string(),
1441            });
1442            m.outputs.push(FileHash {
1443                path: out_path.to_string(),
1444                blake3: "bb".to_string(),
1445            });
1446            m.params.insert("min_qual".to_string(), "30".to_string());
1447            m.finalize();
1448            m
1449        };
1450        let a = mk("/home/alice/ref.idx", "/tmp/run-1/out.vcf");
1451        let b = mk("/data/ref.idx", "out.vcf");
1452        assert_eq!(
1453            a.content_hash(),
1454            b.content_hash(),
1455            "the claim hash must not depend on recorded paths"
1456        );
1457        assert_eq!(a.self_hash_ok(), Some(true));
1458        assert_eq!(b.self_hash_ok(), Some(true));
1459    }
1460
1461    #[test]
1462    fn claim_hash_still_tracks_content() {
1463        // Different content (blake3) → different claim hash (the address is the content).
1464        let mk = |digest: &str| {
1465            let mut m = RunManifest::new("variants");
1466            m.tool_version = "0.1.0".to_string();
1467            m.inputs.push(FileHash {
1468                path: "ref.idx".to_string(),
1469                blake3: digest.to_string(),
1470            });
1471            m.finalize();
1472            m
1473        };
1474        assert_ne!(mk("aa").content_hash(), mk("bb").content_hash());
1475    }
1476
1477    #[test]
1478    fn claim_drops_paths_but_the_on_disk_form_keeps_them() {
1479        let mut m = RunManifest::new("variants");
1480        m.tool_version = "0.1.0".to_string();
1481        m.inputs.push(FileHash {
1482            path: "/home/alice/secret/ref.idx".to_string(),
1483            blake3: "aa".to_string(),
1484        });
1485        m.finalize();
1486        assert!(
1487            !m.to_canonical_claim_json().contains("/home/alice"),
1488            "the claim form must not carry the recorded path"
1489        );
1490        assert!(
1491            m.to_canonical_json().contains("/home/alice"),
1492            "the on-disk form must keep the recorded path"
1493        );
1494        // The content digest is present in BOTH.
1495        assert!(m.to_canonical_claim_json().contains("aa"));
1496    }
1497
1498    #[test]
1499    fn the_schema_gate_keeps_v2_verifying_and_makes_only_v3_path_independent() {
1500        // Seal a receipt at a given schema with a given input path.
1501        let seal = |schema: &str, path: &str| {
1502            let mut m = RunManifest::new("variants");
1503            m.tool_version = "0.1.0".to_string();
1504            m.inputs.push(FileHash {
1505                path: path.to_string(),
1506                blake3: "aa".to_string(),
1507            });
1508            m.params
1509                .insert("schema_version".to_string(), schema.to_string());
1510            let h = m.content_hash();
1511            m.params.insert("manifest_blake3".to_string(), h);
1512            m
1513        };
1514        // Back-compat: the gate reproduces each schema's own claim form, so both
1515        // self-verify — a pre-P0.2b receipt does not break.
1516        assert_eq!(
1517            seal("2", "ref.idx").self_hash_ok(),
1518            Some(true),
1519            "schema-2 must still self-verify"
1520        );
1521        assert_eq!(
1522            seal("3", "ref.idx").self_hash_ok(),
1523            Some(true),
1524            "schema-3 must self-verify"
1525        );
1526        // Isolate the file-render gate: hold the schema string fixed, vary ONLY the
1527        // path. schema 2 hashed paths into the claim → path-SENSITIVE.
1528        assert_ne!(
1529            seal("2", "/a/ref.idx").content_hash(),
1530            seal("2", "/b/ref.idx").content_hash(),
1531            "the pre-P0.2b claim form is path-inclusive"
1532        );
1533        // schema 3 drops paths → path-INDEPENDENT: the gate fired for the right reason
1534        // (the render change, not the schema string sitting inside the hashed claim).
1535        assert_eq!(
1536            seal("3", "/a/ref.idx").content_hash(),
1537            seal("3", "/b/ref.idx").content_hash(),
1538            "the schema-3 claim form is content-only"
1539        );
1540    }
1541
1542    #[test]
1543    fn finalize_stamps_build_identity_into_the_claim() {
1544        let mut m = RunManifest::new("variants");
1545        m.finalize();
1546        for k in [
1547            "code_git_sha",
1548            "code_dirty",
1549            "rustc_version",
1550            "target_triple",
1551            "deps_lock_blake3",
1552        ] {
1553            assert!(
1554                m.params.get(k).is_some_and(|v| !v.is_empty()),
1555                "finalize must stamp {k}"
1556            );
1557        }
1558        // Build-identity is in the claim → tampering it breaks the claim self-hash.
1559        assert_eq!(m.self_hash_ok(), Some(true));
1560        m.params
1561            .insert("code_git_sha".to_string(), "tampered".to_string());
1562        assert_eq!(m.self_hash_ok(), Some(false));
1563    }
1564
1565    #[test]
1566    fn check_expected_code_matches_mismatches_and_flags_dirty() {
1567        let mk = |sha: &str, dirty: &str| {
1568            let mut m = RunManifest::new("variants");
1569            m.params.insert("code_git_sha".to_string(), sha.to_string());
1570            m.params.insert("code_dirty".to_string(), dirty.to_string());
1571            m
1572        };
1573        // Exact + (>=7-char) prefix match on a clean build → no problems.
1574        assert!(mk("abc123def456", "false")
1575            .check_expected_code("abc123def456")
1576            .is_empty());
1577        assert!(mk("abc123def456", "false")
1578            .check_expected_code("abc123d")
1579            .is_empty());
1580        // Mismatch → one problem mentioning "mismatch".
1581        let mm = mk("abc123def456", "false").check_expected_code("deadbeef");
1582        assert_eq!(mm.len(), 1);
1583        assert!(mm[0].contains("mismatch"));
1584        // Match but dirty → one problem mentioning the DIRTY tree.
1585        let dirty = mk("abc123def456", "true").check_expected_code("abc123def456");
1586        assert_eq!(dirty.len(), 1);
1587        assert!(dirty[0].contains("DIRTY"));
1588        // Absent / unknown → cannot check (one problem each).
1589        assert_eq!(
1590            RunManifest::new("variants")
1591                .check_expected_code("abc1234")
1592                .len(),
1593            1
1594        );
1595        assert_eq!(
1596            mk("unknown", "false").check_expected_code("abc1234").len(),
1597            1
1598        );
1599    }
1600
1601    #[test]
1602    fn check_expected_code_rejects_a_degenerate_expected_sha() {
1603        // An empty / too-short / non-hex expected must FAIL (not vacuously pass via
1604        // starts_with) — otherwise `--expect-code "$MAYBE_EMPTY"` is a false-confidence
1605        // footgun.
1606        let m = {
1607            let mut m = RunManifest::new("variants");
1608            m.params
1609                .insert("code_git_sha".to_string(), "abc123def456".to_string());
1610            m.params
1611                .insert("code_dirty".to_string(), "false".to_string());
1612            m
1613        };
1614        for bad in ["", "9", "abc", "zzzzzzz"] {
1615            let problems = m.check_expected_code(bad);
1616            assert_eq!(problems.len(), 1, "{bad:?} must be rejected");
1617            assert!(
1618                problems[0].contains("invalid --expect-code"),
1619                "{bad:?}: {}",
1620                problems[0]
1621            );
1622        }
1623    }
1624}
rosalind_receipt/lib.rs

rosalind_receipt/
lib.rs