Skip to main content

rosalind_receipt/
command.rs

//! `CommandCapture` — the single chokepoint that records a normalized, replayable
//! invocation into a receipt's claim, and reconstructs an argv from it. Recording and
//! replay share this one structure so they cannot drift (the "forgot to record flag X"
//! bug class is eliminated by construction).
//!
//! Input/output operands carry a content hash, not a path, so the recorded `command`
//! string is machine-independent and is protected by the existing claim self-hash.
8
9use std::path::Path;
10
11use super::{blake3_file, FileHash, RunManifest};
12
/// One token of a recorded invocation.
///
/// Tokens are stored in the order the caller added them; the canonical ordering is
/// applied only when the `command` string is rendered.
#[derive(Debug)]
enum Token {
    /// A bare flag with no value, e.g. `--enforce`.
    Flag(String),
    /// An option and its already-stringified value, as `(flag, value)`.
    Opt(String, String),
    /// An input operand: the flag that introduces it and the content hash of the file.
    Input { flag: String, blake3: String },
    /// An output operand: the flag that introduces it and the content hash of the file.
    Output { flag: String, blake3: String },
}
21
22/// Accumulates an invocation, then writes it into a `RunManifest` (claim) and/or
23/// reconstructs an argv for re-execution.
24#[derive(Debug)]
25pub struct CommandCapture {
26    subcommand: String,
27    tokens: Vec<Token>,
28    inputs: Vec<FileHash>,
29    outputs: Vec<FileHash>,
30}
31
32impl CommandCapture {
33    /// Start capturing an invocation of `subcommand` (e.g. `"variants"`).
34    pub fn new(subcommand: impl Into<String>) -> Self {
35        Self {
36            subcommand: subcommand.into(),
37            tokens: Vec::new(),
38            inputs: Vec::new(),
39            outputs: Vec::new(),
40        }
41    }
42
43    /// A content-addressed input operand; hashes the file and records it.
44    pub fn input(&mut self, flag: &str, path: &Path) -> std::io::Result<&mut Self> {
45        let h = blake3_file(path)?;
46        Ok(self.input_hashed(flag, &path.display().to_string(), &h))
47    }
48
49    /// A content-addressed output operand; hashes the file and records it.
50    pub fn output(&mut self, flag: &str, path: &Path) -> std::io::Result<&mut Self> {
51        let h = blake3_file(path)?;
52        Ok(self.output_hashed(flag, &path.display().to_string(), &h))
53    }
54
55    /// Input operand with a precomputed hash (when the caller already hashed it, and in
56    /// tests). `path` is recorded into `inputs[]` for humans / `verify` to re-hash.
57    pub fn input_hashed(&mut self, flag: &str, path: &str, blake3: &str) -> &mut Self {
58        self.inputs.push(FileHash {
59            path: path.to_string(),
60            blake3: blake3.to_string(),
61        });
62        self.tokens.push(Token::Input {
63            flag: flag.to_string(),
64            blake3: blake3.to_string(),
65        });
66        self
67    }
68
69    /// Output operand with a precomputed hash.
70    pub fn output_hashed(&mut self, flag: &str, path: &str, blake3: &str) -> &mut Self {
71        self.outputs.push(FileHash {
72            path: path.to_string(),
73            blake3: blake3.to_string(),
74        });
75        self.tokens.push(Token::Output {
76            flag: flag.to_string(),
77            blake3: blake3.to_string(),
78        });
79        self
80    }
81
82    /// An option with a value, e.g. `("--max-depth", 1000)`.
83    pub fn opt(&mut self, flag: &str, value: impl ToString) -> &mut Self {
84        self.tokens
85            .push(Token::Opt(flag.to_string(), value.to_string()));
86        self
87    }
88
89    /// A bare flag, e.g. `"--enforce"`.
90    pub fn flag(&mut self, flag: &str) -> &mut Self {
91        self.tokens.push(Token::Flag(flag.to_string()));
92        self
93    }
94
95    /// Record `flag` only when `cond` (so a `false` boolean leaves no trace).
96    pub fn flag_if(&mut self, cond: bool, flag: &str) -> &mut Self {
97        if cond {
98            self.flag(flag)
99        } else {
100            self
101        }
102    }
103
104    /// Render the normalized, machine-independent command string. Canonical order:
105    /// subcommand, then input operands (in order added), then options sorted by flag,
106    /// then bare flags sorted, then output operands — so it is stable run-to-run.
107    fn render_command(&self) -> String {
108        let mut parts: Vec<String> = vec![self.subcommand.clone()];
109        for t in &self.tokens {
110            if let Token::Input { flag, blake3 } = t {
111                parts.push(flag.clone());
112                parts.push(format!("@in:{blake3}"));
113            }
114        }
115        let mut opts: Vec<(&String, &String)> = self
116            .tokens
117            .iter()
118            .filter_map(|t| match t {
119                Token::Opt(f, v) => Some((f, v)),
120                _ => None,
121            })
122            .collect();
123        opts.sort_by(|a, b| a.0.cmp(b.0));
124        for (f, v) in opts {
125            parts.push(f.clone());
126            parts.push(v.clone());
127        }
128        let mut flags: Vec<&String> = self
129            .tokens
130            .iter()
131            .filter_map(|t| match t {
132                Token::Flag(f) => Some(f),
133                _ => None,
134            })
135            .collect();
136        flags.sort();
137        for f in flags {
138            parts.push(f.clone());
139        }
140        for t in &self.tokens {
141            if let Token::Output { flag, blake3 } = t {
142                parts.push(flag.clone());
143                parts.push(format!("@out:{blake3}"));
144            }
145        }
146        parts.join(" ")
147    }
148
149    /// Write the capture into a manifest's claim: the `command` recipe, the derived
150    /// `inputs[]`/`outputs[]`, the discrete params (mechanical flag→key projection), and
151    /// the inferred `mode`. Call before `finalize()`. Measurement fields remain the
152    /// caller's responsibility (recorded separately, relocated by `finalize`).
153    pub fn record_into(self, m: &mut RunManifest) {
154        m.params
155            .insert("command".to_string(), self.render_command());
156        for t in &self.tokens {
157            match t {
158                Token::Opt(f, v) => {
159                    m.params.insert(flag_to_key(f), v.clone());
160                }
161                Token::Flag(f) => {
162                    m.params.insert(flag_to_key(f), "true".to_string());
163                }
164                _ => {}
165            }
166        }
167        let has = |want: &str| {
168            self.tokens
169                .iter()
170                .any(|t| matches!(t, Token::Input { flag, .. } if flag == want))
171        };
172        if has("--index") {
173            m.params.insert("mode".to_string(), "index".to_string());
174        } else if has("--reference") {
175            m.params.insert("mode".to_string(), "reference".to_string());
176        }
177        m.inputs = self.inputs;
178        m.outputs = self.outputs;
179    }
180
181    /// Reconstruct an argv from a recorded `command`: substitute each `@in:<h>` with the
182    /// located input path and each `@out:<h>` with a caller-supplied temp path. Errors
183    /// (naming the hash) if an input cannot be located.
184    pub fn argv_from_command(
185        command: &str,
186        locate_input: &dyn Fn(&str) -> Option<String>,
187        temp_output: &dyn Fn(&str) -> String,
188    ) -> Result<Vec<String>, String> {
189        let mut argv = Vec::new();
190        for tok in command.split(' ') {
191            if let Some(h) = tok.strip_prefix("@in:") {
192                match locate_input(h) {
193                    Some(p) => argv.push(p),
194                    None => return Err(format!("input not located by content hash @in:{h}")),
195                }
196            } else if let Some(h) = tok.strip_prefix("@out:") {
197                argv.push(temp_output(h));
198            } else {
199                argv.push(tok.to_string());
200            }
201        }
202        Ok(argv)
203    }
204}
205
/// Mechanical `--max-depth` → `max_depth` projection (strip leading dashes,
/// dashes → underscores). Deterministic; no per-flag config.
///
/// All leading dashes are removed, so a short flag such as `-o` becomes `o`, and a
/// string made only of dashes becomes the empty key.
fn flag_to_key(flag: &str) -> String {
    flag.trim_start_matches('-').replace('-', "_")
}
211
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RunManifest;

    // Build a capture WITHOUT touching the filesystem by injecting hashes directly.
    fn sample() -> CommandCapture {
        let mut c = CommandCapture::new("variants");
        c.input_hashed("--index", "ref.idx", "h_idx");
        c.input_hashed("--alignments", "s.bam", "h_bam");
        c.opt("--mapq-threshold", 20u8);
        c.opt("--max-depth", 1000u32);
        c.flag_if(true, "--enforce");
        c.flag_if(false, "--gvcf");
        c.opt("--memory-budget-mb", 256u64);
        c.output_hashed("-o", "out.vcf", "h_out");
        c
    }

    // Record `capture` into a fresh manifest and hand the manifest back.
    fn recorded(capture: CommandCapture) -> RunManifest {
        let mut m = RunManifest::new("variants");
        capture.record_into(&mut m);
        m
    }

    #[test]
    fn record_into_writes_command_inputs_outputs_and_discrete_params() {
        let m = recorded(sample());

        assert_eq!(
            m.params.get("command").unwrap(),
            "variants --index @in:h_idx --alignments @in:h_bam \
             --mapq-threshold 20 --max-depth 1000 --memory-budget-mb 256 --enforce -o @out:h_out"
        );
        assert_eq!(
            m.inputs
                .iter()
                .map(|f| f.blake3.as_str())
                .collect::<Vec<_>>(),
            ["h_idx", "h_bam"]
        );
        assert_eq!(
            m.outputs
                .iter()
                .map(|f| f.blake3.as_str())
                .collect::<Vec<_>>(),
            ["h_out"]
        );
        assert_eq!(m.params.get("mapq_threshold").unwrap(), "20");
        assert_eq!(m.params.get("max_depth").unwrap(), "1000");
        assert_eq!(m.params.get("memory_budget_mb").unwrap(), "256");
        assert_eq!(m.params.get("enforce").unwrap(), "true");
        assert_eq!(m.params.get("mode").unwrap(), "index");
        assert!(!m.params.contains_key("gvcf"));
    }

    // Options and flags are sorted at render time and inputs are emitted before
    // outputs, so the order the caller added them in must not change the recipe.
    #[test]
    fn command_is_independent_of_option_and_flag_insertion_order() {
        let mut a = CommandCapture::new("variants");
        a.input_hashed("--index", "ref.idx", "h_idx");
        a.opt("--mapq-threshold", 20u8);
        a.opt("--max-depth", 1000u32);
        a.flag("--enforce");
        a.flag("--gvcf");
        a.output_hashed("-o", "out.vcf", "h_out");

        let mut b = CommandCapture::new("variants");
        b.flag("--gvcf");
        b.output_hashed("-o", "out.vcf", "h_out");
        b.opt("--max-depth", 1000u32);
        b.flag("--enforce");
        b.input_hashed("--index", "ref.idx", "h_idx");
        b.opt("--mapq-threshold", 20u8);

        let (ma, mb) = (recorded(a), recorded(b));
        assert_eq!(
            ma.params.get("command").unwrap(),
            "variants --index @in:h_idx --mapq-threshold 20 --max-depth 1000 \
             --enforce --gvcf -o @out:h_out"
        );
        assert_eq!(ma.params.get("command"), mb.params.get("command"));
    }

    #[test]
    fn mode_is_inferred_from_the_reference_input() {
        let mut c = CommandCapture::new("variants");
        c.input_hashed("--reference", "ref.fa", "h_ref");
        let m = recorded(c);
        assert_eq!(m.params.get("mode").unwrap(), "reference");
    }

    #[test]
    fn index_input_takes_precedence_over_reference_for_mode() {
        let mut c = CommandCapture::new("variants");
        c.input_hashed("--reference", "ref.fa", "h_ref");
        c.input_hashed("--index", "ref.idx", "h_idx");
        let m = recorded(c);
        assert_eq!(m.params.get("mode").unwrap(), "index");
    }

    #[test]
    fn flag_to_key_strips_leading_dashes_and_snake_cases() {
        assert_eq!(flag_to_key("--memory-budget-mb"), "memory_budget_mb");
        assert_eq!(flag_to_key("-o"), "o");
    }

    #[test]
    fn argv_roundtrips_from_the_recorded_command() {
        let m = recorded(sample());
        let command = m.params.get("command").unwrap();

        let locate = |h: &str| match h {
            "h_idx" => Some("/data/ref.idx".to_string()),
            "h_bam" => Some("/data/s.bam".to_string()),
            _ => None,
        };
        let out_temp = |_h: &str| "/tmp/out.vcf".to_string();
        let argv = CommandCapture::argv_from_command(command, &locate, &out_temp).unwrap();

        assert_eq!(
            argv,
            vec![
                "variants",
                "--index",
                "/data/ref.idx",
                "--alignments",
                "/data/s.bam",
                "--mapq-threshold",
                "20",
                "--max-depth",
                "1000",
                "--memory-budget-mb",
                "256",
                "--enforce",
                "-o",
                "/tmp/out.vcf",
            ]
        );
    }

    #[test]
    fn argv_errors_when_an_input_cannot_be_located() {
        let m = recorded(sample());
        let command = m.params.get("command").unwrap();
        let locate = |_h: &str| None;
        let out_temp = |_h: &str| "/tmp/out.vcf".to_string();
        let err = CommandCapture::argv_from_command(command, &locate, &out_temp).unwrap_err();
        assert!(
            err.contains("h_idx"),
            "error names the unresolved input hash: {err}"
        );
    }
}
311}