1use std::path::Path;
10
11use super::{blake3_file, FileHash, RunManifest};
12
/// One recorded command-line argument, kept in the order the builder methods were called.
///
/// File arguments carry the content hash of the file rather than its path, so the
/// rendered command does not depend on where the files happened to live.
#[derive(Debug)]
enum Token {
    /// A bare switch; holds the flag text exactly as given (e.g. `--enforce`).
    Flag(String),
    /// A flag followed by a value: `(flag, value)`, the value already stringified.
    Opt(String, String),
    /// An input file argument: the flag that introduces it and the file's hash string.
    Input { flag: String, blake3: String },
    /// An output file argument: the flag that introduces it and the file's hash string.
    Output { flag: String, blake3: String },
}
21
22#[derive(Debug)]
25pub struct CommandCapture {
26 subcommand: String,
27 tokens: Vec<Token>,
28 inputs: Vec<FileHash>,
29 outputs: Vec<FileHash>,
30}
31
32impl CommandCapture {
33 pub fn new(subcommand: impl Into<String>) -> Self {
35 Self {
36 subcommand: subcommand.into(),
37 tokens: Vec::new(),
38 inputs: Vec::new(),
39 outputs: Vec::new(),
40 }
41 }
42
43 pub fn input(&mut self, flag: &str, path: &Path) -> std::io::Result<&mut Self> {
45 let h = blake3_file(path)?;
46 Ok(self.input_hashed(flag, &path.display().to_string(), &h))
47 }
48
49 pub fn output(&mut self, flag: &str, path: &Path) -> std::io::Result<&mut Self> {
51 let h = blake3_file(path)?;
52 Ok(self.output_hashed(flag, &path.display().to_string(), &h))
53 }
54
55 pub fn input_hashed(&mut self, flag: &str, path: &str, blake3: &str) -> &mut Self {
58 self.inputs.push(FileHash {
59 path: path.to_string(),
60 blake3: blake3.to_string(),
61 });
62 self.tokens.push(Token::Input {
63 flag: flag.to_string(),
64 blake3: blake3.to_string(),
65 });
66 self
67 }
68
69 pub fn output_hashed(&mut self, flag: &str, path: &str, blake3: &str) -> &mut Self {
71 self.outputs.push(FileHash {
72 path: path.to_string(),
73 blake3: blake3.to_string(),
74 });
75 self.tokens.push(Token::Output {
76 flag: flag.to_string(),
77 blake3: blake3.to_string(),
78 });
79 self
80 }
81
82 pub fn opt(&mut self, flag: &str, value: impl ToString) -> &mut Self {
84 self.tokens
85 .push(Token::Opt(flag.to_string(), value.to_string()));
86 self
87 }
88
89 pub fn flag(&mut self, flag: &str) -> &mut Self {
91 self.tokens.push(Token::Flag(flag.to_string()));
92 self
93 }
94
95 pub fn flag_if(&mut self, cond: bool, flag: &str) -> &mut Self {
97 if cond {
98 self.flag(flag)
99 } else {
100 self
101 }
102 }
103
104 fn render_command(&self) -> String {
108 let mut parts: Vec<String> = vec![self.subcommand.clone()];
109 for t in &self.tokens {
110 if let Token::Input { flag, blake3 } = t {
111 parts.push(flag.clone());
112 parts.push(format!("@in:{blake3}"));
113 }
114 }
115 let mut opts: Vec<(&String, &String)> = self
116 .tokens
117 .iter()
118 .filter_map(|t| match t {
119 Token::Opt(f, v) => Some((f, v)),
120 _ => None,
121 })
122 .collect();
123 opts.sort_by(|a, b| a.0.cmp(b.0));
124 for (f, v) in opts {
125 parts.push(f.clone());
126 parts.push(v.clone());
127 }
128 let mut flags: Vec<&String> = self
129 .tokens
130 .iter()
131 .filter_map(|t| match t {
132 Token::Flag(f) => Some(f),
133 _ => None,
134 })
135 .collect();
136 flags.sort();
137 for f in flags {
138 parts.push(f.clone());
139 }
140 for t in &self.tokens {
141 if let Token::Output { flag, blake3 } = t {
142 parts.push(flag.clone());
143 parts.push(format!("@out:{blake3}"));
144 }
145 }
146 parts.join(" ")
147 }
148
149 pub fn record_into(self, m: &mut RunManifest) {
154 m.params
155 .insert("command".to_string(), self.render_command());
156 for t in &self.tokens {
157 match t {
158 Token::Opt(f, v) => {
159 m.params.insert(flag_to_key(f), v.clone());
160 }
161 Token::Flag(f) => {
162 m.params.insert(flag_to_key(f), "true".to_string());
163 }
164 _ => {}
165 }
166 }
167 let has = |want: &str| {
168 self.tokens
169 .iter()
170 .any(|t| matches!(t, Token::Input { flag, .. } if flag == want))
171 };
172 if has("--index") {
173 m.params.insert("mode".to_string(), "index".to_string());
174 } else if has("--reference") {
175 m.params.insert("mode".to_string(), "reference".to_string());
176 }
177 m.inputs = self.inputs;
178 m.outputs = self.outputs;
179 }
180
181 pub fn argv_from_command(
185 command: &str,
186 locate_input: &dyn Fn(&str) -> Option<String>,
187 temp_output: &dyn Fn(&str) -> String,
188 ) -> Result<Vec<String>, String> {
189 let mut argv = Vec::new();
190 for tok in command.split(' ') {
191 if let Some(h) = tok.strip_prefix("@in:") {
192 match locate_input(h) {
193 Some(p) => argv.push(p),
194 None => return Err(format!("input not located by content hash @in:{h}")),
195 }
196 } else if let Some(h) = tok.strip_prefix("@out:") {
197 argv.push(temp_output(h));
198 } else {
199 argv.push(tok.to_string());
200 }
201 }
202 Ok(argv)
203 }
204}
205
/// Turns a command-line flag into a manifest parameter key.
///
/// All leading `-` characters are dropped and every remaining `-` becomes `_`,
/// e.g. `--max-depth` becomes `max_depth` and `-o` becomes `o`.
fn flag_to_key(flag: &str) -> String {
    let name = flag.trim_start_matches('-');
    name.chars()
        .map(|c| if c == '-' { '_' } else { c })
        .collect()
}
211
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RunManifest;

    /// Capture shared by all tests: two hashed inputs, three options, one enabled and
    /// one suppressed flag, and one hashed output. The options and the flag are
    /// deliberately interleaved so the rendered order differs from call order.
    fn sample() -> CommandCapture {
        let mut capture = CommandCapture::new("variants");
        capture
            .input_hashed("--index", "ref.idx", "h_idx")
            .input_hashed("--alignments", "s.bam", "h_bam")
            .opt("--mapq-threshold", 20u8)
            .opt("--max-depth", 1000u32)
            .flag_if(true, "--enforce")
            .flag_if(false, "--gvcf")
            .opt("--memory-budget-mb", 256u64)
            .output_hashed("-o", "out.vcf", "h_out");
        capture
    }

    /// Records `sample()` into a fresh manifest and hands the manifest back.
    fn recorded() -> RunManifest {
        let mut manifest = RunManifest::new("variants");
        sample().record_into(&mut manifest);
        manifest
    }

    #[test]
    fn record_into_writes_command_inputs_outputs_and_discrete_params() {
        let manifest = recorded();

        let expected_command = "variants --index @in:h_idx --alignments @in:h_bam \
             --mapq-threshold 20 --max-depth 1000 --memory-budget-mb 256 --enforce -o @out:h_out";
        assert_eq!(manifest.params.get("command").unwrap(), expected_command);

        let input_hashes: Vec<&str> = manifest
            .inputs
            .iter()
            .map(|file| file.blake3.as_str())
            .collect();
        assert_eq!(input_hashes, ["h_idx", "h_bam"]);

        let output_hashes: Vec<&str> = manifest
            .outputs
            .iter()
            .map(|file| file.blake3.as_str())
            .collect();
        assert_eq!(output_hashes, ["h_out"]);

        let discrete = [
            ("mapq_threshold", "20"),
            ("max_depth", "1000"),
            ("memory_budget_mb", "256"),
            ("enforce", "true"),
            ("mode", "index"),
        ];
        for &(key, value) in discrete.iter() {
            assert_eq!(manifest.params.get(key).unwrap(), value);
        }

        // The flag suppressed by `flag_if(false, ..)` must leave no trace.
        assert!(!manifest.params.contains_key("gvcf"));
    }

    #[test]
    fn argv_roundtrips_from_the_recorded_command() {
        let manifest = recorded();
        let command = manifest.params.get("command").unwrap();

        let known_inputs = [("h_idx", "/data/ref.idx"), ("h_bam", "/data/s.bam")];
        let locate = |hash: &str| -> Option<String> {
            known_inputs
                .iter()
                .find(|entry| entry.0 == hash)
                .map(|entry| entry.1.to_string())
        };
        let out_temp = |_: &str| -> String { "/tmp/out.vcf".to_string() };

        let argv = CommandCapture::argv_from_command(command, &locate, &out_temp).unwrap();

        let expected = [
            "variants",
            "--index",
            "/data/ref.idx",
            "--alignments",
            "/data/s.bam",
            "--mapq-threshold",
            "20",
            "--max-depth",
            "1000",
            "--memory-budget-mb",
            "256",
            "--enforce",
            "-o",
            "/tmp/out.vcf",
        ];
        assert_eq!(argv, expected);
    }

    #[test]
    fn argv_errors_when_an_input_cannot_be_located() {
        let manifest = recorded();
        let command = manifest.params.get("command").unwrap();

        let locate = |_: &str| -> Option<String> { None };
        let out_temp = |_: &str| -> String { "/tmp/out.vcf".to_string() };

        let err = CommandCapture::argv_from_command(command, &locate, &out_temp).unwrap_err();
        assert!(
            err.contains("h_idx"),
            "error names the unresolved input hash: {err}"
        );
    }
}