wdl_cli/
inputs.rs

1//! Inputs parsed in from the command line.
2
3use std::ops::Deref;
4use std::ops::DerefMut;
5use std::path::Path;
6use std::path::PathBuf;
7use std::str::FromStr;
8use std::sync::LazyLock;
9
10use indexmap::IndexMap;
11use regex::Regex;
12use serde_json::Value;
13use thiserror::Error;
14use wdl_analysis::Document;
15use wdl_engine::Inputs as EngineInputs;
16
17pub mod file;
18pub mod origin_paths;
19
20pub use file::InputFile;
21pub use origin_paths::OriginPaths;
22
23/// A regex that matches a valid identifier.
24///
25/// This is useful when recognizing whether a key provided on the command line
26/// is a valid identifier.
27static IDENTIFIER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
28    // SAFETY: this is checked statically with tests to always unwrap.
29    Regex::new(r"^([a-zA-Z][a-zA-Z0-9_.]*)$").unwrap()
30});
31
32/// If a value in a key-value pair passed in on the command line cannot be
33/// resolved to a WDL type, this regex is compared to the value.
34///
35/// If the regex matches, we assume the value is a string.
36static ASSUME_STRING_REGEX: LazyLock<Regex> = LazyLock::new(|| {
37    // SAFETY: this is checked statically with tests to always unwrap.
38    Regex::new(r"^[\w /~.]*$").unwrap()
39});
40
41/// An error related to inputs.
42#[derive(Error, Debug)]
43pub enum Error {
44    /// A file error.
45    #[error(transparent)]
46    File(#[from] file::Error),
47
48    /// A file was specified on the command line but not found.
49    #[error("file `{0}` was not found")]
50    FileNotFound(PathBuf),
51
52    /// Encountered an invalid key-value pair.
53    #[error("invalid key-value pair `{pair}`: {reason}")]
54    InvalidPair {
55        /// The string-value of the pair.
56        pair: String,
57
58        /// The reason the pair was not valid.
59        reason: String,
60    },
61
62    /// A deserialization error.
63    #[error("unable to deserialize `{0}` as a valid WDL value")]
64    Deserialize(String),
65}
66
67/// A [`Result`](std::result::Result) with an [`Error`].
68pub type Result<T> = std::result::Result<T, Error>;
69
70/// An input parsed from the command line.
71#[derive(Clone, Debug)]
72pub enum Input {
73    /// A file.
74    File(
75        /// The path to the file.
76        ///
77        /// If this input is successfully created, the input is guaranteed to
78        /// exist at the time the inputs were processed.
79        PathBuf,
80    ),
81
82    /// A key-value pair representing an input.
83    Pair {
84        /// The key.
85        key: String,
86
87        /// The value.
88        value: Value,
89    },
90}
91
92impl Input {
93    /// Attempts to return a reference to the inner [`Path`].
94    ///
95    /// * If the input is a [`Input::File`], a reference to the inner path is
96    ///   returned wrapped in [`Some`].
97    /// * Otherwise, [`None`] is returned.
98    pub fn as_file(&self) -> Option<&Path> {
99        match self {
100            Input::File(p) => Some(p.as_path()),
101            _ => None,
102        }
103    }
104
105    /// Consumes `self` and attempts to return the inner [`PathBuf`].
106    ///
107    /// * If the input is a [`Input::File`], the inner path buffer is returned
108    ///   wrapped in [`Some`].
109    /// * Otherwise, [`None`] is returned.
110    pub fn into_file(self) -> Option<PathBuf> {
111        match self {
112            Input::File(p) => Some(p),
113            _ => None,
114        }
115    }
116
117    /// Consumes `self` and returns the inner [`PathBuf`].
118    ///
119    /// # Panics
120    ///
121    /// If the input is not a [`Input::File`].
122    pub fn unwrap_file(self) -> PathBuf {
123        match self {
124            Input::File(p) => p,
125            v => panic!("{v:?} is not an `Input::File`"),
126        }
127    }
128
129    /// Attempts to return a reference to the inner key-value pair.
130    ///
131    /// * If the input is a [`Input::Pair`], a reference to the inner key and
132    ///   value is returned wrapped in [`Some`].
133    /// * Otherwise, [`None`] is returned.
134    pub fn as_pair(&self) -> Option<(&str, &Value)> {
135        match self {
136            Input::Pair { key, value } => Some((key.as_str(), value)),
137            _ => None,
138        }
139    }
140
141    /// Consumes `self` and attempts to return the inner key-value pair.
142    ///
143    /// * If the input is a [`Input::Pair`], the inner key-value pair is
144    ///   returned wrapped in [`Some`].
145    /// * Otherwise, [`None`] is returned.
146    pub fn into_pair(self) -> Option<(String, Value)> {
147        match self {
148            Input::Pair { key, value } => Some((key, value)),
149            _ => None,
150        }
151    }
152
153    /// Consumes `self` and returns the inner key-value pair.
154    ///
155    /// # Panics
156    ///
157    /// If the input is not a [`Input::Pair`].
158    pub fn unwrap_pair(self) -> (String, Value) {
159        match self {
160            Input::Pair { key, value } => (key, value),
161            v => panic!("{v:?} is not an `Input::Pair`"),
162        }
163    }
164}
165
166impl FromStr for Input {
167    type Err = Error;
168
169    fn from_str(s: &str) -> std::result::Result<Self, Error> {
170        match s.split_once("=") {
171            Some((key, value)) => {
172                if !IDENTIFIER_REGEX.is_match(key) {
173                    return Err(Error::InvalidPair {
174                        pair: s.to_string(),
175                        reason: format!(
176                            "key `{}` did not match the identifier regex (`{}`)",
177                            key,
178                            IDENTIFIER_REGEX.as_str()
179                        ),
180                    });
181                }
182
183                let value = serde_json::from_str(value).or_else(|_| {
184                    if ASSUME_STRING_REGEX.is_match(value) {
185                        Ok(Value::String(value.to_owned()))
186                    } else {
187                        Err(Error::Deserialize(value.to_owned()))
188                    }
189                })?;
190
191                Ok(Input::Pair {
192                    key: key.to_owned(),
193                    value,
194                })
195            }
196            None => {
197                let path = PathBuf::from(s);
198
199                if !path.exists() {
200                    return Err(Error::FileNotFound(path));
201                }
202
203                Ok(Input::File(path))
204            }
205        }
206    }
207}
208
209/// The inner type for inputs (for convenience).
210type InputsInner = IndexMap<String, (PathBuf, Value)>;
211
212/// A set of inputs parsed from the command line and compiled on top of one
213/// another.
214#[derive(Clone, Debug, Default)]
215pub struct Inputs(InputsInner);
216
217impl Inputs {
218    /// Adds an input read from the command line.
219    fn add_input(&mut self, input: &str) -> Result<()> {
220        match input.parse::<Input>()? {
221            Input::File(path) => {
222                let inputs = InputFile::read(&path).map_err(Error::File)?;
223                self.extend(inputs.into_inner());
224            }
225            Input::Pair { key, value } => {
226                // SAFETY: we expect that the current working directory is
227                // always available for the platforms that `wdl` will run
228                // within.
229                let cwd = std::env::current_dir().unwrap();
230                self.insert(key, (cwd, value));
231            }
232        };
233
234        Ok(())
235    }
236
237    /// Attempts to coalesce a set of inputs into an [`Inputs`].
238    pub fn coalesce<T, V>(iter: T) -> Result<Self>
239    where
240        T: IntoIterator<Item = V>,
241        V: AsRef<str>,
242    {
243        let mut inputs = Inputs::default();
244
245        for input in iter {
246            inputs.add_input(input.as_ref())?;
247        }
248
249        Ok(inputs)
250    }
251
252    /// Consumes `self` and returns the inner index map.
253    pub fn into_inner(self) -> InputsInner {
254        self.0
255    }
256
257    /// Converts a set of inputs to a set of engine inputs.
258    ///
259    /// Returns `Ok(Some(_))` if the inputs are not empty.
260    ///
261    /// Returns `Ok(None)` if the inputs are empty.
262    ///
263    /// When the inputs are not empty, the return type contained in `Some(_)` is
264    /// a tuple of,
265    ///
266    /// - the name of the callee (the name of the task or workflow being run),
267    /// - the transformed engine inputs, and
268    /// - a map containing the origin path for each provided input key.
269    pub fn into_engine_inputs(
270        self,
271        document: &Document,
272    ) -> anyhow::Result<Option<(String, EngineInputs, OriginPaths)>> {
273        let (origins, values) = self.0.into_iter().fold(
274            (IndexMap::new(), serde_json::Map::new()),
275            |(mut origins, mut values), (key, (origin, value))| {
276                origins.insert(key.clone(), origin);
277                values.insert(key, value);
278                (origins, values)
279            },
280        );
281
282        let result = EngineInputs::parse_object(document, values)?;
283
284        Ok(result.map(|(callee_name, inputs)| {
285            let callee_prefix = format!("{}.", callee_name);
286
287            let origins = origins
288                .into_iter()
289                .map(|(key, path)| {
290                    if let Some(key) = key.strip_prefix(&callee_prefix) {
291                        (key.to_owned(), path)
292                    } else {
293                        (key, path)
294                    }
295                })
296                .collect::<IndexMap<String, PathBuf>>();
297
298            (callee_name, inputs, OriginPaths::from(origins))
299        }))
300    }
301}
302
303impl Deref for Inputs {
304    type Target = InputsInner;
305
306    fn deref(&self) -> &Self::Target {
307        &self.0
308    }
309}
310
311impl DerefMut for Inputs {
312    fn deref_mut(&mut self) -> &mut Self::Target {
313        &mut self.0
314    }
315}
316
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the value stored under `key`, ignoring the path stored with
    /// it.
    ///
    /// Panics when `key` is absent.
    fn value_of<'a>(inputs: &'a Inputs, key: &str) -> &'a Value {
        let (_, value) = inputs.get(key).unwrap();
        value
    }

    #[test]
    fn identifier_regex() {
        let valid = "here_is_an.identifier";
        let invalid = "here is not an identifier";

        assert!(IDENTIFIER_REGEX.is_match(valid));
        assert!(!IDENTIFIER_REGEX.is_match(invalid));
    }

    #[test]
    fn assume_string_regex() {
        // Values that should be accepted as bare strings.
        for accepted in ["", "fooBAR082", "foo bar baz"] {
            assert!(ASSUME_STRING_REGEX.is_match(accepted));
        }

        // A value that should be rejected.
        assert!(!ASSUME_STRING_REGEX.is_match("[1, a]"));
    }

    #[test]
    fn file_parsing() {
        // Paths to a JSON fixture and a YAML fixture that both exist.
        for fixture in [
            "./tests/fixtures/inputs_one.json",
            "./tests/fixtures/inputs_three.yml",
        ] {
            let input = fixture.parse::<Input>().unwrap();
            assert_eq!(input.as_file().unwrap().to_str().unwrap(), fixture);
        }

        // A path that does not exist.
        let missing = "./tests/fixtures/missing.json";
        match missing.parse::<Input>().unwrap_err() {
            Error::FileNotFound(path) => assert_eq!(path.to_str().unwrap(), missing),
            other => panic!("expected `Error::FileNotFound`, got {other:?}"),
        }
    }

    #[test]
    fn key_value_pair_parsing() {
        // A plain key with a quoted value.
        let (key, value) = r#"foo="bar""#.parse::<Input>().unwrap().unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.as_str(), Some("bar"));

        // A key containing a period and underscores.
        let (key, value) = r#"foo.bar_baz_quux="qil""#
            .parse::<Input>()
            .unwrap()
            .unwrap_pair();
        assert_eq!(key, "foo.bar_baz_quux");
        assert_eq!(value.as_str(), Some("qil"));

        // A key that is not a valid identifier.
        match r#"foo$="bar""#.parse::<Input>().unwrap_err() {
            Error::InvalidPair { pair, reason } => {
                assert_eq!(pair, r#"foo$="bar""#);
                assert_eq!(
                    reason,
                    r"key `foo$` did not match the identifier regex (`^([a-zA-Z][a-zA-Z0-9_.]*)$`)"
                );
            }
            other => panic!("expected `Error::InvalidPair`, got {other:?}"),
        }

        // A value may contain characters that would be invalid in a key.
        let (key, value) = r#"foo="bar$""#.parse::<Input>().unwrap().unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.as_str(), Some("bar$"));
    }

    #[test]
    fn coalesce() {
        // Files applied in the standard order.
        let inputs = Inputs::coalesce([
            "./tests/fixtures/inputs_one.json",
            "./tests/fixtures/inputs_two.json",
            "./tests/fixtures/inputs_three.yml",
        ])
        .unwrap();

        assert_eq!(inputs.len(), 5);
        assert_eq!(value_of(&inputs, "foo").as_str(), Some("bar"));
        assert_eq!(value_of(&inputs, "baz").as_f64(), Some(128.0));
        assert_eq!(value_of(&inputs, "quux").as_str(), Some("qil"));
        assert_eq!(value_of(&inputs, "new.key").as_str(), Some("foobarbaz"));
        assert_eq!(value_of(&inputs, "new_two.key").as_str(), Some("bazbarfoo"));

        // Files applied in the opposite order.
        let inputs = Inputs::coalesce([
            "./tests/fixtures/inputs_three.yml",
            "./tests/fixtures/inputs_two.json",
            "./tests/fixtures/inputs_one.json",
        ])
        .unwrap();

        assert_eq!(inputs.len(), 5);
        assert_eq!(value_of(&inputs, "foo").as_str(), Some("bar"));
        assert_eq!(value_of(&inputs, "baz").as_f64(), Some(42.0));
        assert_eq!(value_of(&inputs, "quux").as_str(), Some("qil"));
        assert_eq!(value_of(&inputs, "new.key").as_str(), Some("foobarbaz"));
        assert_eq!(value_of(&inputs, "new_two.key").as_str(), Some("bazbarfoo"));

        // Files interleaved with key-value pairs.
        let inputs = Inputs::coalesce([
            r#"sandwich=-100"#,
            "./tests/fixtures/inputs_one.json",
            "./tests/fixtures/inputs_two.json",
            r#"quux="jacks""#,
            "./tests/fixtures/inputs_three.yml",
            r#"baz=false"#,
        ])
        .unwrap();

        assert_eq!(inputs.len(), 6);
        assert_eq!(value_of(&inputs, "foo").as_str(), Some("bar"));
        assert_eq!(value_of(&inputs, "baz").as_bool(), Some(false));
        assert_eq!(value_of(&inputs, "quux").as_str(), Some("jacks"));
        assert_eq!(value_of(&inputs, "new.key").as_str(), Some("foobarbaz"));
        assert_eq!(value_of(&inputs, "new_two.key").as_str(), Some("bazbarfoo"));
        assert_eq!(value_of(&inputs, "sandwich").as_i64(), Some(-100));

        // A key-value pair whose value cannot be deserialized.
        let error =
            Inputs::coalesce(["./tests/fixtures/inputs_one.json", "foo=baz#bar"]).unwrap_err();
        match error {
            Error::Deserialize(value) => assert_eq!(value, "baz#bar"),
            other => panic!("expected `Error::Deserialize`, got {other:?}"),
        }

        // A file that does not exist.
        let error = Inputs::coalesce([
            "./tests/fixtures/inputs_one.json",
            "./tests/fixtures/inputs_two.json",
            "./tests/fixtures/inputs_three.yml",
            "./tests/fixtures/missing.json",
        ])
        .unwrap_err();
        match error {
            Error::FileNotFound(path) => {
                assert_eq!(path.to_str().unwrap(), "./tests/fixtures/missing.json");
            }
            other => panic!("expected `Error::FileNotFound`, got {other:?}"),
        }
    }

    #[test]
    fn multiple_equal_signs() {
        // Only the first `=` separates the key from the value.
        let parsed = r#"foo="bar=baz""#.parse::<Input>().unwrap();
        let (key, value) = parsed.unwrap_pair();

        assert_eq!(key, "foo");
        assert_eq!(value.as_str(), Some("bar=baz"));
    }
}