wdl_cli/
inputs.rs

1//! Inputs parsed in from the command line.
2
3use std::ops::Deref;
4use std::ops::DerefMut;
5use std::path::Path;
6use std::path::PathBuf;
7use std::str::FromStr;
8use std::sync::LazyLock;
9
10use anyhow::bail;
11use indexmap::IndexMap;
12use regex::Regex;
13use serde_json::Value;
14use thiserror::Error;
15use wdl_analysis::Document;
16use wdl_engine::Inputs as EngineInputs;
17
18pub mod file;
19pub mod origin_paths;
20
21pub use file::InputFile;
22pub use origin_paths::OriginPaths;
23
24/// A regex that matches a valid identifier.
25///
26/// This is useful when recognizing whether a key provided on the command line
27/// is a valid identifier.
28static IDENTIFIER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    // SAFETY: this is checked statically with tests to always unwrap.
30    Regex::new(r"^([a-zA-Z][a-zA-Z0-9_.]*)$").unwrap()
31});
32
33/// If a value in a key-value pair passed in on the command line cannot be
34/// resolved to a WDL type, this regex is compared to the value.
35///
36/// If the regex matches, we assume the value is a string.
37static ASSUME_STRING_REGEX: LazyLock<Regex> = LazyLock::new(|| {
38    // SAFETY: this is checked statically with tests to always unwrap.
39    Regex::new(r"^[\w /~.]*$").unwrap()
40});
41
42/// An error related to inputs.
43#[derive(Error, Debug)]
44pub enum Error {
45    /// A file error.
46    #[error(transparent)]
47    File(#[from] file::Error),
48
49    /// A file was specified on the command line but not found.
50    #[error("file `{0}` was not found")]
51    FileNotFound(PathBuf),
52
53    /// Encountered an invalid key-value pair.
54    #[error("invalid key-value pair `{pair}`: {reason}")]
55    InvalidPair {
56        /// The string-value of the pair.
57        pair: String,
58
59        /// The reason the pair was not valid.
60        reason: String,
61    },
62
63    /// An invalid entrypoint was specified.
64    #[error("invalid entrypoint `{0}`")]
65    InvalidEntrypoint(String),
66
67    /// A deserialization error.
68    #[error("unable to deserialize `{0}` as a valid WDL value")]
69    Deserialize(String),
70}
71
72/// A [`Result`](std::result::Result) with an [`Error`].
73pub type Result<T> = std::result::Result<T, Error>;
74
75/// An input parsed from the command line.
76#[derive(Clone, Debug)]
77pub enum Input {
78    /// A file.
79    File(
80        /// The path to the file.
81        ///
82        /// If this input is successfully created, the input is guaranteed to
83        /// exist at the time the inputs were processed.
84        PathBuf,
85    ),
86
87    /// A key-value pair representing an input.
88    Pair {
89        /// The key.
90        key: String,
91
92        /// The value.
93        value: Value,
94    },
95}
96
97impl Input {
98    /// Attempts to return a reference to the inner [`Path`].
99    ///
100    /// * If the input is a [`Input::File`], a reference to the inner path is
101    ///   returned wrapped in [`Some`].
102    /// * Otherwise, [`None`] is returned.
103    pub fn as_file(&self) -> Option<&Path> {
104        match self {
105            Input::File(p) => Some(p.as_path()),
106            _ => None,
107        }
108    }
109
110    /// Consumes `self` and attempts to return the inner [`PathBuf`].
111    ///
112    /// * If the input is a [`Input::File`], the inner path buffer is returned
113    ///   wrapped in [`Some`].
114    /// * Otherwise, [`None`] is returned.
115    pub fn into_file(self) -> Option<PathBuf> {
116        match self {
117            Input::File(p) => Some(p),
118            _ => None,
119        }
120    }
121
122    /// Consumes `self` and returns the inner [`PathBuf`].
123    ///
124    /// # Panics
125    ///
126    /// If the input is not a [`Input::File`].
127    pub fn unwrap_file(self) -> PathBuf {
128        match self {
129            Input::File(p) => p,
130            v => panic!("{v:?} is not an `Input::File`"),
131        }
132    }
133
134    /// Attempts to return a reference to the inner key-value pair.
135    ///
136    /// * If the input is a [`Input::Pair`], a reference to the inner key and
137    ///   value is returned wrapped in [`Some`].
138    /// * Otherwise, [`None`] is returned.
139    pub fn as_pair(&self) -> Option<(&str, &Value)> {
140        match self {
141            Input::Pair { key, value } => Some((key.as_str(), value)),
142            _ => None,
143        }
144    }
145
146    /// Consumes `self` and attempts to return the inner key-value pair.
147    ///
148    /// * If the input is a [`Input::Pair`], the inner key-value pair is
149    ///   returned wrapped in [`Some`].
150    /// * Otherwise, [`None`] is returned.
151    pub fn into_pair(self) -> Option<(String, Value)> {
152        match self {
153            Input::Pair { key, value } => Some((key, value)),
154            _ => None,
155        }
156    }
157
158    /// Consumes `self` and returns the inner key-value pair.
159    ///
160    /// # Panics
161    ///
162    /// If the input is not a [`Input::Pair`].
163    pub fn unwrap_pair(self) -> (String, Value) {
164        match self {
165            Input::Pair { key, value } => (key, value),
166            v => panic!("{v:?} is not an `Input::Pair`"),
167        }
168    }
169}
170
171impl FromStr for Input {
172    type Err = Error;
173
174    fn from_str(s: &str) -> std::result::Result<Self, Error> {
175        match s.split_once("=") {
176            Some((key, value)) => {
177                if !IDENTIFIER_REGEX.is_match(key) {
178                    return Err(Error::InvalidPair {
179                        pair: s.to_string(),
180                        reason: format!(
181                            "key `{}` did not match the identifier regex (`{}`)",
182                            key,
183                            IDENTIFIER_REGEX.as_str()
184                        ),
185                    });
186                }
187
188                let value = serde_json::from_str(value).or_else(|_| {
189                    if ASSUME_STRING_REGEX.is_match(value) {
190                        Ok(Value::String(value.to_owned()))
191                    } else {
192                        Err(Error::Deserialize(value.to_owned()))
193                    }
194                })?;
195
196                Ok(Input::Pair {
197                    key: key.to_owned(),
198                    value,
199                })
200            }
201            None => {
202                let path = PathBuf::from(s);
203
204                if !path.exists() {
205                    return Err(Error::FileNotFound(path));
206                }
207
208                Ok(Input::File(path))
209            }
210        }
211    }
212}
213
214/// The inner type for inputs (for convenience).
215type InputsInner = IndexMap<String, (PathBuf, Value)>;
216
217/// A set of inputs parsed from the command line and compiled on top of one
218/// another.
219#[derive(Clone, Debug, Default)]
220pub struct Inputs {
221    /// The actual inputs map.
222    inputs: InputsInner,
223    /// The name of the task or workflow these inputs are provided for.
224    entrypoint: Option<String>,
225}
226
227impl Inputs {
228    /// Adds an input read from the command line.
229    fn add_input(&mut self, input: &str) -> Result<()> {
230        match input.parse::<Input>()? {
231            Input::File(path) => {
232                let inputs = InputFile::read(&path).map_err(Error::File)?;
233                self.extend(inputs.into_inner());
234            }
235            Input::Pair { key, value } => {
236                // SAFETY: we expect that the current working directory is
237                // always available for the platforms that `wdl` will run
238                // within.
239                let cwd = std::env::current_dir().unwrap();
240
241                let key = if let Some(prefix) = &self.entrypoint {
242                    format!("{prefix}.{key}")
243                } else {
244                    key
245                };
246                self.insert(key, (cwd, value));
247            }
248        };
249
250        Ok(())
251    }
252
253    /// Attempts to coalesce a set of inputs into an [`Inputs`].
254    ///
255    /// `entrypoint` is the task or workflow the inputs are for.
256    /// If `entrypoint` is `Some(_)` then it will be prefixed to each
257    /// [`Input::Pair`]. Keys inside a [`Input::File`] must always have this
258    /// common prefix specified. If `entrypoint` is `None` then all of the
259    /// inputs in `iter` must be prefixed with the task or workflow name.
260    pub fn coalesce<T, V>(iter: T, entrypoint: Option<String>) -> Result<Self>
261    where
262        T: IntoIterator<Item = V>,
263        V: AsRef<str>,
264    {
265        if let Some(ep) = &entrypoint
266            && ep.contains('.')
267        {
268            return Err(Error::InvalidEntrypoint(ep.into()));
269        }
270
271        let mut inputs = Inputs {
272            entrypoint,
273            ..Default::default()
274        };
275
276        for input in iter {
277            inputs.add_input(input.as_ref())?;
278        }
279
280        Ok(inputs)
281    }
282
283    /// Consumes `self` and returns the inner index map.
284    pub fn into_inner(self) -> InputsInner {
285        self.inputs
286    }
287
288    /// Converts a set of inputs to a set of engine inputs.
289    ///
290    /// Returns `Ok(Some(_))` if the inputs are not empty.
291    ///
292    /// Returns `Ok(None)` if the inputs are empty.
293    ///
294    /// When the inputs are not empty, the return type contained in `Some(_)` is
295    /// a tuple of,
296    ///
297    /// - the name of the callee (the name of the task or workflow being run),
298    /// - the transformed engine inputs, and
299    /// - a map containing the origin path for each provided input key.
300    pub fn into_engine_inputs(
301        self,
302        document: &Document,
303    ) -> anyhow::Result<Option<(String, EngineInputs, OriginPaths)>> {
304        let (origins, values) = self.inputs.into_iter().fold(
305            (IndexMap::new(), serde_json::Map::new()),
306            |(mut origins, mut values), (key, (origin, value))| {
307                origins.insert(key.clone(), origin);
308                values.insert(key, value);
309                (origins, values)
310            },
311        );
312
313        let result = EngineInputs::parse_object(document, values)?;
314
315        if let Some((derived, _)) = &result
316            && let Some(ep) = &self.entrypoint
317            && derived != ep
318        {
319            bail!(format!(
320                "supplied entrypoint `{ep}` does not match derived entrypoint `{derived}`"
321            ))
322        }
323
324        Ok(result.map(|(callee_name, inputs)| {
325            let callee_prefix = format!("{callee_name}.");
326
327            let origins = origins
328                .into_iter()
329                .map(|(key, path)| {
330                    if let Some(key) = key.strip_prefix(&callee_prefix) {
331                        (key.to_owned(), path)
332                    } else {
333                        (key, path)
334                    }
335                })
336                .collect::<IndexMap<String, PathBuf>>();
337
338            (callee_name, inputs, OriginPaths::from(origins))
339        }))
340    }
341}
342
343impl Deref for Inputs {
344    type Target = InputsInner;
345
346    fn deref(&self) -> &Self::Target {
347        &self.inputs
348    }
349}
350
351impl DerefMut for Inputs {
352    fn deref_mut(&mut self) -> &mut Self::Target {
353        &mut self.inputs
354    }
355}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// The identifier regex accepts dotted identifiers and rejects keys that
    /// contain other characters or do not start with a letter.
    #[test]
    fn identifier_regex() {
        assert!(IDENTIFIER_REGEX.is_match("here_is_an.identifier"));
        assert!(!IDENTIFIER_REGEX.is_match("here is not an identifier"));

        // The first character must be an ASCII letter.
        assert!(!IDENTIFIER_REGEX.is_match(""));
        assert!(!IDENTIFIER_REGEX.is_match("1abc"));
        assert!(!IDENTIFIER_REGEX.is_match("_abc"));
    }

    /// The assume-string regex accepts simple text and rejects text that
    /// contains characters outside of its character class.
    #[test]
    fn assume_string_regex() {
        // Matches.
        assert!(ASSUME_STRING_REGEX.is_match(""));
        assert!(ASSUME_STRING_REGEX.is_match("fooBAR082"));
        assert!(ASSUME_STRING_REGEX.is_match("foo bar baz"));

        // Non-matches.
        assert!(!ASSUME_STRING_REGEX.is_match("[1, a]"));
    }

    /// Arguments without `=` are treated as paths that must exist.
    #[test]
    fn file_parsing() {
        // A valid JSON file path.
        let input = "./tests/fixtures/inputs_one.json".parse::<Input>().unwrap();
        assert!(matches!(
            input,
            Input::File(path) if path.to_str().unwrap() == "./tests/fixtures/inputs_one.json"
        ));

        // A valid YAML file path.
        let input = "./tests/fixtures/inputs_three.yml"
            .parse::<Input>()
            .unwrap();
        assert!(matches!(
            input,
            Input::File(path) if path.to_str().unwrap() == "./tests/fixtures/inputs_three.yml"
        ));

        // A missing file path.
        let err = "./tests/fixtures/missing.json"
            .parse::<Input>()
            .unwrap_err();
        assert!(matches!(
            err,
            Error::FileNotFound(path) if path.to_str().unwrap() == "./tests/fixtures/missing.json"
        ));
    }

    /// Arguments with `=` are split into a validated key and a JSON value.
    #[test]
    fn key_value_pair_parsing() {
        // A standard key-value pair.
        let input = r#"foo="bar""#.parse::<Input>().unwrap();
        let (key, value) = input.unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.as_str().unwrap(), "bar");

        // A key-value pair whose key contains periods and underscores.
        let input = r#"foo.bar_baz_quux="qil""#.parse::<Input>().unwrap();
        let (key, value) = input.unwrap_pair();
        assert_eq!(key, "foo.bar_baz_quux");
        assert_eq!(value.as_str().unwrap(), "qil");

        // An invalid identifier for the key.
        let err = r#"foo$="bar""#.parse::<Input>().unwrap_err();
        assert!(matches!(
            err,
            Error::InvalidPair { pair, reason }
                if pair == r#"foo$="bar""#
                    && reason == r"key `foo$` did not match the identifier regex (`^([a-zA-Z][a-zA-Z0-9_.]*)$`)"
        ));

        // A value that is valid despite that value not being valid as a key.
        let input = r#"foo="bar$""#.parse::<Input>().unwrap();
        let (key, value) = input.unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.as_str().unwrap(), "bar$");
    }

    /// Values that are not valid JSON fall back to plain strings when they
    /// match the assume-string regex.
    #[test]
    fn unquoted_value_parsing() {
        // Unquoted text with a space.
        let (key, value) = "foo=bar baz".parse::<Input>().unwrap().unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.as_str().unwrap(), "bar baz");

        // Unquoted text that looks like a path.
        let (_, value) = "foo=./some/path.txt"
            .parse::<Input>()
            .unwrap()
            .unwrap_pair();
        assert_eq!(value.as_str().unwrap(), "./some/path.txt");

        // An empty value.
        let (_, value) = "foo=".parse::<Input>().unwrap().unwrap_pair();
        assert_eq!(value.as_str().unwrap(), "");

        // Valid JSON is never treated as a string.
        let (_, value) = "foo=42".parse::<Input>().unwrap().unwrap_pair();
        assert_eq!(value.as_i64().unwrap(), 42);
        let (_, value) = "foo=true".parse::<Input>().unwrap().unwrap_pair();
        assert!(value.as_bool().unwrap());
    }

    /// The accessor methods only yield the variant they are named after.
    #[test]
    fn accessors() {
        let pair = "foo=1".parse::<Input>().unwrap();
        assert!(pair.as_file().is_none());
        assert_eq!(pair.as_pair().map(|(key, _)| key), Some("foo"));
        assert!(pair.clone().into_file().is_none());
        assert!(pair.into_pair().is_some());

        let file = "./tests/fixtures/inputs_one.json".parse::<Input>().unwrap();
        assert!(file.as_pair().is_none());
        assert_eq!(
            file.as_file().and_then(|path| path.to_str()),
            Some("./tests/fixtures/inputs_one.json")
        );
        assert!(file.clone().into_pair().is_none());
        assert!(file.into_file().is_some());
    }

    /// Inputs are layered in the order they are supplied.
    #[test]
    fn coalesce() {
        // Helper functions.
        fn check_string_value(inputs: &Inputs, key: &str, value: &str) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_str().unwrap(), value);
        }

        fn check_float_value(inputs: &Inputs, key: &str, value: f64) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_f64().unwrap(), value);
        }

        fn check_boolean_value(inputs: &Inputs, key: &str, value: bool) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_bool().unwrap(), value);
        }

        fn check_integer_value(inputs: &Inputs, key: &str, value: i64) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_i64().unwrap(), value);
        }

        // The standard coalescing order.
        let inputs = Inputs::coalesce(
            [
                "./tests/fixtures/inputs_one.json",
                "./tests/fixtures/inputs_two.json",
                "./tests/fixtures/inputs_three.yml",
            ],
            Some("foo".to_string()),
        )
        .unwrap();

        assert_eq!(inputs.len(), 5);
        check_string_value(&inputs, "foo", "bar");
        check_float_value(&inputs, "baz", 128.0);
        check_string_value(&inputs, "quux", "qil");
        check_string_value(&inputs, "new.key", "foobarbaz");
        check_string_value(&inputs, "new_two.key", "bazbarfoo");

        // The opposite coalescing order.
        let inputs = Inputs::coalesce(
            [
                "./tests/fixtures/inputs_three.yml",
                "./tests/fixtures/inputs_two.json",
                "./tests/fixtures/inputs_one.json",
            ],
            Some("name_ex".to_string()),
        )
        .unwrap();

        assert_eq!(inputs.len(), 5);
        check_string_value(&inputs, "foo", "bar");
        check_float_value(&inputs, "baz", 42.0);
        check_string_value(&inputs, "quux", "qil");
        check_string_value(&inputs, "new.key", "foobarbaz");
        check_string_value(&inputs, "new_two.key", "bazbarfoo");

        // An example with some random key-value pairs thrown in.
        let inputs = Inputs::coalesce(
            [
                r#"sandwich=-100"#,
                "./tests/fixtures/inputs_one.json",
                "./tests/fixtures/inputs_two.json",
                r#"quux="jacks""#,
                "./tests/fixtures/inputs_three.yml",
                r#"baz=false"#,
            ],
            None,
        )
        .unwrap();

        assert_eq!(inputs.len(), 6);
        check_string_value(&inputs, "foo", "bar");
        check_boolean_value(&inputs, "baz", false);
        check_string_value(&inputs, "quux", "jacks");
        check_string_value(&inputs, "new.key", "foobarbaz");
        check_string_value(&inputs, "new_two.key", "bazbarfoo");
        check_integer_value(&inputs, "sandwich", -100);

        // An invalid key-value pair.
        let error = Inputs::coalesce(["./tests/fixtures/inputs_one.json", "foo=baz#bar"], None)
            .unwrap_err();
        assert!(matches!(
            error,
            Error::Deserialize(value) if value == "baz#bar"
        ));

        // A missing file.
        let error = Inputs::coalesce(
            [
                "./tests/fixtures/inputs_one.json",
                "./tests/fixtures/inputs_two.json",
                "./tests/fixtures/inputs_three.yml",
                "./tests/fixtures/missing.json",
            ],
            None,
        )
        .unwrap_err();
        assert!(matches!(
            error,
            Error::FileNotFound(path) if path.to_str().unwrap() == "./tests/fixtures/missing.json"
        ));
    }

    /// A supplied entrypoint is prepended to the key of every key-value pair.
    #[test]
    fn entrypoint_prefixes_pairs() {
        let inputs = Inputs::coalesce(["bar=1"], Some("foo".to_string())).unwrap();

        assert_eq!(inputs.len(), 1);
        assert!(!inputs.contains_key("bar"));
        let (_, value) = inputs.get("foo.bar").unwrap();
        assert_eq!(value.as_i64().unwrap(), 1);

        // Without an entrypoint, the key is stored untouched.
        let inputs = Inputs::coalesce(["bar=1"], None).unwrap();
        assert_eq!(inputs.len(), 1);
        assert!(inputs.contains_key("bar"));
    }

    /// An entrypoint that contains a period is rejected.
    #[test]
    fn invalid_entrypoint() {
        let error = Inputs::coalesce(["bar=1"], Some("foo.bar".to_string())).unwrap_err();
        assert!(matches!(
            error,
            Error::InvalidEntrypoint(entrypoint) if entrypoint == "foo.bar"
        ));
    }

    /// Only the first `=` separates the key from the value.
    #[test]
    fn multiple_equal_signs() {
        let (key, value) = r#"foo="bar=baz""#.parse::<Input>().unwrap().unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.as_str().unwrap(), "bar=baz");
    }
}