wdl_cli/
inputs.rs

1//! Inputs parsed in from the command line.
2
3use std::ops::Deref;
4use std::ops::DerefMut;
5use std::path::Path;
6use std::path::PathBuf;
7use std::str::FromStr;
8use std::sync::LazyLock;
9
10use indexmap::IndexMap;
11use regex::Regex;
12use thiserror::Error;
13use wdl_analysis::document::Document;
14use wdl_engine::Inputs as EngineInputs;
15use wdl_engine::Object;
16use wdl_engine::PrimitiveValue;
17use wdl_engine::Value;
18
19pub mod file;
20pub mod origin_paths;
21
22pub use file::InputFile;
23pub use origin_paths::OriginPaths;
24
/// A regex that matches a valid identifier.
///
/// The pattern is anchored to the whole string and accepts an ASCII letter
/// followed by any number of ASCII letters, ASCII digits, underscores, or
/// dots. Because dots are permitted, dotted keys such as `foo.bar_baz` match.
///
/// This is useful when recognizing whether a key provided on the command line
/// is a valid identifier.
static IDENTIFIER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // NOTE: the pattern is a constant and the `identifier_regex` unit test
    // forces this initializer to run, so an invalid pattern would be caught by
    // the test suite rather than at runtime.
    Regex::new(r"^([a-zA-Z][a-zA-Z0-9_.]*)$").unwrap()
});
33
/// If a value in a key-value pair passed in on the command line cannot be
/// resolved to a WDL type, this regex is compared to the value.
///
/// If the regex matches, we assume the value is a string.
///
/// The pattern is anchored to the whole value and accepts any sequence
/// (including the empty string) made up solely of word characters (`\w`),
/// spaces, `/`, `~`, and `.`. Any other character (for example `#`, `[`, or
/// `,`) causes the value to be rejected.
static ASSUME_STRING_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // NOTE: the pattern is a constant and the `assume_string_regex` unit test
    // forces this initializer to run, so an invalid pattern would be caught by
    // the test suite rather than at runtime.
    Regex::new(r"^[\w /~.]*$").unwrap()
});
42
/// An error related to inputs.
///
/// Produced when parsing a single command line input (see the [`FromStr`]
/// implementation for [`Input`]) or when coalescing several inputs into an
/// [`Inputs`].
#[derive(Error, Debug)]
pub enum Error {
    /// A file error.
    ///
    /// Wraps an error from the [`file`] module. The variant is transparent, so
    /// the displayed message is the one provided by the inner error.
    #[error(transparent)]
    File(#[from] file::Error),

    /// A file was specified on the command line but not found.
    ///
    /// Holds the path exactly as it was provided on the command line.
    #[error("file `{0}` was not found")]
    FileNotFound(PathBuf),

    /// Encountered an invalid key-value pair.
    ///
    /// Returned when the key of a `key=value` argument does not match the
    /// identifier regex.
    #[error("invalid key-value pair `{pair}`: {reason}")]
    InvalidPair {
        /// The string-value of the pair.
        ///
        /// This is the full, unsplit `key=value` argument.
        pair: String,

        /// The reason the pair was not valid.
        reason: String,
    },

    /// A deserialization error.
    ///
    /// Returned when the value of a `key=value` argument is neither valid JSON
    /// nor accepted by the assume-string regex. Holds the offending value.
    #[error("unable to deserialize `{0}` as a valid WDL value")]
    Deserialize(String),
}
68
/// A [`Result`](std::result::Result) with an [`Error`].
///
/// This alias shadows the prelude `Result` within this module, which is why
/// the standard type is referred to by its full path here.
pub type Result<T> = std::result::Result<T, Error>;
71
/// An input parsed from the command line.
///
/// When parsed from a string, an argument containing an `=` is treated as a
/// key-value pair (split at the first `=`); any other argument is treated as a
/// path to an inputs file.
#[derive(Clone, Debug)]
pub enum Input {
    /// A file.
    File(
        /// The path to the file.
        ///
        /// If this input is successfully created by parsing a string, the path
        /// is guaranteed to have existed at the time the input was parsed.
        /// Constructing the variant directly performs no such check.
        PathBuf,
    ),

    /// A key-value pair representing an input.
    Pair {
        /// The key.
        ///
        /// When parsed from a string, the key matched the identifier regex.
        key: String,

        /// The value.
        ///
        /// When parsed from a string, this is either the value deserialized
        /// from JSON or, as a fallback, the raw text stored as a string.
        value: Value,
    },
}
93
94impl Input {
95    /// Attempts to return a reference to the inner [`Path`].
96    ///
97    /// * If the input is a [`Input::File`], a reference to the inner path is
98    ///   returned wrapped in [`Some`].
99    /// * Otherwise, [`None`] is returned.
100    pub fn as_file(&self) -> Option<&Path> {
101        match self {
102            Input::File(p) => Some(p.as_path()),
103            _ => None,
104        }
105    }
106
107    /// Consumes `self` and attempts to return the inner [`PathBuf`].
108    ///
109    /// * If the input is a [`Input::File`], the inner path buffer is returned
110    ///   wrapped in [`Some`].
111    /// * Otherwise, [`None`] is returned.
112    pub fn into_file(self) -> Option<PathBuf> {
113        match self {
114            Input::File(p) => Some(p),
115            _ => None,
116        }
117    }
118
119    /// Consumes `self` and returns the inner [`PathBuf`].
120    ///
121    /// # Panics
122    ///
123    /// If the input is not a [`Input::File`].
124    pub fn unwrap_file(self) -> PathBuf {
125        match self {
126            Input::File(p) => p,
127            v => panic!("{v:?} is not an `Input::File`"),
128        }
129    }
130
131    /// Attempts to return a reference to the inner key-value pair.
132    ///
133    /// * If the input is a [`Input::Pair`], a reference to the inner key and
134    ///   value is returned wrapped in [`Some`].
135    /// * Otherwise, [`None`] is returned.
136    pub fn as_pair(&self) -> Option<(&str, &Value)> {
137        match self {
138            Input::Pair { key, value } => Some((key.as_str(), value)),
139            _ => None,
140        }
141    }
142
143    /// Consumes `self` and attempts to return the inner key-value pair.
144    ///
145    /// * If the input is a [`Input::Pair`], the inner key-value pair is
146    ///   returned wrapped in [`Some`].
147    /// * Otherwise, [`None`] is returned.
148    pub fn into_pair(self) -> Option<(String, Value)> {
149        match self {
150            Input::Pair { key, value } => Some((key, value)),
151            _ => None,
152        }
153    }
154
155    /// Consumes `self` and returns the inner key-value pair.
156    ///
157    /// # Panics
158    ///
159    /// If the input is not a [`Input::Pair`].
160    pub fn unwrap_pair(self) -> (String, Value) {
161        match self {
162            Input::Pair { key, value } => (key, value),
163            v => panic!("{v:?} is not an `Input::Pair`"),
164        }
165    }
166}
167
168impl FromStr for Input {
169    type Err = Error;
170
171    fn from_str(s: &str) -> std::result::Result<Self, Error> {
172        match s.split_once("=") {
173            Some((key, value)) => {
174                if !IDENTIFIER_REGEX.is_match(key) {
175                    return Err(Error::InvalidPair {
176                        pair: s.to_string(),
177                        reason: format!(
178                            "key `{}` did not match the identifier regex (`{}`)",
179                            key,
180                            IDENTIFIER_REGEX.as_str()
181                        ),
182                    });
183                }
184
185                let value = serde_json::from_str(value).or_else(|_| {
186                    if ASSUME_STRING_REGEX.is_match(value) {
187                        Ok(Value::Primitive(PrimitiveValue::String(
188                            value.to_owned().into(),
189                        )))
190                    } else {
191                        Err(Error::Deserialize(value.to_owned()))
192                    }
193                })?;
194
195                Ok(Input::Pair {
196                    key: key.to_owned(),
197                    value,
198                })
199            }
200            None => {
201                let path = PathBuf::from(s);
202
203                if !path.exists() {
204                    return Err(Error::FileNotFound(path));
205                }
206
207                Ok(Input::File(path))
208            }
209        }
210    }
211}
212
/// The inner type for inputs (for convenience).
///
/// Maps each input key to a tuple of a path and the input's value. For
/// key-value pairs given on the command line the path is the current working
/// directory at the time the pair was added; for entries read from an inputs
/// file the path is whatever the file reader supplies (presumably derived
/// from the file's location — confirm against the `file` module).
type InputsInner = IndexMap<String, (PathBuf, Value)>;
215
/// A set of inputs parsed from the command line and compiled on top of one
/// another.
///
/// Inputs are applied in the order they are provided, so a value added later
/// replaces the value of an earlier input with the same key.
///
/// The wrapper dereferences to the inner index map, so the map's methods can
/// be called directly on an `Inputs`.
#[derive(Clone, Debug, Default)]
pub struct Inputs(InputsInner);
220
221impl Inputs {
222    /// Adds an input read from the command line.
223    fn add_input(&mut self, input: &str) -> Result<()> {
224        match input.parse::<Input>()? {
225            Input::File(path) => {
226                let inputs = InputFile::read(&path).map_err(Error::File)?;
227                self.extend(inputs.into_inner());
228            }
229            Input::Pair { key, value } => {
230                // SAFETY: we expect that the current working directory is
231                // always available for the platforms that `wdl` will run
232                // within.
233                let cwd = std::env::current_dir().unwrap();
234                self.insert(key, (cwd, value));
235            }
236        };
237
238        Ok(())
239    }
240
241    /// Attempts to coalesce a set of inputs into an [`Inputs`].
242    pub fn coalesce<T, V>(iter: T) -> Result<Self>
243    where
244        T: IntoIterator<Item = V>,
245        V: AsRef<str>,
246    {
247        let mut inputs = Inputs::default();
248
249        for input in iter {
250            inputs.add_input(input.as_ref())?;
251        }
252
253        Ok(inputs)
254    }
255
256    /// Consumes `self` and returns the inner index map.
257    pub fn into_inner(self) -> InputsInner {
258        self.0
259    }
260
261    /// Converts a set of inputs to a set of engine inputs.
262    ///
263    /// Returns `Ok(Some(_))` if the inputs are not empty.
264    ///
265    /// Returns `Ok(None)` if the inputs are empty.
266    ///
267    /// When the inputs are not empty, the return type contained in `Some(_)` is
268    /// a tuple of,
269    ///
270    /// - the name of the callee (the name of the task or workflow being run),
271    /// - the transformed engine inputs, and
272    /// - a map containing the origin path for each provided input key.
273    pub fn into_engine_inputs(
274        self,
275        document: &Document,
276    ) -> anyhow::Result<Option<(String, EngineInputs, OriginPaths)>> {
277        let (origins, values): (IndexMap<_, _>, IndexMap<_, _>) = self
278            .0
279            .into_iter()
280            .map(|(key, (origin, value))| ((key.clone(), origin), (key, value)))
281            .unzip();
282
283        let object = Object::from(values);
284        let result = EngineInputs::parse_object(document, object)?;
285
286        Ok(result.map(|(callee_name, inputs)| {
287            let callee_prefix = format!("{}.", callee_name);
288
289            let origins = origins
290                .into_iter()
291                .map(|(key, path)| {
292                    if let Some(key) = key.strip_prefix(&callee_prefix) {
293                        (key.to_owned(), path)
294                    } else {
295                        (key, path)
296                    }
297                })
298                .collect::<IndexMap<String, PathBuf>>();
299
300            (callee_name, inputs, OriginPaths::from(origins))
301        }))
302    }
303}
304
305impl Deref for Inputs {
306    type Target = InputsInner;
307
308    fn deref(&self) -> &Self::Target {
309        &self.0
310    }
311}
312
313impl DerefMut for Inputs {
314    fn deref_mut(&mut self) -> &mut Self::Target {
315        &mut self.0
316    }
317}
318
// Unit tests for input parsing and coalescing.
//
// Several tests read fixture files via paths relative to the working
// directory (`./tests/fixtures/...`), so they presumably need to be run from
// the crate root — confirm against the test setup.
#[cfg(test)]
mod tests {
    use super::*;

    /// Ensures the identifier regex compiles, accepts a dotted identifier, and
    /// rejects text containing spaces.
    #[test]
    fn identifier_regex() {
        assert!(IDENTIFIER_REGEX.is_match("here_is_an.identifier"));
        assert!(!IDENTIFIER_REGEX.is_match("here is not an identifier"));
    }

    /// Ensures the assume-string regex compiles, accepts simple text
    /// (including the empty string), and rejects JSON-like punctuation.
    #[test]
    fn assume_string_regex() {
        // Matches.
        assert!(ASSUME_STRING_REGEX.is_match(""));
        assert!(ASSUME_STRING_REGEX.is_match("fooBAR082"));
        assert!(ASSUME_STRING_REGEX.is_match("foo bar baz"));

        // Non-matches.
        assert!(!ASSUME_STRING_REGEX.is_match("[1, a]"));
    }

    /// Arguments without an `=` are parsed as file paths that must exist.
    #[test]
    fn file_parsing() {
        // A valid JSON file path.
        let input = "./tests/fixtures/inputs_one.json".parse::<Input>().unwrap();
        assert!(matches!(
            input,
            Input::File(path) if path.to_str().unwrap() == "./tests/fixtures/inputs_one.json"
        ));

        // A valid YAML file path.
        let input = "./tests/fixtures/inputs_three.yml"
            .parse::<Input>()
            .unwrap();
        assert!(matches!(
            input,
            Input::File(path) if path.to_str().unwrap() == "./tests/fixtures/inputs_three.yml"
        ));

        // A missing file path.
        let err = "./tests/fixtures/missing.json"
            .parse::<Input>()
            .unwrap_err();
        assert!(matches!(
            err,
            Error::FileNotFound(path) if path.to_str().unwrap() == "./tests/fixtures/missing.json"
        ));
    }

    /// Arguments containing an `=` are parsed as key-value pairs.
    #[test]
    fn key_value_pair_parsing() {
        // A standard key-value pair.
        let input = r#"foo="bar""#.parse::<Input>().unwrap();
        let (key, value) = input.unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.unwrap_string().as_str(), "bar");

        // A key-value pair whose key contains a dot and underscores.
        let input = r#"foo.bar_baz_quux="qil""#.parse::<Input>().unwrap();
        let (key, value) = input.unwrap_pair();
        assert_eq!(key, "foo.bar_baz_quux");
        assert_eq!(value.unwrap_string().as_str(), "qil");

        // An invalid identifier for the key.
        let err = r#"foo$="bar""#.parse::<Input>().unwrap_err();
        assert!(matches!(
                err,
                Error::InvalidPair {
                    pair,
                    reason
                } if pair == r#"foo$="bar""# &&
                reason == r"key `foo$` did not match the identifier regex (`^([a-zA-Z][a-zA-Z0-9_.]*)$`)"));

        // A value that is valid despite that value not being valid as a key.
        let input = r#"foo="bar$""#.parse::<Input>().unwrap();
        let (key, value) = input.unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(value.unwrap_string().as_str(), "bar$");
    }

    /// Inputs are applied in order, with later inputs overriding earlier ones
    /// that share a key; the first failing input aborts coalescing.
    #[test]
    fn coalesce() {
        // Helper functions: each looks up `key` and asserts that the stored
        // value has the expected type and content.
        fn check_string_value(inputs: &Inputs, key: &str, value: &str) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_string().unwrap().as_str(), value);
        }

        fn check_float_value(inputs: &Inputs, key: &str, value: f64) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_float().unwrap(), value);
        }

        fn check_boolean_value(inputs: &Inputs, key: &str, value: bool) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_boolean().unwrap(), value);
        }

        fn check_integer_value(inputs: &Inputs, key: &str, value: i64) {
            let (_, input) = inputs.get(key).unwrap();
            assert_eq!(input.as_integer().unwrap(), value);
        }

        // The standard coalescing order.
        let inputs = Inputs::coalesce([
            "./tests/fixtures/inputs_one.json",
            "./tests/fixtures/inputs_two.json",
            "./tests/fixtures/inputs_three.yml",
        ])
        .unwrap();

        assert_eq!(inputs.len(), 5);
        check_string_value(&inputs, "foo", "bar");
        check_float_value(&inputs, "baz", 128.0);
        check_string_value(&inputs, "quux", "qil");
        check_string_value(&inputs, "new", "foobarbaz");
        check_string_value(&inputs, "new_two", "bazbarfoo");

        // The opposite coalescing order: the same keys are present, but the
        // value of `baz` differs because a different file is applied last.
        let inputs = Inputs::coalesce([
            "./tests/fixtures/inputs_three.yml",
            "./tests/fixtures/inputs_two.json",
            "./tests/fixtures/inputs_one.json",
        ])
        .unwrap();

        assert_eq!(inputs.len(), 5);
        check_string_value(&inputs, "foo", "bar");
        check_float_value(&inputs, "baz", 42.0);
        check_string_value(&inputs, "quux", "qil");
        check_string_value(&inputs, "new", "foobarbaz");
        check_string_value(&inputs, "new_two", "bazbarfoo");

        // An example with some random key-value pairs thrown in. Pairs that
        // come after a file override that file's values (`quux` is overridden
        // before `inputs_three.yml` is applied and still wins, so that file
        // presumably does not define `quux`).
        let inputs = Inputs::coalesce([
            r#"sandwich=-100"#,
            "./tests/fixtures/inputs_one.json",
            "./tests/fixtures/inputs_two.json",
            r#"quux="jacks""#,
            "./tests/fixtures/inputs_three.yml",
            r#"baz=false"#,
        ])
        .unwrap();

        assert_eq!(inputs.len(), 6);
        check_string_value(&inputs, "foo", "bar");
        check_boolean_value(&inputs, "baz", false);
        check_string_value(&inputs, "quux", "jacks");
        check_string_value(&inputs, "new", "foobarbaz");
        check_string_value(&inputs, "new_two", "bazbarfoo");
        check_integer_value(&inputs, "sandwich", -100);

        // An invalid key-value pair: `baz#bar` is not valid JSON and `#` is
        // not accepted by the assume-string regex.
        let error =
            Inputs::coalesce(["./tests/fixtures/inputs_one.json", "foo=baz#bar"]).unwrap_err();
        assert!(matches!(
            error,
            Error::Deserialize(value) if value == "baz#bar"
        ));

        // A missing file.
        let error = Inputs::coalesce([
            "./tests/fixtures/inputs_one.json",
            "./tests/fixtures/inputs_two.json",
            "./tests/fixtures/inputs_three.yml",
            "./tests/fixtures/missing.json",
        ])
        .unwrap_err();
        assert!(matches!(
                error,
                Error::FileNotFound(path) if path.to_str().unwrap() == "./tests/fixtures/missing.json"));
    }

    /// Only the first `=` separates the key from the value; later `=`
    /// characters remain part of the value.
    #[test]
    fn multiple_equal_signs() {
        let (key, value) = r#"foo="bar=baz""#.parse::<Input>().unwrap().unwrap_pair();
        assert_eq!(key, "foo");
        assert_eq!(&**value.unwrap_string(), "bar=baz");
    }
}