wdl_cli/
inputs.rs

1//! Inputs parsed in from the command line.
2
3use std::ops::Deref;
4use std::ops::DerefMut;
5use std::str::FromStr;
6use std::sync::LazyLock;
7
8use anyhow::bail;
9use indexmap::IndexMap;
10use regex::Regex;
11use serde_json::Value;
12use thiserror::Error;
13use wdl_analysis::Document;
14use wdl_engine::Inputs as EngineInputs;
15use wdl_engine::path::EvaluationPath;
16
17pub mod file;
18pub mod origin_paths;
19
20pub use file::InputFile;
21pub use origin_paths::OriginPaths;
22
23/// A regex that matches a valid identifier.
24///
25/// This is useful when recognizing whether a key provided on the command line
26/// is a valid identifier.
27static IDENTIFIER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
28    // SAFETY: this is checked statically with tests to always unwrap.
29    Regex::new(r"^([a-zA-Z][a-zA-Z0-9_.]*)$").unwrap()
30});
31
32/// If a value in a key-value pair passed in on the command line cannot be
33/// resolved to a WDL type, this regex is compared to the value.
34///
35/// If the regex matches, we assume the value is a string.
36static ASSUME_STRING_REGEX: LazyLock<Regex> = LazyLock::new(|| {
37    // SAFETY: this is checked statically with tests to always unwrap.
38    Regex::new(r"^[^\[\]{}]*$").unwrap()
39});
40
41/// An error related to inputs.
42#[derive(Error, Debug)]
43pub enum Error {
44    /// Failed to determine the current working directory.
45    #[error("failed to determine the current working directory")]
46    NoCurrentWorkingDirectory,
47
48    /// A file error.
49    #[error(transparent)]
50    File(#[from] file::Error),
51
52    /// Encountered an invalid key-value pair.
53    #[error("invalid key-value pair `{pair}`: {reason}")]
54    InvalidPair {
55        /// The string-value of the pair.
56        pair: String,
57
58        /// The reason the pair was not valid.
59        reason: String,
60    },
61
62    /// An invalid entrypoint was specified.
63    #[error("invalid entrypoint `{0}`")]
64    InvalidEntrypoint(String),
65
66    /// A deserialization error.
67    #[error("unable to deserialize `{0}` as a valid WDL value")]
68    Deserialize(String),
69}
70
/// A [`Result`](std::result::Result) whose error type is this module's
/// [`Error`](enum@self::Error).
pub type Result<T> = std::result::Result<T, Error>;
73
74/// An input parsed from the command line.
75#[derive(Clone, Debug)]
76pub enum Input {
77    /// A file.
78    File(
79        /// The path to the file.
80        ///
81        /// If this input is successfully created, the input is guaranteed to
82        /// exist at the time the inputs were processed.
83        EvaluationPath,
84    ),
85    /// A key-value pair representing an input.
86    Pair {
87        /// The key.
88        key: String,
89
90        /// The value.
91        value: Value,
92    },
93}
94
95impl Input {
96    /// Attempts to return a reference to the inner [`EvaluationPath`].
97    ///
98    /// * If the input is a [`Input::File`], a reference to the inner path is
99    ///   returned wrapped in [`Some`].
100    /// * Otherwise, [`None`] is returned.
101    pub fn as_file(&self) -> Option<&EvaluationPath> {
102        match self {
103            Input::File(p) => Some(p),
104            _ => None,
105        }
106    }
107
108    /// Consumes `self` and attempts to return the inner [`EvaluationPath`].
109    ///
110    /// * If the input is a [`Input::File`], the inner path buffer is returned
111    ///   wrapped in [`Some`].
112    /// * Otherwise, [`None`] is returned.
113    pub fn into_file(self) -> Option<EvaluationPath> {
114        match self {
115            Input::File(p) => Some(p),
116            _ => None,
117        }
118    }
119
120    /// Consumes `self` and returns the inner [`EvaluationPath`].
121    ///
122    /// # Panics
123    ///
124    /// If the input is not a [`Input::File`].
125    pub fn unwrap_file(self) -> EvaluationPath {
126        match self {
127            Input::File(p) => p,
128            v => panic!("{v:?} is not an `Input::File`"),
129        }
130    }
131
132    /// Attempts to return a reference to the inner key-value pair.
133    ///
134    /// * If the input is a [`Input::Pair`], a reference to the inner key and
135    ///   value is returned wrapped in [`Some`].
136    /// * Otherwise, [`None`] is returned.
137    pub fn as_pair(&self) -> Option<(&str, &Value)> {
138        match self {
139            Input::Pair { key, value } => Some((key.as_str(), value)),
140            _ => None,
141        }
142    }
143
144    /// Consumes `self` and attempts to return the inner key-value pair.
145    ///
146    /// * If the input is a [`Input::Pair`], the inner key-value pair is
147    ///   returned wrapped in [`Some`].
148    /// * Otherwise, [`None`] is returned.
149    pub fn into_pair(self) -> Option<(String, Value)> {
150        match self {
151            Input::Pair { key, value } => Some((key, value)),
152            _ => None,
153        }
154    }
155
156    /// Consumes `self` and returns the inner key-value pair.
157    ///
158    /// # Panics
159    ///
160    /// If the input is not a [`Input::Pair`].
161    pub fn unwrap_pair(self) -> (String, Value) {
162        match self {
163            Input::Pair { key, value } => (key, value),
164            v => panic!("{v:?} is not an `Input::Pair`"),
165        }
166    }
167}
168
169impl FromStr for Input {
170    type Err = Error;
171
172    fn from_str(s: &str) -> std::result::Result<Self, Error> {
173        match s.split_once("=") {
174            Some((key, value)) => {
175                if !IDENTIFIER_REGEX.is_match(key) {
176                    return Err(Error::InvalidPair {
177                        pair: s.to_string(),
178                        reason: format!(
179                            "key `{}` did not match the identifier regex (`{}`)",
180                            key,
181                            IDENTIFIER_REGEX.as_str()
182                        ),
183                    });
184                }
185
186                let value = serde_json::from_str(value).or_else(|_| {
187                    if ASSUME_STRING_REGEX.is_match(value) {
188                        Ok(Value::String(value.to_owned()))
189                    } else {
190                        Err(Error::Deserialize(value.to_owned()))
191                    }
192                })?;
193
194                Ok(Input::Pair {
195                    key: key.to_owned(),
196                    value,
197                })
198            }
199            None => {
200                let path: EvaluationPath = s.parse().map_err(|e| file::Error::Path {
201                    path: s.to_string(),
202                    error: e,
203                })?;
204                if let Some(path) = path.as_local()
205                    && !path.exists()
206                {
207                    return Err(file::Error::NotFound(path.to_path_buf()).into());
208                }
209
210                Ok(Input::File(path))
211            }
212        }
213    }
214}
215
/// The map type backing [`Inputs`]: each key is associated with the
/// [`EvaluationPath`] recorded as the input's origin and with the input's
/// value.
type InputsInner = IndexMap<String, (EvaluationPath, Value)>;
218
219/// A set of inputs parsed from the command line and compiled on top of one
220/// another.
221#[derive(Clone, Debug, Default)]
222pub struct Inputs {
223    /// The actual inputs map.
224    inputs: InputsInner,
225    /// The name of the task or workflow these inputs are provided for.
226    entrypoint: Option<String>,
227}
228
229impl Inputs {
230    /// Adds an input read from the command line.
231    async fn add_input(&mut self, input: &str) -> Result<()> {
232        match input.parse::<Input>()? {
233            Input::File(path) => {
234                let inputs = InputFile::read(&path).await.map_err(Error::File)?;
235                self.extend(inputs.into_inner());
236            }
237            Input::Pair { key, value } => {
238                let cwd = std::env::current_dir().map_err(|_| Error::NoCurrentWorkingDirectory)?;
239
240                let key = if let Some(prefix) = &self.entrypoint {
241                    format!("{prefix}.{key}")
242                } else {
243                    key
244                };
245                self.insert(key, (EvaluationPath::Local(cwd), value));
246            }
247        };
248
249        Ok(())
250    }
251
252    /// Attempts to coalesce a set of inputs into an [`Inputs`].
253    ///
254    /// `entrypoint` is the task or workflow the inputs are for.
255    /// If `entrypoint` is `Some(_)` then it will be prefixed to each
256    /// [`Input::Pair`]. Keys inside a [`Input::File`] must always have this
257    /// common prefix specified. If `entrypoint` is `None` then all of the
258    /// inputs in `iter` must be prefixed with the task or workflow name.
259    pub async fn coalesce<T, V>(iter: T, entrypoint: Option<String>) -> Result<Self>
260    where
261        T: IntoIterator<Item = V>,
262        V: AsRef<str>,
263    {
264        if let Some(ep) = &entrypoint
265            && ep.contains('.')
266        {
267            return Err(Error::InvalidEntrypoint(ep.into()));
268        }
269
270        let mut inputs = Inputs {
271            entrypoint,
272            ..Default::default()
273        };
274
275        for input in iter {
276            inputs.add_input(input.as_ref()).await?;
277        }
278
279        Ok(inputs)
280    }
281
282    /// Consumes `self` and returns the inner index map.
283    pub fn into_inner(self) -> InputsInner {
284        self.inputs
285    }
286
287    /// Converts a set of inputs to a set of engine inputs.
288    ///
289    /// Returns `Ok(Some(_))` if the inputs are not empty.
290    ///
291    /// Returns `Ok(None)` if the inputs are empty.
292    ///
293    /// When the inputs are not empty, the return type contained in `Some(_)` is
294    /// a tuple of,
295    ///
296    /// - the name of the callee (the name of the task or workflow being run),
297    /// - the transformed engine inputs, and
298    /// - a map containing the origin path for each provided input key.
299    pub fn into_engine_inputs(
300        self,
301        document: &Document,
302    ) -> anyhow::Result<Option<(String, EngineInputs, OriginPaths)>> {
303        let (origins, values) = self.inputs.into_iter().fold(
304            (IndexMap::new(), serde_json::Map::new()),
305            |(mut origins, mut values), (key, (origin, value))| {
306                origins.insert(key.clone(), origin);
307                values.insert(key, value);
308                (origins, values)
309            },
310        );
311
312        let result = EngineInputs::parse_object(document, values)?;
313
314        if let Some((derived, _)) = &result
315            && let Some(ep) = &self.entrypoint
316            && derived != ep
317        {
318            bail!(format!(
319                "supplied entrypoint `{ep}` does not match derived entrypoint `{derived}`"
320            ))
321        }
322
323        Ok(result.map(|(callee_name, inputs)| {
324            let callee_prefix = format!("{callee_name}.");
325
326            let origins = origins
327                .into_iter()
328                .map(|(key, path)| {
329                    if let Some(key) = key.strip_prefix(&callee_prefix) {
330                        (key.to_owned(), path)
331                    } else {
332                        (key, path)
333                    }
334                })
335                .collect::<IndexMap<_, _>>();
336
337            (callee_name, inputs, OriginPaths::Map(origins))
338        }))
339    }
340}
341
342impl Deref for Inputs {
343    type Target = InputsInner;
344
345    fn deref(&self) -> &Self::Target {
346        &self.inputs
347    }
348}
349
350impl DerefMut for Inputs {
351    fn deref_mut(&mut self) -> &mut Self::Target {
352        &mut self.inputs
353    }
354}
355
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Parses `raw` and unwraps the result as a key-value pair.
    fn parse_pair(raw: &str) -> (String, Value) {
        raw.parse::<Input>().unwrap().unwrap_pair()
    }

    /// Looks up the value stored under `key`, panicking when it is absent.
    fn value_of<'a>(inputs: &'a Inputs, key: &str) -> &'a Value {
        let (_, value) = inputs.get(key).unwrap();
        value
    }

    /// Asserts that each `(key, expected)` entry is stored as that string.
    fn assert_strings(inputs: &Inputs, expected: &[(&str, &str)]) {
        for (key, value) in expected {
            assert_eq!(value_of(inputs, key).as_str().unwrap(), *value);
        }
    }

    #[test]
    fn identifier_regex() {
        let accepted = IDENTIFIER_REGEX.is_match("here_is_an.identifier");
        let rejected = IDENTIFIER_REGEX.is_match("here is not an identifier");

        assert!(accepted);
        assert!(!rejected);
    }

    #[test]
    fn assume_string_regex() {
        // Values that are assumed to be strings.
        for candidate in ["", "fooBAR082", "foo bar baz"] {
            assert!(ASSUME_STRING_REGEX.is_match(candidate));
        }

        // A value that is not.
        assert!(!ASSUME_STRING_REGEX.is_match("[1, a]"));
    }

    #[test]
    fn file_parsing() {
        // An existing JSON file given with a leading `./`.
        let path = "./tests/fixtures/inputs_one.json"
            .parse::<Input>()
            .unwrap()
            .unwrap_file();
        assert_eq!(
            path.to_str().unwrap().replace("\\", "/"),
            "tests/fixtures/inputs_one.json"
        );

        // An existing YAML file.
        let path = "tests/fixtures/inputs_three.yml"
            .parse::<Input>()
            .unwrap()
            .unwrap_file();
        assert_eq!(
            path.to_str().unwrap().replace("\\", "/"),
            "tests/fixtures/inputs_three.yml"
        );

        // A file that does not exist.
        let message = "./tests/fixtures/missing.json"
            .parse::<Input>()
            .unwrap_err()
            .to_string();
        assert_eq!(
            message.replace("\\", "/"),
            "input file `tests/fixtures/missing.json` was not found"
        );
    }

    #[test]
    fn key_value_pair_parsing() {
        // A plain key with a quoted string value.
        let (key, value) = parse_pair(r#"foo="bar""#);
        assert_eq!(key, "foo");
        assert_eq!(value.as_str().unwrap(), "bar");

        // A dotted key with a quoted string value.
        let (key, value) = parse_pair(r#"foo.bar_baz_quux="qil""#);
        assert_eq!(key, "foo.bar_baz_quux");
        assert_eq!(value.as_str().unwrap(), "qil");

        // A key that is not a valid identifier.
        let message = r#"foo$="bar""#.parse::<Input>().unwrap_err().to_string();
        assert_eq!(
            message,
            r#"invalid key-value pair `foo$="bar"`: key `foo$` did not match the identifier regex (`^([a-zA-Z][a-zA-Z0-9_.]*)$`)"#
        );

        // Characters that are rejected in a key are still fine in a value.
        let (key, value) = parse_pair(r#"foo="bar$""#);
        assert_eq!(key, "foo");
        assert_eq!(value.as_str().unwrap(), "bar$");
    }

    #[tokio::test]
    async fn coalesce() {
        // The files applied in their natural order.
        let inputs = Inputs::coalesce(
            [
                "./tests/fixtures/inputs_one.json",
                "./tests/fixtures/inputs_two.json",
                "./tests/fixtures/inputs_three.yml",
            ],
            Some("foo".to_string()),
        )
        .await
        .unwrap();

        assert_eq!(inputs.len(), 5);
        assert_strings(
            &inputs,
            &[
                ("foo", "bar"),
                ("quux", "qil"),
                ("new.key", "foobarbaz"),
                ("new_two.key", "bazbarfoo"),
            ],
        );
        assert_eq!(value_of(&inputs, "baz").as_f64().unwrap(), 128.0);

        // The same files applied in reverse order.
        let inputs = Inputs::coalesce(
            [
                "./tests/fixtures/inputs_three.yml",
                "./tests/fixtures/inputs_two.json",
                "./tests/fixtures/inputs_one.json",
            ],
            Some("name_ex".to_string()),
        )
        .await
        .unwrap();

        assert_eq!(inputs.len(), 5);
        assert_strings(
            &inputs,
            &[
                ("foo", "bar"),
                ("quux", "qil"),
                ("new.key", "foobarbaz"),
                ("new_two.key", "bazbarfoo"),
            ],
        );
        assert_eq!(value_of(&inputs, "baz").as_f64().unwrap(), 42.0);

        // Files interleaved with individual key-value pairs.
        let inputs = Inputs::coalesce(
            [
                r#"sandwich=-100"#,
                "./tests/fixtures/inputs_one.json",
                "./tests/fixtures/inputs_two.json",
                r#"quux="jacks""#,
                "./tests/fixtures/inputs_three.yml",
                r#"baz=false"#,
            ],
            None,
        )
        .await
        .unwrap();

        assert_eq!(inputs.len(), 6);
        assert_strings(
            &inputs,
            &[
                ("foo", "bar"),
                ("quux", "jacks"),
                ("new.key", "foobarbaz"),
                ("new_two.key", "bazbarfoo"),
            ],
        );
        assert_eq!(value_of(&inputs, "baz").as_bool().unwrap(), false);
        assert_eq!(value_of(&inputs, "sandwich").as_i64().unwrap(), -100);

        // A key-value pair whose value cannot be interpreted.
        let message = Inputs::coalesce(["./tests/fixtures/inputs_one.json", "foo=baz[bar"], None)
            .await
            .unwrap_err()
            .to_string();
        assert_eq!(
            message,
            "unable to deserialize `baz[bar` as a valid WDL value"
        );

        // A file that does not exist.
        let message = Inputs::coalesce(
            [
                "./tests/fixtures/inputs_one.json",
                "./tests/fixtures/inputs_two.json",
                "./tests/fixtures/inputs_three.yml",
                "./tests/fixtures/missing.json",
            ],
            None,
        )
        .await
        .unwrap_err()
        .to_string();
        assert_eq!(
            message.replace("\\", "/"),
            "input file `tests/fixtures/missing.json` was not found"
        );
    }

    #[tokio::test]
    async fn coalesce_special_characters() {
        /// Coalesces the single argument `input=<value>`.
        async fn coalesce_value(value: &str) -> Result<Inputs> {
            Inputs::coalesce([format!("input={}", value)], None).await
        }

        // Values that must survive coalescing as the exact same string.
        let accepted = [
            "can-coalesce-dashes",
            "can\"coalesce\"quotes",
            "can'coalesce'apostrophes",
            "can;coalesce;semicolons",
            "can:coalesce:colons",
            "can*coalesce*stars",
            "can,coalesce,commas",
            "can?coalesce?question?mark",
            "can|coalesce|pipe",
            "can<coalesce>less<than>or>greater<than",
            "can^coalesce^carrot",
            "can#coalesce#pound#sign",
            "can%coalesce%percent",
            "can!coalesce!exclamation!marks",
            "can\\coalesce\\backslashes",
            "can@coalesce@at@sign",
            "can(coalesce(parenthesis))",
            "can coalesce السلام عليكم",
            "can coalesce 你",
            "can coalesce Dobrý den",
            "can coalesce Hello",
            "can coalesce שלום",
            "can coalesce नमस्ते",
            "can coalesce こんにちは",
            "can coalesce 안녕하세요",
            "can coalesce 你好",
            "can coalesce Olá",
            "can coalesce Здравствуйте",
            "can coalesce Hola",
        ];

        for value in accepted {
            let inputs = coalesce_value(value).await.unwrap();
            assert_eq!(value_of(&inputs, "input").as_str().unwrap(), value);
        }

        // Values that must be rejected because they contain a bracket or a
        // brace.
        let rejected = [
            "cannot coalesce string with [",
            "cannot coalesce string with ]",
            "cannot coalesce string with {",
            "cannot coalesce string with }",
        ];

        for value in rejected {
            let error = coalesce_value(value).await.unwrap_err();
            assert!(matches!(
                error,
                Error::Deserialize(output) if output == value
            ));
        }
    }

    #[test]
    fn multiple_equal_signs() {
        // Only the first `=` separates the key from the value.
        let (key, value) = parse_pair(r#"foo="bar=baz""#);
        assert_eq!(key, "foo");
        assert_eq!(value.as_str().unwrap(), "bar=baz");
    }
}