wdl_cli/inputs/
file.rs

1//! Input files parsed in from the command line.
2
3use std::ffi::OsStr;
4use std::path::PathBuf;
5use std::path::absolute;
6
7use serde_json::Value as JsonValue;
8use serde_yaml_ng::Value as YamlValue;
9use thiserror::Error;
10use wdl_engine::JsonMap;
11use wdl_engine::path::EvaluationPath;
12
13use crate::Inputs;
14
/// An error related to an input file.
///
/// Each variant carries enough context (usually the offending path) to render
/// a self-contained, user-facing message through its `#[error(...)]` format.
#[derive(Error, Debug)]
pub enum Error {
    /// An input file specified by local path was not found.
    #[error("input file `{0}` was not found")]
    NotFound(PathBuf),

    /// An error occurred parsing an input file path.
    #[error("input file path `{path}` is invalid: {error:#}")]
    Path {
        /// The path to the inputs file.
        path: String,
        /// The error parsing the path.
        error: anyhow::Error,
    },

    /// An error occurring in [`serde_json`].
    ///
    /// The message of the underlying error is displayed unchanged.
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// An input file cannot be read from a directory.
    #[error("an input file cannot be read from directory `{0}`")]
    InvalidDir(PathBuf),

    /// The input file did not contain a map at the root.
    #[error("input file `{path}` did not contain a map from strings to values at the root", path = .0.display())]
    NonMapRoot(EvaluationPath),

    /// Failed to read the contents of an input file due to I/O error.
    #[error("failed to read input file `{path}`: {error:#}", path = .path.display())]
    Io {
        /// The path to the inputs file.
        path: EvaluationPath,
        /// The I/O error that occurred.
        error: std::io::Error,
    },

    /// Failed to read the contents of an input file due to reqwest error.
    #[error("failed to read input file `{path}`: {error:#}", path = .path.display())]
    Reqwest {
        /// The path to the inputs file.
        path: EvaluationPath,
        /// The reqwest error that occurred.
        error: reqwest::Error,
    },

    /// The extension of the input file did not identify it as a supported
    /// format (JSON or YAML).
    #[error(
        "unsupported input file `{path}`: the supported formats are JSON (`.json`) or YAML (`.yaml` and `.yml`)", path = .0.display()
    )]
    UnsupportedFileExt(EvaluationPath),

    /// An error occurring in [`serde_yaml_ng`].
    ///
    /// The message of the underlying error is displayed unchanged.
    #[error(transparent)]
    Yaml(#[from] serde_yaml_ng::Error),
}
71
/// A [`Result`](std::result::Result) whose error type is fixed to this
/// module's [`Error`](enum@self::Error).
pub type Result<T> = std::result::Result<T, Error>;
74
/// An input file containing WDL values.
///
/// This is a unit struct: it holds no data and only groups the associated
/// functions used to read input files.
pub struct InputFile;
77
78impl InputFile {
79    /// Reads an input file.
80    ///
81    /// The file is attempted to be parsed based on its extension.
82    ///
83    /// - If the input file is successfully parsed, it's returned wrapped in
84    ///   [`Ok`].
85    /// - If a deserialization error is encountered while parsing the JSON/YAML
86    ///   file, an [`Error::Json`]/[`Error::Yaml`] is returned respectively.
87    /// - If no recognized extension is found, an [`Error::UnsupportedFileExt`]
88    ///   is returned.
89    pub async fn read(path: &EvaluationPath) -> Result<Inputs> {
90        fn map_to_inputs(map: JsonMap, origin: &EvaluationPath) -> Inputs {
91            let mut inputs = Inputs::default();
92
93            for (key, value) in map.iter() {
94                inputs.insert(key.to_owned(), (origin.clone(), value.clone()));
95            }
96
97            inputs
98        }
99
100        if let Some(path) = path.as_local()
101            && path.is_dir()
102        {
103            return Err(Error::InvalidDir(path.to_path_buf()));
104        }
105
106        /// Supported inputs file formats
107        enum Format {
108            /// The inputs file is a JSON file
109            Json,
110            /// The inputs file is a YAML file
111            Yaml,
112        }
113
114        let (content, origin, format) = match path {
115            EvaluationPath::Local(local) => {
116                let format = match local.extension().and_then(OsStr::to_str) {
117                    Some("json") => Format::Json,
118                    Some("yml") | Some("yaml") => Format::Yaml,
119                    _ => return Err(Error::UnsupportedFileExt(path.clone())),
120                };
121
122                let absolute = absolute(local).map_err(|e| Error::Io {
123                    path: path.clone(),
124                    error: e,
125                })?;
126                let origin = if let Some(parent) = absolute.parent() {
127                    parent.to_path_buf()
128                } else {
129                    absolute
130                };
131
132                // Read the contents from the local file
133                let contents = std::fs::read_to_string(local).map_err(|e| Error::Io {
134                    path: path.clone(),
135                    error: e,
136                })?;
137
138                (contents, EvaluationPath::Local(origin), format)
139            }
140            EvaluationPath::Remote(url) => {
141                let map_err = |e| Error::Reqwest {
142                    path: path.clone(),
143                    error: e,
144                };
145
146                let format = if url.path().ends_with(".json") {
147                    Format::Json
148                } else if url.path().ends_with(".yml") || url.path().ends_with(".yaml") {
149                    Format::Yaml
150                } else {
151                    return Err(Error::UnsupportedFileExt(path.clone()));
152                };
153
154                // SAFETY: a parsed evaluation path always has a base, so `path_segments_mut`
155                // will never return an error; additionally, we must pop off a trailing `/` in
156                // the URL along with the "file name" part of the URL to get the parent; the
157                // final `push("")` call puts an empty segment on the URL so that any future
158                // `join` operation on the origin URL will treat it as a "directory".
159                let mut origin = url.clone();
160                origin
161                    .path_segments_mut()
162                    .unwrap()
163                    .pop_if_empty()
164                    .pop()
165                    .push("");
166
167                // Read the contents from the URL
168                let contents = reqwest::get(url.clone())
169                    .await
170                    .map_err(map_err)?
171                    .error_for_status()
172                    .map_err(map_err)?
173                    .text()
174                    .await
175                    .map_err(map_err)?;
176
177                (contents, EvaluationPath::Remote(origin), format)
178            }
179        };
180
181        match format {
182            Format::Json => serde_json::from_str::<JsonValue>(&content)
183                .map_err(Error::from)
184                .and_then(|value| match value {
185                    JsonValue::Object(object) => Ok(map_to_inputs(object, &origin)),
186                    _ => Err(Error::NonMapRoot(path.clone())),
187                }),
188            Format::Yaml => serde_yaml_ng::from_str::<YamlValue>(&content)
189                .map_err(Error::from)
190                .and_then(|value| match &value {
191                    YamlValue::Mapping(_) => {
192                        // SAFETY: a YAML mapping should always be able to be
193                        // transformed to a JSON value.
194                        let value = serde_json::to_value(value).unwrap();
195                        if let JsonValue::Object(map) = value {
196                            return Ok(map_to_inputs(map, &origin));
197                        }
198
199                        // SAFETY: a serde map will always be translated to a
200                        // [`YamlValue::Mapping`] and a [`JsonValue::Object`],
201                        // so the above `if` statement should always evaluate to
202                        // `true`.
203                        unreachable!(
204                            "a YAML mapping must always coerce to a JSON object, found `{value}`"
205                        )
206                    }
207                    _ => Err(Error::NonMapRoot(path.clone())),
208                }),
209        }
210    }
211}
212
#[cfg(test)]
mod tests {
    use std::path::Path;

    use pretty_assertions::assert_eq;

    use super::*;

    /// Reads `location`, which is expected to fail, and returns the rendered
    /// error message.
    async fn read_failure(location: &str) -> String {
        InputFile::read(&location.parse().unwrap())
            .await
            .unwrap_err()
            .to_string()
    }

    /// Replaces every backslash in `message` with a forward slash.
    fn forward_slashes(message: String) -> String {
        message.replace("\\", "/")
    }

    /// Asserts that `inputs` holds exactly the `foo`, `baz`, and `quux`
    /// entries with their expected values, each tagged with
    /// `expected_origin`.
    fn assert_fixture_entries(inputs: Inputs, expected_origin: &str) {
        let entries = inputs.into_inner();
        assert_eq!(entries.len(), 3);

        let (origin, value) = &entries["foo"];
        assert_eq!(origin.to_str().unwrap(), expected_origin);
        assert_eq!(value.as_str().unwrap(), "bar");

        let (origin, value) = &entries["baz"];
        assert_eq!(origin.to_str().unwrap(), expected_origin);
        assert_eq!(value.as_number().unwrap().as_f64().unwrap() as u64, 42);

        let (origin, value) = &entries["quux"];
        assert_eq!(origin.to_str().unwrap(), expected_origin);
        assert_eq!(value.as_str().unwrap(), "qil");
    }

    #[tokio::test]
    async fn nonmap_root() {
        // One JSON file and one YML file, neither of which has a map at the
        // root.
        let cases = [
            (
                "./tests/fixtures/nonmap_inputs.json",
                "input file `tests/fixtures/nonmap_inputs.json` did not contain a map from strings to \
                 values at the root",
            ),
            (
                "./tests/fixtures/nonmap_inputs.yml",
                "input file `tests/fixtures/nonmap_inputs.yml` did not contain a map from strings to \
                 values at the root",
            ),
        ];

        for (location, expected) in cases {
            let message = forward_slashes(read_failure(location).await);
            assert_eq!(message, expected);
        }
    }

    #[tokio::test]
    async fn missing_ext() {
        // A local path without any extension.
        let message = forward_slashes(read_failure("./tests/fixtures/missing_ext").await);
        assert_eq!(
            message,
            "unsupported input file `tests/fixtures/missing_ext`: the supported formats are JSON \
             (`.json`) or YAML (`.yaml` and `.yml`)"
        );

        // A remote URL without any extension.
        let message = read_failure("http://example.com").await;
        assert_eq!(
            message,
            "unsupported input file `http://example.com/`: the supported formats are JSON \
             (`.json`) or YAML (`.yaml` and `.yml`)"
        );
    }

    #[tokio::test]
    async fn read_local() {
        let location = "./tests/fixtures/inputs_one.json".parse().unwrap();
        let inputs = InputFile::read(&location).await.unwrap();

        // Local entries are tagged with the absolute parent directory.
        let fixtures_dir = absolute(Path::new("tests/fixtures")).unwrap();
        assert_fixture_entries(inputs, fixtures_dir.to_str().unwrap());
    }

    #[tokio::test]
    async fn read_remote() {
        // The URL is a gist of `fixtures/inputs_one.json`
        // Create a new gist and substitute it here if the file contents need to change
        let location = "https://gist.githubusercontent.com/peterhuene/9990b86bf0c419e144326b0276bf6f14/raw/d4116ef8888ccd78e2967d7ad32e1aeb3e4ab734/inputs.json".parse().unwrap();
        let inputs = InputFile::read(&location).await.unwrap();

        // Remote entries are tagged with the parent "directory" URL.
        assert_fixture_entries(
            inputs,
            "https://gist.githubusercontent.com/peterhuene/9990b86bf0c419e144326b0276bf6f14/raw/d4116ef8888ccd78e2967d7ad32e1aeb3e4ab734/",
        );
    }

    #[tokio::test]
    async fn read_remote_missing() {
        let message = forward_slashes(read_failure("https://example.com/not-a-file.json").await);
        assert_eq!(
            message,
            "failed to read input file `https://example.com/not-a-file.json`: HTTP status client error (404 Not Found) for url (https://example.com/not-a-file.json)"
        );
    }
}
326}