wdl_engine/
path.rs

1//! Representation of evaluation paths that support URLs.
2
3use std::fmt;
4use std::path::Path;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use anyhow::Context;
9use anyhow::Result;
10use anyhow::anyhow;
11use anyhow::bail;
12use path_clean::clean;
13use url::Url;
14
15use crate::PrimitiveValue;
16
/// Checks whether the given string begins with the `file://` URL scheme.
///
/// The comparison ignores ASCII case, so `FILE://` and `FiLe://` match too.
pub fn is_file_url(s: &str) -> bool {
    const PREFIX: &str = "file://";

    match s.get(..PREFIX.len()) {
        Some(head) => head.eq_ignore_ascii_case(PREFIX),
        // The string is too short or the cut does not fall on a character
        // boundary, so it cannot start with the prefix.
        None => false,
    }
}
23
/// Checks whether the given string begins with one of the supported URL
/// schemes (`http`, `https`, `file`, `az`, `s3`, or `gs`).
///
/// The scheme comparison ignores ASCII case.
pub fn is_url(s: &str) -> bool {
    const SCHEMES: [&str; 6] = ["http://", "https://", "file://", "az://", "s3://", "gs://"];

    for scheme in &SCHEMES {
        // `get` yields `None` when the string is shorter than the scheme or
        // the cut is not on a character boundary; neither case can match.
        if let Some(head) = s.get(..scheme.len()) {
            if head.eq_ignore_ascii_case(scheme) {
                return true;
            }
        }
    }

    false
}
34
35/// Parses a string into a URL.
36///
37/// Returns `None` if the string is not a supported scheme or not a valid URL.
38pub fn parse_url(s: &str) -> Option<Url> {
39    if !is_url(s) {
40        return None;
41    }
42
43    s.parse().ok()
44}
45
/// Represents a path used in evaluation that may be either local or remote.
///
/// When parsed from a string, `file` schemed URLs are stored as
/// [`EvaluationPath::Local`] paths, other supported URL schemes are stored as
/// [`EvaluationPath::Remote`] URLs, and anything else is treated as a local
/// path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EvaluationPath {
    /// The path is local (i.e. on the host).
    Local(PathBuf),
    /// The path is remote.
    Remote(Url),
}
54
55impl EvaluationPath {
56    /// Joins the given path to this path.
57    pub fn join(&self, path: &str) -> Result<Self> {
58        // URLs are absolute, so they can't be joined
59        if is_url(path) {
60            return path.parse();
61        }
62
63        // We can't join an absolute local path either
64        if Path::new(path).is_absolute() {
65            return Ok(Self::Local(clean(path)));
66        }
67
68        match self {
69            Self::Local(dir) => Ok(Self::Local(dir.join(clean(path)))),
70            Self::Remote(dir) => dir
71                .join(path)
72                .map(Self::Remote)
73                .with_context(|| format!("failed to join `{path}` to URL `{dir}`")),
74        }
75    }
76
77    /// Creates a path from a primitive `File` or `Directory` value.
78    pub fn from_primitive_value(v: &PrimitiveValue) -> Result<Self> {
79        match v {
80            PrimitiveValue::File(path) | PrimitiveValue::Directory(path) => path.parse(),
81            _ => bail!("primitive value must be a `File` or a `Directory`"),
82        }
83    }
84
85    /// Gets a string representation of the path.
86    ///
87    /// Returns `None` if the path is local and cannot be represented in UTF-8.
88    pub fn to_str(&self) -> Option<&str> {
89        match self {
90            Self::Local(path) => path.to_str(),
91            Self::Remote(url) => Some(url.as_str()),
92        }
93    }
94
95    /// Converts the path to a local path.
96    ///
97    /// Returns `None` if the path is remote.
98    pub fn as_local(&self) -> Option<&Path> {
99        match self {
100            Self::Local(path) => Some(path),
101            Self::Remote(_) => None,
102        }
103    }
104
105    /// Unwraps the path to a local path.
106    ///
107    /// # Panics
108    ///
109    /// Panics if the path is remote.
110    pub fn unwrap_local(self) -> PathBuf {
111        match self {
112            Self::Local(path) => path,
113            Self::Remote(_) => panic!("path is remote"),
114        }
115    }
116
117    /// Converts the path to a remote URL.
118    ///
119    /// Returns `None` if the path is local.
120    pub fn as_remote(&self) -> Option<&Url> {
121        match self {
122            Self::Local(_) => None,
123            Self::Remote(url) => Some(url),
124        }
125    }
126
127    /// Unwraps the path to a remote URL.
128    ///
129    /// # Panics
130    ///
131    /// Panics if the path is local.
132    pub fn unwrap_remote(self) -> Url {
133        match self {
134            Self::Local(_) => panic!("path is local"),
135            Self::Remote(url) => url,
136        }
137    }
138
139    /// Gets the file name of the path.
140    ///
141    /// Returns `Ok(None)` if the path does not contain a file name (i.e. is
142    /// root).
143    ///
144    /// Returns an error if the file name is not UTF-8.
145    pub fn file_name(&self) -> Result<Option<&str>> {
146        match self {
147            Self::Local(path) => path
148                .file_name()
149                .map(|n| {
150                    n.to_str().with_context(|| {
151                        format!("path `{path}` is not UTF-8", path = path.display())
152                    })
153                })
154                .transpose(),
155            Self::Remote(url) => Ok(url.path_segments().and_then(|mut s| s.next_back())),
156        }
157    }
158
159    /// Returns a display implementation for the path.
160    pub fn display(&self) -> impl fmt::Display {
161        struct Display<'a>(&'a EvaluationPath);
162
163        impl fmt::Display for Display<'_> {
164            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165                match self.0 {
166                    EvaluationPath::Local(path) => write!(f, "{path}", path = path.display()),
167                    EvaluationPath::Remote(url) => write!(f, "{url}"),
168                }
169            }
170        }
171
172        Display(self)
173    }
174}
175
176impl FromStr for EvaluationPath {
177    type Err = anyhow::Error;
178
179    fn from_str(s: &str) -> Result<Self, Self::Err> {
180        // Store `file` schemed URLs as local paths.
181        if is_file_url(s) {
182            let url = s
183                .parse::<Url>()
184                .with_context(|| format!("invalid `file` schemed URL `{s}`"))?;
185            return url
186                .to_file_path()
187                .map(|p| Self::Local(clean(p)))
188                .map_err(|_| anyhow!("URL `{s}` cannot be represented as a local file path"));
189        }
190
191        if let Some(url) = parse_url(s) {
192            return Ok(Self::Remote(url));
193        }
194
195        Ok(Self::Local(clean(s)))
196    }
197}
198
199impl TryFrom<EvaluationPath> for String {
200    type Error = anyhow::Error;
201
202    fn try_from(value: EvaluationPath) -> Result<Self, Self::Error> {
203        match value {
204            EvaluationPath::Local(path) => path
205                .into_os_string()
206                .into_string()
207                .map_err(|_| anyhow!("path cannot be represented as a UTF-8 string")),
208            EvaluationPath::Remote(url) => Ok(url.into()),
209        }
210    }
211}
212
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// The relative path joined onto every base path in the join tests.
    const RELATIVE: &str = "qux/../quux";

    /// Parses the given string as an evaluation path, panicking on failure.
    fn parsed(s: &str) -> EvaluationPath {
        s.parse().expect("should parse")
    }

    /// Unwraps a local path and renders it with `/` separators so that the
    /// expectation is the same on every platform.
    fn local_with_slashes(path: EvaluationPath) -> String {
        path.unwrap_local().to_str().unwrap().replace("\\", "/")
    }

    #[test]
    fn test_file_urls() {
        for accepted in [
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
        ] {
            assert!(is_file_url(accepted));
        }

        for rejected in ["https://example.com/bar/baz", "az://foo/bar/baz"] {
            assert!(!is_file_url(rejected));
        }
    }

    #[test]
    fn test_urls() {
        for accepted in [
            "http://example.com/foo/bar/baz",
            "HtTp://example.com/foo/bar/baz",
            "HTTP://example.com/foo/bar/baz",
            "https://example.com/foo/bar/baz",
            "HtTpS://example.com/foo/bar/baz",
            "HTTPS://example.com/foo/bar/baz",
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
            "az://foo/bar/baz",
            "aZ://foo/bar/baz",
            "AZ://foo/bar/baz",
            "s3://foo/bar/baz",
            "S3://foo/bar/baz",
            "gs://foo/bar/baz",
            "gS://foo/bar/baz",
            "GS://foo/bar/baz",
        ] {
            assert!(is_url(accepted));
        }

        assert!(!is_url("foo://foo/bar/baz"));
    }

    #[test]
    fn test_url_parsing() {
        // Each supported URL should round-trip through parsing unchanged.
        for supported in [
            "http://example.com/foo/bar/baz",
            "https://example.com/foo/bar/baz",
            "file:///foo/bar/baz",
            "az://foo/bar/baz",
            "s3://foo/bar/baz",
            "gs://foo/bar/baz",
        ] {
            assert_eq!(
                parse_url(supported).map(String::from).as_deref(),
                Some(supported)
            );
        }

        // Unsupported schemes are rejected.
        assert_eq!(
            parse_url("foo://foo/bar/baz").map(String::from).as_deref(),
            None
        );
    }

    #[test]
    fn test_evaluation_path_parsing() {
        assert_eq!(local_with_slashes(parsed("/foo/bar/baz")), "/foo/bar/baz");

        assert_eq!(parsed("foo").unwrap_local().as_os_str(), "foo");

        #[cfg(unix)]
        {
            let path = parsed("file:///foo/bar/baz");
            assert_eq!(path.unwrap_local().as_os_str(), "/foo/bar/baz");
        }

        #[cfg(windows)]
        {
            let path = parsed("file:///C:/foo/bar/baz");
            assert_eq!(path.unwrap_local().as_os_str(), "C:\\foo\\bar\\baz");
        }

        // Remote URLs are kept verbatim.
        for remote in [
            "https://example.com/foo/bar/baz",
            "az://foo/bar/baz",
            "s3://foo/bar/baz",
            "gs://foo/bar/baz",
        ] {
            assert_eq!(parsed(remote).unwrap_remote().as_str(), remote);
        }
    }

    #[test]
    fn test_evaluation_path_join() {
        let local_cases = [
            ("/foo/bar/baz", "/foo/bar/baz/quux"),
            ("foo", "foo/quux"),
        ];
        for (base, expected) in local_cases {
            let joined = parsed(base).join(RELATIVE).expect("should join");
            assert_eq!(local_with_slashes(joined), expected);
        }

        #[cfg(unix)]
        {
            let joined = parsed("file:///foo/bar/baz")
                .join(RELATIVE)
                .expect("should join");
            assert_eq!(joined.unwrap_local().as_os_str(), "/foo/bar/baz/quux");
        }

        #[cfg(windows)]
        {
            let joined = parsed("file:///C:/foo/bar/baz")
                .join(RELATIVE)
                .expect("should join");
            assert_eq!(
                joined.unwrap_local().as_os_str(),
                "C:\\foo\\bar\\baz\\quux"
            );
        }

        // Without a trailing slash the last URL segment is replaced; with a
        // trailing slash the relative path is appended.
        let remote_cases = [
            (
                "https://example.com/foo/bar/baz",
                "https://example.com/foo/bar/quux",
            ),
            (
                "https://example.com/foo/bar/baz/",
                "https://example.com/foo/bar/baz/quux",
            ),
            ("az://foo/bar/baz/", "az://foo/bar/baz/quux"),
            ("s3://foo/bar/baz/", "s3://foo/bar/baz/quux"),
            ("gs://foo/bar/baz/", "gs://foo/bar/baz/quux"),
        ];
        for (base, expected) in remote_cases {
            let joined = parsed(base).join(RELATIVE).expect("should join");
            assert_eq!(joined.unwrap_remote().as_str(), expected);
        }
    }
}