wdl_engine/
path.rs

1//! Representation of evaluation paths that support URLs.
2
3use std::fmt;
4use std::path::Path;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use anyhow::Context;
9use anyhow::Result;
10use anyhow::anyhow;
11use anyhow::bail;
12use path_clean::PathClean;
13use url::Url;
14
15use crate::ContentKind;
16use crate::digest::Digest;
17use crate::digest::calculate_local_digest;
18use crate::digest::calculate_remote_digest;
19use crate::http::Transferer;
20
/// URL schemes that this crate accepts.
///
/// Entries are lowercase; `has_supported_scheme` compares a parsed URL's
/// scheme against them verbatim. `file` is listed here even though `file`
/// URLs are stored as local paths when parsed into an `EvaluationPath`.
const SUPPORTED_SCHEMES: &[&str] = &[
    "http",
    "https",
    "file",
    "az",
    "s3",
    "gs",
];
23
24/// Determines if the given string is prefixed with a `file` URL scheme.
25pub fn is_file_url(s: &str) -> bool {
26    s.parse::<Url>()
27        .ok()
28        .map(|url| url.scheme() == "file")
29        .unwrap_or(false)
30}
31
32/// Determines if the given string is prefixed with a supported URL scheme.
33pub fn is_supported_url(s: &str) -> bool {
34    s.parse::<Url>()
35        .ok()
36        .map(|url| has_supported_scheme(&url))
37        .unwrap_or(false)
38}
39
40/// Parses a string into a URL.
41///
42/// Returns `None` if the string is not a supported scheme or not a valid URL.
43pub fn parse_supported_url(s: &str) -> Option<Url> {
44    match s.parse() {
45        Ok(url) if has_supported_scheme(&url) => Some(url),
46        _ => None,
47    }
48}
49
50/// Returns `true` if the given URL has a scheme supported by this crate.
51pub fn has_supported_scheme(url: &Url) -> bool {
52    SUPPORTED_SCHEMES.contains(&url.scheme())
53}
54
55/// Represents a path used in evaluation that may be either local or remote.
56#[derive(Debug, Clone, PartialEq, Eq, Hash)]
57pub enum EvaluationPath {
58    /// The path is local (i.e. on the host).
59    Local(PathBuf),
60    /// The path is remote.
61    Remote(Url),
62}
63
64impl EvaluationPath {
65    /// Joins the given path to this path.
66    pub fn join(&self, path: &str) -> Result<Self> {
67        // URLs are absolute, so they can't be joined
68        if is_supported_url(path) {
69            return path.parse();
70        }
71
72        // We can't join an absolute local path either
73        let p = Path::new(path);
74        if p.is_absolute() {
75            return Ok(Self::Local(p.clean()));
76        }
77
78        match self {
79            Self::Local(dir) => Ok(Self::Local(dir.join(path).clean())),
80            Self::Remote(dir) => dir
81                .join(path)
82                .map(Self::Remote)
83                .with_context(|| format!("failed to join `{path}` to URL `{dir}`")),
84        }
85    }
86
87    /// Converts the path to a local path.
88    ///
89    /// Returns `None` if the path is remote.
90    pub fn as_local(&self) -> Option<&Path> {
91        match self {
92            Self::Local(path) => Some(path),
93            Self::Remote(_) => None,
94        }
95    }
96
97    /// Unwraps the path to a local path.
98    ///
99    /// # Panics
100    ///
101    /// Panics if the path is remote.
102    pub fn unwrap_local(self) -> PathBuf {
103        match self {
104            Self::Local(path) => path,
105            Self::Remote(_) => panic!("path is remote"),
106        }
107    }
108
109    /// Converts the path to a remote URL.
110    ///
111    /// Returns `None` if the path is local.
112    pub fn as_remote(&self) -> Option<&Url> {
113        match self {
114            Self::Local(_) => None,
115            Self::Remote(url) => Some(url),
116        }
117    }
118
119    /// Unwraps the path to a remote URL.
120    ///
121    /// # Panics
122    ///
123    /// Panics if the path is local.
124    pub fn unwrap_remote(self) -> Url {
125        match self {
126            Self::Local(_) => panic!("path is local"),
127            Self::Remote(url) => url,
128        }
129    }
130
131    /// Gets the parent of the given path.
132    ///
133    /// Returns `None` if the evaluation path isn't valid or has no parent.
134    pub fn parent_of(path: &str) -> Option<EvaluationPath> {
135        let path = path.parse().ok()?;
136        match path {
137            Self::Local(path) => path.parent().map(|p| Self::Local(p.to_path_buf())),
138            Self::Remote(mut url) => {
139                if url.path() == "/" {
140                    return None;
141                }
142
143                if let Ok(mut segments) = url.path_segments_mut() {
144                    segments.pop_if_empty().pop();
145                }
146
147                Some(Self::Remote(url))
148            }
149        }
150    }
151
152    /// Gets the file name of the path.
153    ///
154    /// Returns `Ok(None)` if the path does not contain a file name (i.e. is
155    /// root).
156    ///
157    /// Returns an error if the file name is not UTF-8.
158    pub fn file_name(&self) -> Result<Option<&str>> {
159        match self {
160            Self::Local(path) => path
161                .file_name()
162                .map(|n| {
163                    n.to_str().with_context(|| {
164                        format!("path `{path}` is not UTF-8", path = path.display())
165                    })
166                })
167                .transpose(),
168            Self::Remote(url) => Ok(url.path_segments().and_then(|mut s| s.next_back())),
169        }
170    }
171
172    /// Returns a display implementation for the path.
173    pub fn display(&self) -> impl fmt::Display {
174        struct Display<'a>(&'a EvaluationPath);
175
176        impl fmt::Display for Display<'_> {
177            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
178                match self.0 {
179                    EvaluationPath::Local(path) => write!(f, "{path}", path = path.display()),
180                    EvaluationPath::Remote(url) => write!(f, "{url}"),
181                }
182            }
183        }
184
185        Display(self)
186    }
187
188    /// Calculates the content digest of the evaluation path.
189    pub async fn calculate_digest(
190        &self,
191        transferer: &dyn Transferer,
192        kind: ContentKind,
193    ) -> Result<Digest> {
194        match self {
195            Self::Local(path) => calculate_local_digest(path, kind).await,
196            Self::Remote(url) => calculate_remote_digest(transferer, url, kind).await,
197        }
198    }
199}
200
201impl FromStr for EvaluationPath {
202    type Err = anyhow::Error;
203
204    fn from_str(s: &str) -> Result<Self> {
205        // Store `file` schemed URLs as local paths.
206        if is_file_url(s) {
207            let url = s
208                .parse::<Url>()
209                .with_context(|| format!("invalid `file` schemed URL `{s}`"))?;
210            return url
211                .to_file_path()
212                .map(|p| Self::Local(p.clean()))
213                .map_err(|_| anyhow!("URL `{s}` cannot be represented as a local file path"));
214        }
215
216        if let Some(url) = parse_supported_url(s) {
217            return Ok(Self::Remote(url));
218        }
219
220        Ok(Self::Local(Path::new(s).clean()))
221    }
222}
223
224impl TryFrom<&str> for EvaluationPath {
225    type Error = anyhow::Error;
226
227    fn try_from(value: &str) -> Result<Self> {
228        value.parse()
229    }
230}
231
232impl TryFrom<EvaluationPath> for String {
233    type Error = anyhow::Error;
234
235    fn try_from(path: EvaluationPath) -> Result<Self> {
236        match path {
237            EvaluationPath::Local(path) => match path.into_os_string().into_string() {
238                Ok(s) => Ok(s),
239                Err(path) => bail!(
240                    "path `{path}` cannot be represented with UTF-8",
241                    path = path.display()
242                ),
243            },
244            EvaluationPath::Remote(url) => Ok(url.into()),
245        }
246    }
247}
248
249impl fmt::Display for EvaluationPath {
250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
251        match self {
252            Self::Local(path) => path.display().fmt(f),
253            Self::Remote(url) => url.fmt(f),
254        }
255    }
256}
257
258#[cfg(test)]
259mod test {
260    use pretty_assertions::assert_eq;
261
262    use super::*;
263
264    #[test]
265    fn test_file_urls() {
266        assert!(is_file_url("file:///foo/bar/baz"));
267        assert!(is_file_url("FiLe:///foo/bar/baz"));
268        assert!(is_file_url("FILE:///foo/bar/baz"));
269        assert!(!is_file_url("https://example.com/bar/baz"));
270        assert!(!is_file_url("az://foo/bar/baz"));
271    }
272
273    #[test]
274    fn test_urls() {
275        assert!(is_supported_url("http://example.com/foo/bar/baz"));
276        assert!(is_supported_url("HtTp://example.com/foo/bar/baz"));
277        assert!(is_supported_url("HTTP://example.com/foo/bar/baz"));
278        assert!(is_supported_url("https://example.com/foo/bar/baz"));
279        assert!(is_supported_url("HtTpS://example.com/foo/bar/baz"));
280        assert!(is_supported_url("HTTPS://example.com/foo/bar/baz"));
281        assert!(is_supported_url("file:///foo/bar/baz"));
282        assert!(is_supported_url("FiLe:///foo/bar/baz"));
283        assert!(is_supported_url("FILE:///foo/bar/baz"));
284        assert!(is_supported_url("az://foo/bar/baz"));
285        assert!(is_supported_url("aZ://foo/bar/baz"));
286        assert!(is_supported_url("AZ://foo/bar/baz"));
287        assert!(is_supported_url("s3://foo/bar/baz"));
288        assert!(is_supported_url("S3://foo/bar/baz"));
289        assert!(is_supported_url("gs://foo/bar/baz"));
290        assert!(is_supported_url("gS://foo/bar/baz"));
291        assert!(is_supported_url("GS://foo/bar/baz"));
292        assert!(!is_supported_url("foo://foo/bar/baz"));
293    }
294
295    #[test]
296    fn test_url_parsing() {
297        assert_eq!(
298            parse_supported_url("http://example.com/foo/bar/baz")
299                .map(String::from)
300                .as_deref(),
301            Some("http://example.com/foo/bar/baz")
302        );
303        assert_eq!(
304            parse_supported_url("https://example.com/foo/bar/baz")
305                .map(String::from)
306                .as_deref(),
307            Some("https://example.com/foo/bar/baz")
308        );
309        assert_eq!(
310            parse_supported_url("file:///foo/bar/baz")
311                .map(String::from)
312                .as_deref(),
313            Some("file:///foo/bar/baz")
314        );
315        assert_eq!(
316            parse_supported_url("az://foo/bar/baz")
317                .map(String::from)
318                .as_deref(),
319            Some("az://foo/bar/baz")
320        );
321        assert_eq!(
322            parse_supported_url("s3://foo/bar/baz")
323                .map(String::from)
324                .as_deref(),
325            Some("s3://foo/bar/baz")
326        );
327        assert_eq!(
328            parse_supported_url("gs://foo/bar/baz")
329                .map(String::from)
330                .as_deref(),
331            Some("gs://foo/bar/baz")
332        );
333        assert_eq!(
334            parse_supported_url("foo://foo/bar/baz")
335                .map(String::from)
336                .as_deref(),
337            None
338        );
339    }
340
341    #[test]
342    fn test_evaluation_path_parsing() {
343        let p: EvaluationPath = "/foo/bar/baz".parse().expect("should parse");
344        assert_eq!(
345            p.unwrap_local().to_str().unwrap().replace("\\", "/"),
346            "/foo/bar/baz"
347        );
348
349        let p: EvaluationPath = "foo".parse().expect("should parse");
350        assert_eq!(p.unwrap_local().as_os_str(), "foo");
351
352        #[cfg(unix)]
353        {
354            let p: EvaluationPath = "file:///foo/bar/baz".parse().expect("should parse");
355            assert_eq!(p.unwrap_local().as_os_str(), "/foo/bar/baz");
356        }
357
358        #[cfg(windows)]
359        {
360            let p: EvaluationPath = "file:///C:/foo/bar/baz".parse().expect("should parse");
361            assert_eq!(p.unwrap_local().as_os_str(), "C:\\foo\\bar\\baz");
362        }
363
364        let p: EvaluationPath = "https://example.com/foo/bar/baz"
365            .parse()
366            .expect("should parse");
367        assert_eq!(
368            p.unwrap_remote().as_str(),
369            "https://example.com/foo/bar/baz"
370        );
371
372        let p: EvaluationPath = "az://foo/bar/baz".parse().expect("should parse");
373        assert_eq!(p.unwrap_remote().as_str(), "az://foo/bar/baz");
374
375        let p: EvaluationPath = "s3://foo/bar/baz".parse().expect("should parse");
376        assert_eq!(p.unwrap_remote().as_str(), "s3://foo/bar/baz");
377
378        let p: EvaluationPath = "gs://foo/bar/baz".parse().expect("should parse");
379        assert_eq!(p.unwrap_remote().as_str(), "gs://foo/bar/baz");
380    }
381
382    #[test]
383    fn test_evaluation_path_join() {
384        let p: EvaluationPath = "/foo/bar/baz".parse().expect("should parse");
385        assert_eq!(
386            p.join("qux/../quux")
387                .expect("should join")
388                .unwrap_local()
389                .to_str()
390                .unwrap()
391                .replace("\\", "/"),
392            "/foo/bar/baz/quux"
393        );
394
395        let p: EvaluationPath = "foo".parse().expect("should parse");
396        assert_eq!(
397            p.join("qux/../quux")
398                .expect("should join")
399                .unwrap_local()
400                .to_str()
401                .unwrap()
402                .replace("\\", "/"),
403            "foo/quux"
404        );
405
406        #[cfg(unix)]
407        {
408            let p: EvaluationPath = "file:///foo/bar/baz".parse().expect("should parse");
409            assert_eq!(
410                p.join("qux/../quux")
411                    .expect("should join")
412                    .unwrap_local()
413                    .as_os_str(),
414                "/foo/bar/baz/quux"
415            );
416        }
417
418        #[cfg(windows)]
419        {
420            let p: EvaluationPath = "file:///C:/foo/bar/baz".parse().expect("should parse");
421            assert_eq!(
422                p.join("qux/../quux")
423                    .expect("should join")
424                    .unwrap_local()
425                    .as_os_str(),
426                "C:\\foo\\bar\\baz\\quux"
427            );
428        }
429
430        let p: EvaluationPath = "https://example.com/foo/bar/baz"
431            .parse()
432            .expect("should parse");
433        assert_eq!(
434            p.join("qux/../quux")
435                .expect("should join")
436                .unwrap_remote()
437                .as_str(),
438            "https://example.com/foo/bar/quux"
439        );
440
441        let p: EvaluationPath = "https://example.com/foo/bar/baz/"
442            .parse()
443            .expect("should parse");
444        assert_eq!(
445            p.join("qux/../quux")
446                .expect("should join")
447                .unwrap_remote()
448                .as_str(),
449            "https://example.com/foo/bar/baz/quux"
450        );
451
452        let p: EvaluationPath = "az://foo/bar/baz/".parse().expect("should parse");
453        assert_eq!(
454            p.join("qux/../quux")
455                .expect("should join")
456                .unwrap_remote()
457                .as_str(),
458            "az://foo/bar/baz/quux"
459        );
460
461        let p: EvaluationPath = "s3://foo/bar/baz/".parse().expect("should parse");
462        assert_eq!(
463            p.join("qux/../quux")
464                .expect("should join")
465                .unwrap_remote()
466                .as_str(),
467            "s3://foo/bar/baz/quux"
468        );
469
470        let p: EvaluationPath = "gs://foo/bar/baz/".parse().expect("should parse");
471        assert_eq!(
472            p.join("qux/../quux")
473                .expect("should join")
474                .unwrap_remote()
475                .as_str(),
476            "gs://foo/bar/baz/quux"
477        );
478    }
479}