wdl_engine/
path.rs

1//! Representation of evaluation paths that support URLs.
2
3use std::fmt;
4use std::path::Path;
5use std::path::PathBuf;
6use std::path::absolute;
7use std::str::FromStr;
8
9use anyhow::Context;
10use anyhow::Result;
11use anyhow::anyhow;
12use path_clean::PathClean;
13use url::Url;
14
/// Checks whether the given string begins with the `file://` scheme prefix.
///
/// The comparison is ASCII case-insensitive, so `FILE://` and `FiLe://` are
/// accepted as well. Only the prefix is inspected; the remainder of the
/// string is not validated as a URL.
pub fn is_file_url(s: &str) -> bool {
    /// The scheme prefix that identifies a `file` URL.
    const FILE_PREFIX: &str = "file://";

    // `get` yields `None` when the string is shorter than the prefix or the
    // prefix length does not fall on a character boundary.
    s.get(..FILE_PREFIX.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(FILE_PREFIX))
}
21
/// Checks whether the given string begins with one of the supported URL
/// scheme prefixes.
///
/// The supported schemes are `http`, `https`, `file`, `az`, `s3`, and `gs`.
/// The comparison is ASCII case-insensitive and only the prefix is
/// inspected; the remainder of the string is not validated as a URL.
pub fn is_url(s: &str) -> bool {
    /// The scheme prefixes that are treated as URLs.
    const SCHEME_PREFIXES: [&str; 6] =
        ["http://", "https://", "file://", "az://", "s3://", "gs://"];

    for prefix in SCHEME_PREFIXES {
        // `get` yields `None` when the string is too short or the prefix
        // length does not fall on a character boundary.
        let matched = matches!(
            s.get(..prefix.len()),
            Some(head) if head.eq_ignore_ascii_case(prefix)
        );

        if matched {
            return true;
        }
    }

    false
}
32
33/// Parses a string into a URL.
34///
35/// Returns `None` if the string is not a supported scheme or not a valid URL.
36pub fn parse_url(s: &str) -> Option<Url> {
37    if !is_url(s) {
38        return None;
39    }
40
41    s.parse().ok()
42}
43
44/// Represents a path used in evaluation that may be either local or remote.
45#[derive(Debug, Clone, PartialEq, Eq)]
46pub enum EvaluationPath {
47    /// The path is local (i.e. on the host).
48    Local(PathBuf),
49    /// The path is remote.
50    Remote(Url),
51}
52
53impl EvaluationPath {
54    /// Joins the given path to this path.
55    pub fn join(&self, path: &str) -> Result<Self> {
56        // URLs are absolute, so they can't be joined
57        if is_url(path) {
58            return path.parse();
59        }
60
61        // We can't join an absolute local path either
62        let p = Path::new(path);
63        if p.is_absolute() {
64            return Ok(Self::Local(p.clean()));
65        }
66
67        match self {
68            Self::Local(dir) => Ok(Self::Local(dir.join(path).clean())),
69            Self::Remote(dir) => dir
70                .join(path)
71                .map(Self::Remote)
72                .with_context(|| format!("failed to join `{path}` to URL `{dir}`")),
73        }
74    }
75
76    /// Gets a string representation of the path.
77    ///
78    /// Returns `None` if the path is local and cannot be represented in UTF-8.
79    pub fn to_str(&self) -> Option<&str> {
80        match self {
81            Self::Local(path) => path.to_str(),
82            Self::Remote(url) => Some(url.as_str()),
83        }
84    }
85
86    /// Converts the path to a local path.
87    ///
88    /// Returns `None` if the path is remote.
89    pub fn as_local(&self) -> Option<&Path> {
90        match self {
91            Self::Local(path) => Some(path),
92            Self::Remote(_) => None,
93        }
94    }
95
96    /// Unwraps the path to a local path.
97    ///
98    /// # Panics
99    ///
100    /// Panics if the path is remote.
101    pub fn unwrap_local(self) -> PathBuf {
102        match self {
103            Self::Local(path) => path,
104            Self::Remote(_) => panic!("path is remote"),
105        }
106    }
107
108    /// Converts the path to a remote URL.
109    ///
110    /// Returns `None` if the path is local.
111    pub fn as_remote(&self) -> Option<&Url> {
112        match self {
113            Self::Local(_) => None,
114            Self::Remote(url) => Some(url),
115        }
116    }
117
118    /// Unwraps the path to a remote URL.
119    ///
120    /// # Panics
121    ///
122    /// Panics if the path is local.
123    pub fn unwrap_remote(self) -> Url {
124        match self {
125            Self::Local(_) => panic!("path is local"),
126            Self::Remote(url) => url,
127        }
128    }
129
130    /// Consumes the path and returns its string representation
131    ///
132    /// Returns `None` if the path is local and cannot be represented in UTF-8.
133    pub fn into_string(self) -> Option<String> {
134        match self {
135            Self::Local(path) => path.into_os_string().into_string().ok(),
136            Self::Remote(url) => Some(url.into()),
137        }
138    }
139
140    /// Gets the parent of the given path.
141    ///
142    /// Returns `None` if the evaluation path isn't valid or has no parent.
143    pub fn parent_of(path: &str) -> Option<EvaluationPath> {
144        let path = path.parse().ok()?;
145        match path {
146            Self::Local(path) => path.parent().map(|p| Self::Local(p.to_path_buf())),
147            Self::Remote(mut url) => {
148                if url.path() == "/" {
149                    return None;
150                }
151
152                if let Ok(mut segments) = url.path_segments_mut() {
153                    segments.pop_if_empty().pop();
154                }
155
156                Some(Self::Remote(url))
157            }
158        }
159    }
160
161    /// Gets the file name of the path.
162    ///
163    /// Returns `Ok(None)` if the path does not contain a file name (i.e. is
164    /// root).
165    ///
166    /// Returns an error if the file name is not UTF-8.
167    pub fn file_name(&self) -> Result<Option<&str>> {
168        match self {
169            Self::Local(path) => path
170                .file_name()
171                .map(|n| {
172                    n.to_str().with_context(|| {
173                        format!("path `{path}` is not UTF-8", path = path.display())
174                    })
175                })
176                .transpose(),
177            Self::Remote(url) => Ok(url.path_segments().and_then(|mut s| s.next_back())),
178        }
179    }
180
181    /// Returns a display implementation for the path.
182    pub fn display(&self) -> impl fmt::Display {
183        struct Display<'a>(&'a EvaluationPath);
184
185        impl fmt::Display for Display<'_> {
186            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
187                match self.0 {
188                    EvaluationPath::Local(path) => write!(f, "{path}", path = path.display()),
189                    EvaluationPath::Remote(url) => write!(f, "{url}"),
190                }
191            }
192        }
193
194        Display(self)
195    }
196
197    /// Makes the evaluation path absolute if it is a local path.
198    pub fn make_absolute(&mut self) {
199        if let Self::Local(path) = self
200            && !path.is_absolute()
201            && let Ok(abs) = absolute(&path)
202        {
203            *path = abs;
204        }
205    }
206}
207
208impl FromStr for EvaluationPath {
209    type Err = anyhow::Error;
210
211    fn from_str(s: &str) -> Result<Self> {
212        // Store `file` schemed URLs as local paths.
213        if is_file_url(s) {
214            let url = s
215                .parse::<Url>()
216                .with_context(|| format!("invalid `file` schemed URL `{s}`"))?;
217            return url
218                .to_file_path()
219                .map(|p| Self::Local(p.clean()))
220                .map_err(|_| anyhow!("URL `{s}` cannot be represented as a local file path"));
221        }
222
223        if let Some(url) = parse_url(s) {
224            return Ok(Self::Remote(url));
225        }
226
227        Ok(Self::Local(Path::new(s).clean()))
228    }
229}
230
231impl TryFrom<&str> for EvaluationPath {
232    type Error = anyhow::Error;
233
234    fn try_from(value: &str) -> Result<Self> {
235        value.parse()
236    }
237}
238
239#[cfg(test)]
240mod test {
241    use pretty_assertions::assert_eq;
242
243    use super::*;
244
245    #[test]
246    fn test_file_urls() {
247        assert!(is_file_url("file:///foo/bar/baz"));
248        assert!(is_file_url("FiLe:///foo/bar/baz"));
249        assert!(is_file_url("FILE:///foo/bar/baz"));
250        assert!(!is_file_url("https://example.com/bar/baz"));
251        assert!(!is_file_url("az://foo/bar/baz"));
252    }
253
254    #[test]
255    fn test_urls() {
256        assert!(is_url("http://example.com/foo/bar/baz"));
257        assert!(is_url("HtTp://example.com/foo/bar/baz"));
258        assert!(is_url("HTTP://example.com/foo/bar/baz"));
259        assert!(is_url("https://example.com/foo/bar/baz"));
260        assert!(is_url("HtTpS://example.com/foo/bar/baz"));
261        assert!(is_url("HTTPS://example.com/foo/bar/baz"));
262        assert!(is_url("file:///foo/bar/baz"));
263        assert!(is_url("FiLe:///foo/bar/baz"));
264        assert!(is_url("FILE:///foo/bar/baz"));
265        assert!(is_url("az://foo/bar/baz"));
266        assert!(is_url("aZ://foo/bar/baz"));
267        assert!(is_url("AZ://foo/bar/baz"));
268        assert!(is_url("s3://foo/bar/baz"));
269        assert!(is_url("S3://foo/bar/baz"));
270        assert!(is_url("gs://foo/bar/baz"));
271        assert!(is_url("gS://foo/bar/baz"));
272        assert!(is_url("GS://foo/bar/baz"));
273        assert!(!is_url("foo://foo/bar/baz"));
274    }
275
276    #[test]
277    fn test_url_parsing() {
278        assert_eq!(
279            parse_url("http://example.com/foo/bar/baz")
280                .map(String::from)
281                .as_deref(),
282            Some("http://example.com/foo/bar/baz")
283        );
284        assert_eq!(
285            parse_url("https://example.com/foo/bar/baz")
286                .map(String::from)
287                .as_deref(),
288            Some("https://example.com/foo/bar/baz")
289        );
290        assert_eq!(
291            parse_url("file:///foo/bar/baz")
292                .map(String::from)
293                .as_deref(),
294            Some("file:///foo/bar/baz")
295        );
296        assert_eq!(
297            parse_url("az://foo/bar/baz").map(String::from).as_deref(),
298            Some("az://foo/bar/baz")
299        );
300        assert_eq!(
301            parse_url("s3://foo/bar/baz").map(String::from).as_deref(),
302            Some("s3://foo/bar/baz")
303        );
304        assert_eq!(
305            parse_url("gs://foo/bar/baz").map(String::from).as_deref(),
306            Some("gs://foo/bar/baz")
307        );
308        assert_eq!(
309            parse_url("foo://foo/bar/baz").map(String::from).as_deref(),
310            None
311        );
312    }
313
314    #[test]
315    fn test_evaluation_path_parsing() {
316        let p: EvaluationPath = "/foo/bar/baz".parse().expect("should parse");
317        assert_eq!(
318            p.unwrap_local().to_str().unwrap().replace("\\", "/"),
319            "/foo/bar/baz"
320        );
321
322        let p: EvaluationPath = "foo".parse().expect("should parse");
323        assert_eq!(p.unwrap_local().as_os_str(), "foo");
324
325        #[cfg(unix)]
326        {
327            let p: EvaluationPath = "file:///foo/bar/baz".parse().expect("should parse");
328            assert_eq!(p.unwrap_local().as_os_str(), "/foo/bar/baz");
329        }
330
331        #[cfg(windows)]
332        {
333            let p: EvaluationPath = "file:///C:/foo/bar/baz".parse().expect("should parse");
334            assert_eq!(p.unwrap_local().as_os_str(), "C:\\foo\\bar\\baz");
335        }
336
337        let p: EvaluationPath = "https://example.com/foo/bar/baz"
338            .parse()
339            .expect("should parse");
340        assert_eq!(
341            p.unwrap_remote().as_str(),
342            "https://example.com/foo/bar/baz"
343        );
344
345        let p: EvaluationPath = "az://foo/bar/baz".parse().expect("should parse");
346        assert_eq!(p.unwrap_remote().as_str(), "az://foo/bar/baz");
347
348        let p: EvaluationPath = "s3://foo/bar/baz".parse().expect("should parse");
349        assert_eq!(p.unwrap_remote().as_str(), "s3://foo/bar/baz");
350
351        let p: EvaluationPath = "gs://foo/bar/baz".parse().expect("should parse");
352        assert_eq!(p.unwrap_remote().as_str(), "gs://foo/bar/baz");
353    }
354
355    #[test]
356    fn test_evaluation_path_join() {
357        let p: EvaluationPath = "/foo/bar/baz".parse().expect("should parse");
358        assert_eq!(
359            p.join("qux/../quux")
360                .expect("should join")
361                .unwrap_local()
362                .to_str()
363                .unwrap()
364                .replace("\\", "/"),
365            "/foo/bar/baz/quux"
366        );
367
368        let p: EvaluationPath = "foo".parse().expect("should parse");
369        assert_eq!(
370            p.join("qux/../quux")
371                .expect("should join")
372                .unwrap_local()
373                .to_str()
374                .unwrap()
375                .replace("\\", "/"),
376            "foo/quux"
377        );
378
379        #[cfg(unix)]
380        {
381            let p: EvaluationPath = "file:///foo/bar/baz".parse().expect("should parse");
382            assert_eq!(
383                p.join("qux/../quux")
384                    .expect("should join")
385                    .unwrap_local()
386                    .as_os_str(),
387                "/foo/bar/baz/quux"
388            );
389        }
390
391        #[cfg(windows)]
392        {
393            let p: EvaluationPath = "file:///C:/foo/bar/baz".parse().expect("should parse");
394            assert_eq!(
395                p.join("qux/../quux")
396                    .expect("should join")
397                    .unwrap_local()
398                    .as_os_str(),
399                "C:\\foo\\bar\\baz\\quux"
400            );
401        }
402
403        let p: EvaluationPath = "https://example.com/foo/bar/baz"
404            .parse()
405            .expect("should parse");
406        assert_eq!(
407            p.join("qux/../quux")
408                .expect("should join")
409                .unwrap_remote()
410                .as_str(),
411            "https://example.com/foo/bar/quux"
412        );
413
414        let p: EvaluationPath = "https://example.com/foo/bar/baz/"
415            .parse()
416            .expect("should parse");
417        assert_eq!(
418            p.join("qux/../quux")
419                .expect("should join")
420                .unwrap_remote()
421                .as_str(),
422            "https://example.com/foo/bar/baz/quux"
423        );
424
425        let p: EvaluationPath = "az://foo/bar/baz/".parse().expect("should parse");
426        assert_eq!(
427            p.join("qux/../quux")
428                .expect("should join")
429                .unwrap_remote()
430                .as_str(),
431            "az://foo/bar/baz/quux"
432        );
433
434        let p: EvaluationPath = "s3://foo/bar/baz/".parse().expect("should parse");
435        assert_eq!(
436            p.join("qux/../quux")
437                .expect("should join")
438                .unwrap_remote()
439                .as_str(),
440            "s3://foo/bar/baz/quux"
441        );
442
443        let p: EvaluationPath = "gs://foo/bar/baz/".parse().expect("should parse");
444        assert_eq!(
445            p.join("qux/../quux")
446                .expect("should join")
447                .unwrap_remote()
448                .as_str(),
449            "gs://foo/bar/baz/quux"
450        );
451    }
452}