wdl_engine/
path.rs

1//! Representation of evaluation paths that support URLs.
2
3use std::fmt;
4use std::path::Path;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use anyhow::Context;
9use anyhow::Result;
10use anyhow::anyhow;
11use path_clean::PathClean;
12use url::Url;
13
/// Checks whether the given string begins with the `file://` URL scheme.
///
/// The comparison is ASCII case-insensitive, so `FILE://` and `FiLe://` are
/// also accepted. Strings shorter than the scheme prefix (or where the prefix
/// length does not fall on a character boundary) are never file URLs.
pub fn is_file_url(s: &str) -> bool {
    const SCHEME: &str = "file://";

    matches!(s.get(..SCHEME.len()), Some(head) if head.eq_ignore_ascii_case(SCHEME))
}
20
/// Checks whether the given string begins with one of the supported URL
/// schemes.
///
/// The supported schemes are `http`, `https`, `file`, `az`, `s3`, and `gs`;
/// each is matched ASCII case-insensitively and must be followed by `://`.
pub fn is_url(s: &str) -> bool {
    const SCHEMES: [&str; 6] = ["http://", "https://", "file://", "az://", "s3://", "gs://"];

    for scheme in SCHEMES {
        // `get` yields `None` when the string is too short or when the prefix
        // length would split a multi-byte character.
        let matched = s
            .get(..scheme.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(scheme));
        if matched {
            return true;
        }
    }

    false
}
31
32/// Parses a string into a URL.
33///
34/// Returns `None` if the string is not a supported scheme or not a valid URL.
35pub fn parse_url(s: &str) -> Option<Url> {
36    if !is_url(s) {
37        return None;
38    }
39
40    s.parse().ok()
41}
42
43/// Represents a path used in evaluation that may be either local or remote.
44#[derive(Debug, Clone, PartialEq, Eq)]
45pub enum EvaluationPath {
46    /// The path is local (i.e. on the host).
47    Local(PathBuf),
48    /// The path is remote.
49    Remote(Url),
50}
51
52impl EvaluationPath {
53    /// Joins the given path to this path.
54    pub fn join(&self, path: &str) -> Result<Self> {
55        // URLs are absolute, so they can't be joined
56        if is_url(path) {
57            return path.parse();
58        }
59
60        // We can't join an absolute local path either
61        let p = Path::new(path);
62        if p.is_absolute() {
63            return Ok(Self::Local(p.clean()));
64        }
65
66        match self {
67            Self::Local(dir) => Ok(Self::Local(dir.join(path).clean())),
68            Self::Remote(dir) => dir
69                .join(path)
70                .map(Self::Remote)
71                .with_context(|| format!("failed to join `{path}` to URL `{dir}`")),
72        }
73    }
74
75    /// Gets a string representation of the path.
76    ///
77    /// Returns `None` if the path is local and cannot be represented in UTF-8.
78    pub fn to_str(&self) -> Option<&str> {
79        match self {
80            Self::Local(path) => path.to_str(),
81            Self::Remote(url) => Some(url.as_str()),
82        }
83    }
84
85    /// Converts the path to a local path.
86    ///
87    /// Returns `None` if the path is remote.
88    pub fn as_local(&self) -> Option<&Path> {
89        match self {
90            Self::Local(path) => Some(path),
91            Self::Remote(_) => None,
92        }
93    }
94
95    /// Unwraps the path to a local path.
96    ///
97    /// # Panics
98    ///
99    /// Panics if the path is remote.
100    pub fn unwrap_local(self) -> PathBuf {
101        match self {
102            Self::Local(path) => path,
103            Self::Remote(_) => panic!("path is remote"),
104        }
105    }
106
107    /// Converts the path to a remote URL.
108    ///
109    /// Returns `None` if the path is local.
110    pub fn as_remote(&self) -> Option<&Url> {
111        match self {
112            Self::Local(_) => None,
113            Self::Remote(url) => Some(url),
114        }
115    }
116
117    /// Unwraps the path to a remote URL.
118    ///
119    /// # Panics
120    ///
121    /// Panics if the path is local.
122    pub fn unwrap_remote(self) -> Url {
123        match self {
124            Self::Local(_) => panic!("path is local"),
125            Self::Remote(url) => url,
126        }
127    }
128
129    /// Gets the file name of the path.
130    ///
131    /// Returns `Ok(None)` if the path does not contain a file name (i.e. is
132    /// root).
133    ///
134    /// Returns an error if the file name is not UTF-8.
135    pub fn file_name(&self) -> Result<Option<&str>> {
136        match self {
137            Self::Local(path) => path
138                .file_name()
139                .map(|n| {
140                    n.to_str().with_context(|| {
141                        format!("path `{path}` is not UTF-8", path = path.display())
142                    })
143                })
144                .transpose(),
145            Self::Remote(url) => Ok(url.path_segments().and_then(|mut s| s.next_back())),
146        }
147    }
148
149    /// Returns a display implementation for the path.
150    pub fn display(&self) -> impl fmt::Display {
151        struct Display<'a>(&'a EvaluationPath);
152
153        impl fmt::Display for Display<'_> {
154            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155                match self.0 {
156                    EvaluationPath::Local(path) => write!(f, "{path}", path = path.display()),
157                    EvaluationPath::Remote(url) => write!(f, "{url}"),
158                }
159            }
160        }
161
162        Display(self)
163    }
164}
165
166impl FromStr for EvaluationPath {
167    type Err = anyhow::Error;
168
169    fn from_str(s: &str) -> Result<Self, Self::Err> {
170        // Store `file` schemed URLs as local paths.
171        if is_file_url(s) {
172            let url = s
173                .parse::<Url>()
174                .with_context(|| format!("invalid `file` schemed URL `{s}`"))?;
175            return url
176                .to_file_path()
177                .map(|p| Self::Local(p.clean()))
178                .map_err(|_| anyhow!("URL `{s}` cannot be represented as a local file path"));
179        }
180
181        if let Some(url) = parse_url(s) {
182            return Ok(Self::Remote(url));
183        }
184
185        Ok(Self::Local(Path::new(s).clean()))
186    }
187}
188
189impl TryFrom<EvaluationPath> for String {
190    type Error = anyhow::Error;
191
192    fn try_from(value: EvaluationPath) -> Result<Self, Self::Error> {
193        match value {
194            EvaluationPath::Local(path) => path
195                .into_os_string()
196                .into_string()
197                .map_err(|_| anyhow!("path cannot be represented as a UTF-8 string")),
198            EvaluationPath::Remote(url) => Ok(url.into()),
199        }
200    }
201}
202
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// The relative path joined onto every base path in the join tests.
    const RELATIVE: &str = "qux/../quux";

    /// Parses the given string as an evaluation path, panicking on failure.
    fn parse(s: &str) -> EvaluationPath {
        s.parse().expect("should parse")
    }

    /// Unwraps a local path into a string with forward-slash separators so
    /// expectations are platform independent.
    fn local_str(path: EvaluationPath) -> String {
        path.unwrap_local().to_str().unwrap().replace("\\", "/")
    }

    #[test]
    fn test_file_urls() {
        for s in [
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
        ] {
            assert!(is_file_url(s), "`{s}` should be a file URL");
        }

        for s in ["https://example.com/bar/baz", "az://foo/bar/baz"] {
            assert!(!is_file_url(s), "`{s}` should not be a file URL");
        }
    }

    #[test]
    fn test_urls() {
        let supported = [
            "http://example.com/foo/bar/baz",
            "HtTp://example.com/foo/bar/baz",
            "HTTP://example.com/foo/bar/baz",
            "https://example.com/foo/bar/baz",
            "HtTpS://example.com/foo/bar/baz",
            "HTTPS://example.com/foo/bar/baz",
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
            "az://foo/bar/baz",
            "aZ://foo/bar/baz",
            "AZ://foo/bar/baz",
            "s3://foo/bar/baz",
            "S3://foo/bar/baz",
            "gs://foo/bar/baz",
            "gS://foo/bar/baz",
            "GS://foo/bar/baz",
        ];
        for s in supported {
            assert!(is_url(s), "`{s}` should be a supported URL");
        }

        assert!(!is_url("foo://foo/bar/baz"));
    }

    #[test]
    fn test_url_parsing() {
        let cases = [
            (
                "http://example.com/foo/bar/baz",
                Some("http://example.com/foo/bar/baz"),
            ),
            (
                "https://example.com/foo/bar/baz",
                Some("https://example.com/foo/bar/baz"),
            ),
            ("file:///foo/bar/baz", Some("file:///foo/bar/baz")),
            ("az://foo/bar/baz", Some("az://foo/bar/baz")),
            ("s3://foo/bar/baz", Some("s3://foo/bar/baz")),
            ("gs://foo/bar/baz", Some("gs://foo/bar/baz")),
            ("foo://foo/bar/baz", None),
        ];

        for (input, expected) in cases {
            let parsed = parse_url(input).map(String::from);
            assert_eq!(parsed.as_deref(), expected);
        }
    }

    #[test]
    fn test_evaluation_path_parsing() {
        // Plain local paths
        assert_eq!(local_str(parse("/foo/bar/baz")), "/foo/bar/baz");
        assert_eq!(parse("foo").unwrap_local().as_os_str(), "foo");

        // `file` URLs are stored as local paths
        #[cfg(unix)]
        {
            let p = parse("file:///foo/bar/baz");
            assert_eq!(p.unwrap_local().as_os_str(), "/foo/bar/baz");
        }

        #[cfg(windows)]
        {
            let p = parse("file:///C:/foo/bar/baz");
            assert_eq!(p.unwrap_local().as_os_str(), "C:\\foo\\bar\\baz");
        }

        // Remote URLs round-trip unchanged
        for url in [
            "https://example.com/foo/bar/baz",
            "az://foo/bar/baz",
            "s3://foo/bar/baz",
            "gs://foo/bar/baz",
        ] {
            assert_eq!(parse(url).unwrap_remote().as_str(), url);
        }
    }

    #[test]
    fn test_evaluation_path_join() {
        // Local bases
        for (base, expected) in [("/foo/bar/baz", "/foo/bar/baz/quux"), ("foo", "foo/quux")] {
            let joined = parse(base).join(RELATIVE).expect("should join");
            assert_eq!(local_str(joined), expected);
        }

        // `file` URL bases behave like local paths
        #[cfg(unix)]
        {
            let joined = parse("file:///foo/bar/baz")
                .join(RELATIVE)
                .expect("should join");
            assert_eq!(joined.unwrap_local().as_os_str(), "/foo/bar/baz/quux");
        }

        #[cfg(windows)]
        {
            let joined = parse("file:///C:/foo/bar/baz")
                .join(RELATIVE)
                .expect("should join");
            assert_eq!(
                joined.unwrap_local().as_os_str(),
                "C:\\foo\\bar\\baz\\quux"
            );
        }

        // Remote bases; note that a base without a trailing slash has its
        // final segment replaced.
        let remote_cases = [
            (
                "https://example.com/foo/bar/baz",
                "https://example.com/foo/bar/quux",
            ),
            (
                "https://example.com/foo/bar/baz/",
                "https://example.com/foo/bar/baz/quux",
            ),
            ("az://foo/bar/baz/", "az://foo/bar/baz/quux"),
            ("s3://foo/bar/baz/", "s3://foo/bar/baz/quux"),
            ("gs://foo/bar/baz/", "gs://foo/bar/baz/quux"),
        ];

        for (base, expected) in remote_cases {
            let joined = parse(base).join(RELATIVE).expect("should join");
            assert_eq!(joined.unwrap_remote().as_str(), expected);
        }
    }
}