wdl_engine/
path.rs

1//! Representation of evaluation paths that support URLs.
2
3use std::fmt;
4use std::path::Path;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use anyhow::Context;
9use anyhow::Result;
10use anyhow::anyhow;
11use path_clean::clean;
12use url::Url;
13
/// Checks whether the given string begins with the `file://` scheme prefix.
///
/// The comparison is ASCII case-insensitive, so `FILE://` and `FiLe://` are
/// accepted as well. Only the prefix is inspected; the remainder of the string
/// is not validated as a URL.
pub fn is_file_url(s: &str) -> bool {
    const PREFIX: &str = "file://";

    // `get` yields `None` when the string is shorter than the prefix or when
    // the range would end inside a multi-byte character; neither can match.
    match s.get(..PREFIX.len()) {
        Some(head) => head.eq_ignore_ascii_case(PREFIX),
        None => false,
    }
}
20
/// Checks whether the given string begins with a supported URL scheme prefix.
///
/// The recognized prefixes are `http://`, `https://`, `file://`, `az://`,
/// `s3://`, and `gs://`, compared without regard to ASCII case. Only the
/// prefix is inspected; the remainder of the string is not validated.
pub fn is_url(s: &str) -> bool {
    const SUPPORTED_PREFIXES: [&str; 6] =
        ["http://", "https://", "file://", "az://", "s3://", "gs://"];

    for prefix in SUPPORTED_PREFIXES {
        // `get` is `None` if the string is too short for this prefix or the
        // range would end inside a multi-byte character.
        match s.get(..prefix.len()) {
            Some(head) if head.eq_ignore_ascii_case(prefix) => return true,
            _ => {}
        }
    }

    false
}
31
32/// Parses a string into a URL.
33///
34/// Returns `None` if the string is not a supported scheme or not a valid URL.
35pub fn parse_url(s: &str) -> Option<Url> {
36    if !is_url(s) {
37        return None;
38    }
39
40    s.parse().ok()
41}
42
43/// Represents a path used in evaluation that may be either local or remote.
44#[derive(Debug, Clone, PartialEq, Eq)]
45pub enum EvaluationPath {
46    /// The path is local (i.e. on the host).
47    Local(PathBuf),
48    /// The path is remote.
49    Remote(Url),
50}
51
52impl EvaluationPath {
53    /// Joins the given path to this path.
54    pub fn join(&self, path: &str) -> Result<Self> {
55        // URLs are absolute, so they can't be joined
56        if is_url(path) {
57            return path.parse();
58        }
59
60        // We can't join an absolute local path either
61        if Path::new(path).is_absolute() {
62            return Ok(Self::Local(clean(path)));
63        }
64
65        match self {
66            Self::Local(dir) => Ok(Self::Local(dir.join(clean(path)))),
67            Self::Remote(dir) => dir
68                .join(path)
69                .map(Self::Remote)
70                .with_context(|| format!("failed to join `{path}` to URL `{dir}`")),
71        }
72    }
73
74    /// Gets a string representation of the path.
75    ///
76    /// Returns `None` if the path is local and cannot be represented in UTF-8.
77    pub fn to_str(&self) -> Option<&str> {
78        match self {
79            Self::Local(path) => path.to_str(),
80            Self::Remote(url) => Some(url.as_str()),
81        }
82    }
83
84    /// Converts the path to a local path.
85    ///
86    /// Returns `None` if the path is remote.
87    pub fn as_local(&self) -> Option<&Path> {
88        match self {
89            Self::Local(path) => Some(path),
90            Self::Remote(_) => None,
91        }
92    }
93
94    /// Unwraps the path to a local path.
95    ///
96    /// # Panics
97    ///
98    /// Panics if the path is remote.
99    pub fn unwrap_local(self) -> PathBuf {
100        match self {
101            Self::Local(path) => path,
102            Self::Remote(_) => panic!("path is remote"),
103        }
104    }
105
106    /// Converts the path to a remote URL.
107    ///
108    /// Returns `None` if the path is local.
109    pub fn as_remote(&self) -> Option<&Url> {
110        match self {
111            Self::Local(_) => None,
112            Self::Remote(url) => Some(url),
113        }
114    }
115
116    /// Unwraps the path to a remote URL.
117    ///
118    /// # Panics
119    ///
120    /// Panics if the path is local.
121    pub fn unwrap_remote(self) -> Url {
122        match self {
123            Self::Local(_) => panic!("path is local"),
124            Self::Remote(url) => url,
125        }
126    }
127
128    /// Gets the file name of the path.
129    ///
130    /// Returns `Ok(None)` if the path does not contain a file name (i.e. is
131    /// root).
132    ///
133    /// Returns an error if the file name is not UTF-8.
134    pub fn file_name(&self) -> Result<Option<&str>> {
135        match self {
136            Self::Local(path) => path
137                .file_name()
138                .map(|n| {
139                    n.to_str().with_context(|| {
140                        format!("path `{path}` is not UTF-8", path = path.display())
141                    })
142                })
143                .transpose(),
144            Self::Remote(url) => Ok(url.path_segments().and_then(|mut s| s.next_back())),
145        }
146    }
147
148    /// Returns a display implementation for the path.
149    pub fn display(&self) -> impl fmt::Display {
150        struct Display<'a>(&'a EvaluationPath);
151
152        impl fmt::Display for Display<'_> {
153            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154                match self.0 {
155                    EvaluationPath::Local(path) => write!(f, "{path}", path = path.display()),
156                    EvaluationPath::Remote(url) => write!(f, "{url}"),
157                }
158            }
159        }
160
161        Display(self)
162    }
163}
164
165impl FromStr for EvaluationPath {
166    type Err = anyhow::Error;
167
168    fn from_str(s: &str) -> Result<Self, Self::Err> {
169        // Store `file` schemed URLs as local paths.
170        if is_file_url(s) {
171            let url = s
172                .parse::<Url>()
173                .with_context(|| format!("invalid `file` schemed URL `{s}`"))?;
174            return url
175                .to_file_path()
176                .map(|p| Self::Local(clean(p)))
177                .map_err(|_| anyhow!("URL `{s}` cannot be represented as a local file path"));
178        }
179
180        if let Some(url) = parse_url(s) {
181            return Ok(Self::Remote(url));
182        }
183
184        Ok(Self::Local(clean(s)))
185    }
186}
187
188impl TryFrom<EvaluationPath> for String {
189    type Error = anyhow::Error;
190
191    fn try_from(value: EvaluationPath) -> Result<Self, Self::Error> {
192        match value {
193            EvaluationPath::Local(path) => path
194                .into_os_string()
195                .into_string()
196                .map_err(|_| anyhow!("path cannot be represented as a UTF-8 string")),
197            EvaluationPath::Remote(url) => Ok(url.into()),
198        }
199    }
200}
201
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// The relative path joined onto every base path in the join tests.
    const RELATIVE: &str = "qux/../quux";

    /// Parses the given string as an evaluation path, panicking on failure.
    fn parse(s: &str) -> EvaluationPath {
        s.parse().expect("should parse")
    }

    /// Unwraps a local path and renders it with forward slashes so that the
    /// expectations are the same on every platform.
    fn local_with_forward_slashes(path: EvaluationPath) -> String {
        path.unwrap_local().to_str().unwrap().replace("\\", "/")
    }

    #[test]
    fn test_file_urls() {
        for s in [
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
        ] {
            assert!(is_file_url(s), "`{s}` should be a `file` URL");
        }

        for s in ["https://example.com/bar/baz", "az://foo/bar/baz"] {
            assert!(!is_file_url(s), "`{s}` should not be a `file` URL");
        }
    }

    #[test]
    fn test_urls() {
        for s in [
            "http://example.com/foo/bar/baz",
            "HtTp://example.com/foo/bar/baz",
            "HTTP://example.com/foo/bar/baz",
            "https://example.com/foo/bar/baz",
            "HtTpS://example.com/foo/bar/baz",
            "HTTPS://example.com/foo/bar/baz",
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
            "az://foo/bar/baz",
            "aZ://foo/bar/baz",
            "AZ://foo/bar/baz",
            "s3://foo/bar/baz",
            "S3://foo/bar/baz",
            "gs://foo/bar/baz",
            "gS://foo/bar/baz",
            "GS://foo/bar/baz",
        ] {
            assert!(is_url(s), "`{s}` should be a supported URL");
        }

        assert!(!is_url("foo://foo/bar/baz"));
    }

    #[test]
    fn test_url_parsing() {
        // Each supported URL should round-trip through parsing unchanged.
        for s in [
            "http://example.com/foo/bar/baz",
            "https://example.com/foo/bar/baz",
            "file:///foo/bar/baz",
            "az://foo/bar/baz",
            "s3://foo/bar/baz",
            "gs://foo/bar/baz",
        ] {
            let parsed = parse_url(s).map(String::from);
            assert_eq!(parsed.as_deref(), Some(s));
        }

        // Unsupported schemes are rejected.
        let unsupported = parse_url("foo://foo/bar/baz").map(String::from);
        assert_eq!(unsupported.as_deref(), None);
    }

    #[test]
    fn test_evaluation_path_parsing() {
        assert_eq!(
            local_with_forward_slashes(parse("/foo/bar/baz")),
            "/foo/bar/baz"
        );

        assert_eq!(parse("foo").unwrap_local().as_os_str(), "foo");

        #[cfg(unix)]
        {
            let local = parse("file:///foo/bar/baz").unwrap_local();
            assert_eq!(local.as_os_str(), "/foo/bar/baz");
        }

        #[cfg(windows)]
        {
            let local = parse("file:///C:/foo/bar/baz").unwrap_local();
            assert_eq!(local.as_os_str(), "C:\\foo\\bar\\baz");
        }

        // Remote URLs are kept as-is.
        for s in [
            "https://example.com/foo/bar/baz",
            "az://foo/bar/baz",
            "s3://foo/bar/baz",
            "gs://foo/bar/baz",
        ] {
            assert_eq!(parse(s).unwrap_remote().as_str(), s);
        }
    }

    #[test]
    fn test_evaluation_path_join() {
        // Local bases written as plain paths.
        for (base, expected) in [
            ("/foo/bar/baz", "/foo/bar/baz/quux"),
            ("foo", "foo/quux"),
        ] {
            let joined = parse(base).join(RELATIVE).expect("should join");
            assert_eq!(local_with_forward_slashes(joined), expected);
        }

        // Local bases written as `file` URLs.
        #[cfg(unix)]
        {
            let joined = parse("file:///foo/bar/baz")
                .join(RELATIVE)
                .expect("should join")
                .unwrap_local();
            assert_eq!(joined.as_os_str(), "/foo/bar/baz/quux");
        }

        #[cfg(windows)]
        {
            let joined = parse("file:///C:/foo/bar/baz")
                .join(RELATIVE)
                .expect("should join")
                .unwrap_local();
            assert_eq!(joined.as_os_str(), "C:\\foo\\bar\\baz\\quux");
        }

        // Remote bases; note that a base without a trailing slash has its
        // final segment replaced.
        for (base, expected) in [
            (
                "https://example.com/foo/bar/baz",
                "https://example.com/foo/bar/quux",
            ),
            (
                "https://example.com/foo/bar/baz/",
                "https://example.com/foo/bar/baz/quux",
            ),
            ("az://foo/bar/baz/", "az://foo/bar/baz/quux"),
            ("s3://foo/bar/baz/", "s3://foo/bar/baz/quux"),
            ("gs://foo/bar/baz/", "gs://foo/bar/baz/quux"),
        ] {
            let joined = parse(base).join(RELATIVE).expect("should join");
            assert_eq!(joined.unwrap_remote().as_str(), expected);
        }
    }
}