wdl_engine/
path.rs

1//! Representation of evaluation paths that support URLs.
2
3use std::fmt;
4use std::path::Path;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use anyhow::Context;
9use anyhow::Result;
10use anyhow::anyhow;
11use anyhow::bail;
12use path_clean::PathClean;
13use url::Url;
14
15use crate::ContentKind;
16use crate::config::ContentDigestMode;
17use crate::digest::Digest;
18use crate::digest::calculate_local_digest;
19use crate::digest::calculate_remote_digest;
20use crate::http::Transferer;
21
/// URL scheme prefixes recognized by this crate.
///
/// Every entry includes the trailing `://` so that it can be compared
/// directly against the start of a candidate string.
const SUPPORTED_SCHEMES: &[&str] = &[
    "http://", "https://", "file://", "az://", "s3://", "gs://",
];
24
/// Returns `true` when `s` begins with `prefix`, treating ASCII letters as
/// case-insensitive.
///
/// Returns `false` when `s` is shorter than `prefix` or when the prefix
/// length does not fall on a character boundary of `s`.
fn starts_with_ignore_ascii_case(s: &str, prefix: &str) -> bool {
    s.get(..prefix.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
}
32
33/// Determines if the given string is prefixed with a `file` URL scheme.
34pub(crate) fn is_file_url(s: &str) -> bool {
35    starts_with_ignore_ascii_case(s.trim_start(), "file://")
36}
37
38/// Determines if the given string is prefixed with a supported URL scheme.
39pub(crate) fn is_supported_url(s: &str) -> bool {
40    SUPPORTED_SCHEMES
41        .iter()
42        .any(|scheme| starts_with_ignore_ascii_case(s.trim_start(), scheme))
43}
44
45/// Represents the kind of an evaluation path.
46#[derive(Debug, Clone, PartialEq, Eq, Hash)]
47pub(crate) enum EvaluationPathKind {
48    /// The path is local (i.e. on the host).
49    Local(PathBuf),
50    /// The path is remote.
51    Remote(Url),
52}
53
54impl fmt::Display for EvaluationPathKind {
55    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56        match self {
57            Self::Local(path) => write!(f, "{path}", path = path.display()),
58            Self::Remote(url) => write!(f, "{url}"),
59        }
60    }
61}
62
63/// Represents a path used in evaluation that may be either local or remote.
64#[derive(Debug, Clone, PartialEq, Eq, Hash)]
65pub struct EvaluationPath(EvaluationPathKind);
66
67impl EvaluationPath {
68    /// Constructs an `EvaluationPath` from a local path.
69    ///
70    /// This is an internal method where we assume the path is already "clean".
71    pub(crate) fn from_local_path(path: PathBuf) -> Self {
72        Self(EvaluationPathKind::Local(path))
73    }
74
75    /// Joins the given path to this path.
76    pub fn join(&self, path: &str) -> Result<Self> {
77        // URLs are absolute, so they can't be joined
78        if is_supported_url(path) {
79            return path.parse();
80        }
81
82        // We can't join an absolute local path either
83        let p = Path::new(path);
84        if p.is_absolute() {
85            return Ok(Self(EvaluationPathKind::Local(p.clean())));
86        }
87
88        match &self.0 {
89            EvaluationPathKind::Local(dir) => {
90                Ok(Self(EvaluationPathKind::Local(dir.join(path).clean())))
91            }
92            EvaluationPathKind::Remote(dir) => Ok(Self(
93                dir.join(path)
94                    .map(EvaluationPathKind::Remote)
95                    .with_context(|| format!("failed to join `{path}` to URL `{dir}`"))?,
96            )),
97        }
98    }
99
100    /// Gets the underlying evaluation path kind.
101    pub(crate) fn kind(&self) -> &EvaluationPathKind {
102        &self.0
103    }
104
105    /// Converts to the underlying evaluation path kind.
106    pub(crate) fn into_kind(self) -> EvaluationPathKind {
107        self.0
108    }
109
110    /// Returns `true` if the path is local.
111    pub fn is_local(&self) -> bool {
112        matches!(&self.0, EvaluationPathKind::Local(_))
113    }
114
115    /// Converts the path to a local path.
116    ///
117    /// Returns `None` if the path is remote.
118    pub fn as_local(&self) -> Option<&Path> {
119        match &self.0 {
120            EvaluationPathKind::Local(path) => Some(path),
121            EvaluationPathKind::Remote(_) => None,
122        }
123    }
124
125    /// Unwraps the path to a local path.
126    ///
127    /// # Panics
128    ///
129    /// Panics if the path is remote.
130    pub fn unwrap_local(self) -> PathBuf {
131        match self.0 {
132            EvaluationPathKind::Local(path) => path,
133            EvaluationPathKind::Remote(_) => panic!("path is remote"),
134        }
135    }
136
137    /// Returns `true` if the path is remote.
138    pub fn is_remote(&self) -> bool {
139        matches!(&self.0, EvaluationPathKind::Remote(_))
140    }
141
142    /// Converts the path to a remote URL.
143    ///
144    /// Returns `None` if the path is local.
145    pub fn as_remote(&self) -> Option<&Url> {
146        match &self.0 {
147            EvaluationPathKind::Local(_) => None,
148            EvaluationPathKind::Remote(url) => Some(url),
149        }
150    }
151
152    /// Unwraps the path to a remote URL.
153    ///
154    /// # Panics
155    ///
156    /// Panics if the path is local.
157    pub fn unwrap_remote(self) -> Url {
158        match self.0 {
159            EvaluationPathKind::Local(_) => panic!("path is local"),
160            EvaluationPathKind::Remote(url) => url,
161        }
162    }
163
164    /// Gets the parent of the given path.
165    ///
166    /// Returns `None` if the evaluation path isn't valid or has no parent.
167    pub fn parent_of(path: &str) -> Option<Self> {
168        let path: EvaluationPath = path.parse().ok()?;
169        match path.0 {
170            EvaluationPathKind::Local(path) => path
171                .parent()
172                .map(|p| Self(EvaluationPathKind::Local(p.to_path_buf()))),
173            EvaluationPathKind::Remote(mut url) => {
174                if url.path() == "/" {
175                    return None;
176                }
177
178                if let Ok(mut segments) = url.path_segments_mut() {
179                    segments.pop_if_empty().pop();
180                }
181
182                Some(Self(EvaluationPathKind::Remote(url)))
183            }
184        }
185    }
186
187    /// Gets the file name of the path.
188    ///
189    /// Returns `Ok(None)` if the path does not contain a file name (i.e. is
190    /// root).
191    ///
192    /// Returns an error if the file name is not UTF-8.
193    pub fn file_name(&self) -> Result<Option<&str>> {
194        match &self.0 {
195            EvaluationPathKind::Local(path) => path
196                .file_name()
197                .map(|n| {
198                    n.to_str().with_context(|| {
199                        format!("path `{path}` is not UTF-8", path = path.display())
200                    })
201                })
202                .transpose(),
203            EvaluationPathKind::Remote(url) => {
204                Ok(url.path_segments().and_then(|mut s| s.next_back()))
205            }
206        }
207    }
208
209    /// Calculates the content digest of the evaluation path.
210    pub(crate) async fn calculate_digest(
211        &self,
212        transferer: &dyn Transferer,
213        kind: ContentKind,
214        mode: ContentDigestMode,
215    ) -> Result<Digest> {
216        match &self.0 {
217            EvaluationPathKind::Local(path) => calculate_local_digest(path, kind, mode).await,
218            EvaluationPathKind::Remote(url) => calculate_remote_digest(transferer, url, kind).await,
219        }
220    }
221}
222
223impl FromStr for EvaluationPath {
224    type Err = anyhow::Error;
225
226    fn from_str(s: &str) -> Result<Self> {
227        // Store `file` schemed URLs as local paths.
228        if is_file_url(s) {
229            let url = s
230                .parse::<Url>()
231                .with_context(|| format!("invalid `file` schemed URL `{s}`"))?;
232            return url
233                .to_file_path()
234                .map(|p| Self(EvaluationPathKind::Local(p.clean())))
235                .map_err(|_| anyhow!("URL `{s}` cannot be represented as a local file path"));
236        }
237
238        if is_supported_url(s) {
239            return Ok(Self(EvaluationPathKind::Remote(
240                s.parse().with_context(|| format!("URL `{s}` is invalid"))?,
241            )));
242        }
243
244        Ok(Self(EvaluationPathKind::Local(Path::new(s).clean())))
245    }
246}
247
248impl fmt::Display for EvaluationPath {
249    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250        self.0.fmt(f)
251    }
252}
253
254impl TryFrom<&str> for EvaluationPath {
255    type Error = anyhow::Error;
256
257    fn try_from(value: &str) -> Result<Self> {
258        value.parse()
259    }
260}
261
262impl TryFrom<EvaluationPath> for String {
263    type Error = anyhow::Error;
264
265    fn try_from(path: EvaluationPath) -> Result<Self> {
266        match path.0 {
267            EvaluationPathKind::Local(path) => match path.into_os_string().into_string() {
268                Ok(s) => Ok(s),
269                Err(path) => bail!(
270                    "path `{path}` cannot be represented with UTF-8",
271                    path = path.display()
272                ),
273            },
274            EvaluationPathKind::Remote(url) => Ok(url.into()),
275        }
276    }
277}
278
279impl From<&Path> for EvaluationPath {
280    fn from(path: &Path) -> Self {
281        Self(EvaluationPathKind::Local(path.clean()))
282    }
283}
284
285impl TryFrom<Url> for EvaluationPath {
286    type Error = anyhow::Error;
287
288    fn try_from(url: Url) -> std::result::Result<Self, Self::Error> {
289        if !is_supported_url(url.as_str()) {
290            bail!("URL `{url}` is not supported");
291        }
292
293        Ok(Self(EvaluationPathKind::Remote(url)))
294    }
295}
296
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// The relative path joined onto every base path in the join test.
    const RELATIVE: &str = "qux/../quux";

    /// Parses an evaluation path, panicking if parsing fails.
    fn parse(s: &str) -> EvaluationPath {
        s.parse().expect("should parse")
    }

    /// Unwraps a local path into a string with `\` replaced by `/` so that
    /// expectations are the same on every platform.
    fn forward_slashes(path: EvaluationPath) -> String {
        path.unwrap_local().to_str().unwrap().replace("\\", "/")
    }

    #[test]
    fn test_file_urls() {
        let accepted = [
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
        ];
        for url in accepted {
            assert!(is_file_url(url), "`{url}` should be a file URL");
        }

        let rejected = ["https://example.com/bar/baz", "az://foo/bar/baz"];
        for url in rejected {
            assert!(!is_file_url(url), "`{url}` should not be a file URL");
        }
    }

    #[test]
    fn test_urls() {
        let supported = [
            "http://example.com/foo/bar/baz",
            "HtTp://example.com/foo/bar/baz",
            "HTTP://example.com/foo/bar/baz",
            "https://example.com/foo/bar/baz",
            "HtTpS://example.com/foo/bar/baz",
            "HTTPS://example.com/foo/bar/baz",
            "file:///foo/bar/baz",
            "FiLe:///foo/bar/baz",
            "FILE:///foo/bar/baz",
            "az://foo/bar/baz",
            "aZ://foo/bar/baz",
            "AZ://foo/bar/baz",
            "s3://foo/bar/baz",
            "S3://foo/bar/baz",
            "gs://foo/bar/baz",
            "gS://foo/bar/baz",
            "GS://foo/bar/baz",
        ];
        for url in supported {
            assert!(is_supported_url(url), "`{url}` should be supported");
        }

        assert!(!is_supported_url("foo://foo/bar/baz"));
    }

    #[test]
    fn test_evaluation_path_parsing() {
        // Plain local paths
        assert_eq!(forward_slashes(parse("/foo/bar/baz")), "/foo/bar/baz");
        assert_eq!(parse("foo").unwrap_local().as_os_str(), "foo");

        // `file` URLs are stored as local paths
        #[cfg(unix)]
        {
            let local = parse("file:///foo/bar/baz").unwrap_local();
            assert_eq!(local.as_os_str(), "/foo/bar/baz");
        }

        #[cfg(windows)]
        {
            let local = parse("file:///C:/foo/bar/baz").unwrap_local();
            assert_eq!(local.as_os_str(), "C:\\foo\\bar\\baz");
        }

        // Remote URLs round-trip unchanged
        let remotes = [
            "https://example.com/foo/bar/baz",
            "az://foo/bar/baz",
            "s3://foo/bar/baz",
            "gs://foo/bar/baz",
        ];
        for url in remotes {
            assert_eq!(parse(url).unwrap_remote().as_str(), url);
        }
    }

    #[test]
    fn test_evaluation_path_join() {
        // Local bases: the relative path is appended and the result cleaned
        let local_cases = [
            ("/foo/bar/baz", "/foo/bar/baz/quux"),
            ("foo", "foo/quux"),
        ];
        for (base, expected) in local_cases {
            let joined = parse(base).join(RELATIVE).expect("should join");
            assert_eq!(forward_slashes(joined), expected);
        }

        // `file` URL bases behave like local paths
        #[cfg(unix)]
        {
            let joined = parse("file:///foo/bar/baz")
                .join(RELATIVE)
                .expect("should join")
                .unwrap_local();
            assert_eq!(joined.as_os_str(), "/foo/bar/baz/quux");
        }

        #[cfg(windows)]
        {
            let joined = parse("file:///C:/foo/bar/baz")
                .join(RELATIVE)
                .expect("should join")
                .unwrap_local();
            assert_eq!(joined.as_os_str(), "C:\\foo\\bar\\baz\\quux");
        }

        // Remote bases: without a trailing slash the last segment is replaced
        let remote_cases = [
            (
                "https://example.com/foo/bar/baz",
                "https://example.com/foo/bar/quux",
            ),
            (
                "https://example.com/foo/bar/baz/",
                "https://example.com/foo/bar/baz/quux",
            ),
            ("az://foo/bar/baz/", "az://foo/bar/baz/quux"),
            ("s3://foo/bar/baz/", "s3://foo/bar/baz/quux"),
            ("gs://foo/bar/baz/", "gs://foo/bar/baz/quux"),
        ];
        for (base, expected) in remote_cases {
            let joined = parse(base).join(RELATIVE).expect("should join");
            assert_eq!(joined.unwrap_remote().as_str(), expected);
        }
    }
}