fusio/path/
mod.rs

1//! A path abstraction that can be used to represent paths in a cloud-agnostic way.
2
3use std::{fmt::Formatter, path::PathBuf};
4
5use itertools::Itertools;
6use percent_encoding::percent_decode;
7use thiserror::Error;
8use url::Url;
9
/// Separator placed between path segments; it is what gives object names a
/// directory-like structure.
pub const DELIMITER: &str = "/";

/// [`DELIMITER`] expressed as a single byte (the first byte of the string).
pub const DELIMITER_BYTE: u8 = {
    let bytes = DELIMITER.as_bytes();
    bytes[0]
};
15
16mod parts;
17
18pub use parts::{InvalidPart, PathPart};
19
20#[derive(Debug, Error)]
21#[error(transparent)]
22pub enum Error {
23    #[error("Path \"{path}\" contained empty path segment")]
24    EmptySegment { path: String },
25    #[error("Error parsing Path \"{path}\": {source}")]
26    BadSegment { path: String, source: InvalidPart },
27    #[error("Failed to canonicalize path \"{path}\": {source}")]
28    Canonicalize {
29        path: std::path::PathBuf,
30        source: std::io::Error,
31    },
32    #[error("Unable to convert path \"{path}\" to URL")]
33    InvalidPath { path: PathBuf },
34    #[error("Unable to convert url \"{url}\" to Path")]
35    InvalidUrl { url: Url },
36    #[error("Path \"{path}\" contained non-unicode characters: {source}")]
37    NonUnicode {
38        path: String,
39        source: std::str::Utf8Error,
40    },
41    #[error("Path {path} does not start with prefix {prefix}")]
42    PrefixMismatch { path: String, prefix: String },
43}
44
/// A path made of segments joined by the delimiter.
///
/// Equality, ordering and hashing all operate on the stored string.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Path {
    /// The joined segments as a single string.
    raw: String,
}
49
50#[cfg(not(target_arch = "wasm32"))]
51impl Path {
52    pub fn from_filesystem_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
53        let absolute = std::fs::canonicalize(&path).map_err(|err| Error::Canonicalize {
54            path: path.as_ref().to_path_buf(),
55            source: err,
56        })?;
57
58        Self::from_absolute_path(absolute)
59    }
60
61    pub fn from_absolute_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
62        Self::from_absolute_path_with_base(path, None)
63    }
64
65    pub(crate) fn from_absolute_path_with_base(
66        path: impl AsRef<std::path::Path>,
67        base: Option<&url::Url>,
68    ) -> Result<Self, Error> {
69        let url = absolute_path_to_url(path)?;
70        let path = match base {
71            Some(prefix) => {
72                url.path()
73                    .strip_prefix(prefix.path())
74                    .ok_or_else(|| Error::PrefixMismatch {
75                        path: url.path().to_string(),
76                        prefix: prefix.to_string(),
77                    })?
78            }
79            None => url.path(),
80        };
81
82        // Reverse any percent encoding performed by conversion to URL
83        Self::from_url_path(path)
84    }
85}
86
#[cfg(target_arch = "wasm32")]
impl Path {
    /// Builds a [`Path`] from an OPFS path by parsing its string form with
    /// [`Path::parse`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidPath`] when `path` is not valid UTF-8 (this
    /// used to panic), or any error produced by [`Path::parse`].
    pub fn from_opfs_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
        let path = path.as_ref();
        // Report a non-UTF-8 path as an error instead of unwrapping: the
        // function is fallible already, so callers can handle it.
        let utf8 = path.to_str().ok_or_else(|| Error::InvalidPath {
            path: path.to_path_buf(),
        })?;
        Self::parse(utf8)
    }
}
93
94impl Path {
95    pub fn new(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
96        #[cfg(target_arch = "wasm32")]
97        {
98            Self::from_opfs_path(path)
99        }
100        #[cfg(not(target_arch = "wasm32"))]
101        Self::from_filesystem_path(path)
102    }
103
104    pub fn parse(path: impl AsRef<str>) -> Result<Self, Error> {
105        let path = path.as_ref();
106
107        let stripped = path.strip_prefix(DELIMITER).unwrap_or(path);
108        if stripped.is_empty() {
109            return Ok(Default::default());
110        }
111
112        let stripped = stripped.strip_suffix(DELIMITER).unwrap_or(stripped);
113
114        for segment in stripped.split(DELIMITER) {
115            if segment.is_empty() {
116                return Err(Error::EmptySegment {
117                    path: path.to_string(),
118                });
119            }
120            PathPart::parse(segment).map_err(|err| Error::BadSegment {
121                path: path.to_string(),
122                source: err,
123            })?;
124        }
125
126        Ok(Self {
127            raw: stripped.to_string(),
128        })
129    }
130
131    pub fn from_url_path(path: impl AsRef<str>) -> Result<Self, Error> {
132        let path = path.as_ref();
133        let decoded = percent_decode(path.as_bytes())
134            .decode_utf8()
135            .map_err(|err| Error::NonUnicode {
136                path: path.to_string(),
137                source: err,
138            })?;
139
140        Self::parse(decoded)
141    }
142
143    pub fn parts(&self) -> impl Iterator<Item = PathPart<'_>> {
144        self.raw
145            .split_terminator(DELIMITER)
146            .map(|s| PathPart { raw: s.into() })
147    }
148
149    pub fn filename(&self) -> Option<&str> {
150        match self.raw.is_empty() {
151            true => None,
152            false => self.raw.rsplit(DELIMITER).next(),
153        }
154    }
155
156    pub fn extension(&self) -> Option<&str> {
157        self.filename()
158            .and_then(|f| f.rsplit_once('.'))
159            .and_then(|(_, extension)| {
160                if extension.is_empty() {
161                    None
162                } else {
163                    Some(extension)
164                }
165            })
166    }
167
168    pub fn prefix_match(&self, prefix: &Self) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
169        let mut stripped = self.raw.strip_prefix(&prefix.raw)?;
170        if !stripped.is_empty() && !prefix.raw.is_empty() {
171            stripped = stripped.strip_prefix(DELIMITER)?;
172        }
173        let iter = stripped
174            .split_terminator(DELIMITER)
175            .map(|x| PathPart { raw: x.into() });
176        Some(iter)
177    }
178
179    pub fn prefix_matches(&self, prefix: &Self) -> bool {
180        self.prefix_match(prefix).is_some()
181    }
182
183    pub fn child<'a>(&self, child: impl Into<PathPart<'a>>) -> Self {
184        let raw = match self.raw.is_empty() {
185            true => format!("{}", child.into().raw),
186            false => format!("{}{}{}", self.raw, DELIMITER, child.into().raw),
187        };
188
189        Self { raw }
190    }
191}
192
#[cfg(feature = "object_store")]
impl From<Path> for object_store::path::Path {
    /// Converts into an `object_store` path, keeping the string unchanged
    /// whenever it is already a valid `object_store` path.
    fn from(value: Path) -> Self {
        let raw: &str = value.as_ref();
        // `From<&str>` percent-encodes every segment, so an already-encoded
        // path would be encoded a second time ("%2F" -> "%252F"). Parse the
        // string verbatim first and only fall back to the encoding
        // conversion when it is rejected.
        object_store::path::Path::parse(raw)
            .unwrap_or_else(|_| object_store::path::Path::from(raw))
    }
}

#[cfg(feature = "object_store")]
impl From<object_store::path::Path> for Path {
    /// Converts from an `object_store` path, keeping the string unchanged
    /// whenever [`Path::parse`] accepts it.
    fn from(value: object_store::path::Path) -> Self {
        let raw: &str = value.as_ref();
        // Same reasoning as the opposite direction: avoid re-encoding a
        // string that is already in encoded form.
        Self::parse(raw).unwrap_or_else(|_| Self::from(raw))
    }
}
206
207impl AsRef<str> for Path {
208    fn as_ref(&self) -> &str {
209        &self.raw
210    }
211}
212
213impl From<&str> for Path {
214    fn from(path: &str) -> Self {
215        Self::from_iter(path.split(DELIMITER))
216    }
217}
218
219impl From<String> for Path {
220    fn from(path: String) -> Self {
221        Self::from_iter(path.split(DELIMITER))
222    }
223}
224
225impl From<Path> for String {
226    fn from(path: Path) -> Self {
227        path.raw
228    }
229}
230
impl std::fmt::Display for Path {
    /// Formats the path by delegating to the `fmt` of the stored string,
    /// passing the caller's formatter through unchanged.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        self.raw.fmt(f)
    }
}
236
237impl<'a, I> FromIterator<I> for Path
238where
239    I: Into<PathPart<'a>>,
240{
241    fn from_iter<T: IntoIterator<Item = I>>(iter: T) -> Self {
242        let raw = T::into_iter(iter)
243            .map(|s| s.into())
244            .filter(|s| !s.raw.is_empty())
245            .map(|s| s.raw)
246            .join(DELIMITER);
247
248        Self { raw }
249    }
250}
251
252#[cfg(not(target_arch = "wasm32"))]
253pub(crate) fn absolute_path_to_url(path: impl AsRef<std::path::Path>) -> Result<Url, Error> {
254    Url::from_file_path(&path).map_err(|_| Error::InvalidPath {
255        path: path.as_ref().into(),
256    })
257}
258
259#[cfg(not(target_arch = "wasm32"))]
260pub fn path_to_local(location: &Path) -> Result<PathBuf, Error> {
261    let mut url = Url::parse("file:///").unwrap();
262    url.path_segments_mut()
263        .expect("url path")
264        // technically not necessary as Path ignores empty segments
265        // but avoids creating paths with "//" which look odd in error messages.
266        .pop_if_empty()
267        .extend(location.parts());
268
269    let path = url.to_file_path().map_err(|_| Error::InvalidUrl { url })?;
270
271    #[cfg(target_os = "windows")]
272    let path = {
273        let path = path.to_string_lossy();
274
275        // Assume the first char is the drive letter and the next is a colon.
276        let mut out = String::new();
277        let drive = &path[..2]; // The drive letter and colon (e.g., "C:")
278        let filepath = &path[2..].replace(':', "%3A"); // Replace subsequent colons
279        out.push_str(drive);
280        out.push_str(filepath);
281        PathBuf::from(out)
282    };
283
284    Ok(path)
285}
286
#[cfg(test)]
#[cfg(not(target_arch = "wasm32"))]
mod tests {
    use std::fs::canonicalize;

    use tempfile::NamedTempFile;

    use super::*;

    /// Converts a list of string segments into the matching `PathPart`s.
    fn as_parts<'a>(names: &[&'a str]) -> Vec<PathPart<'a>> {
        names.iter().copied().map(Into::into).collect()
    }

    #[test]
    fn cloud_prefix_with_trailing_delimiter() {
        let built = Path::from_iter(["test"]);
        assert_eq!(built.as_ref(), "test");
    }

    #[test]
    fn push_encodes() {
        let encoded = Path::from_iter(["foo/bar", "baz%2Ftest"]);
        assert_eq!(encoded.as_ref(), "foo%2Fbar/baz%252Ftest");
    }

    #[test]
    fn test_parse() {
        // Inputs that parse, paired with the expected stored string.
        let accepted = [
            ("/", ""),
            ("", ""),
            ("/foo/bar/", "foo/bar"),
            ("foo/bar/", "foo/bar"),
            ("foo/bar", "foo/bar"),
        ];
        for (input, expected) in accepted {
            assert_eq!(Path::parse(input).unwrap().as_ref(), expected);
        }

        // Inputs rejected because of an empty segment.
        for input in ["//", "foo///bar"] {
            let err = Path::parse(input).unwrap_err();
            assert!(matches!(err, Error::EmptySegment { .. }));
        }
    }

    #[test]
    fn convert_raw_before_partial_eq() {
        let cases: [(&str, &[&str]); 5] = [
            // dir and file_name
            ("test_dir/test_file.json", &["test_dir", "test_file.json"]),
            // dir and file_name w/o dot
            ("test_dir/test_file", &["test_dir", "test_file"]),
            // dir, no file
            ("test_dir/", &["test_dir"]),
            // file_name, no dir
            ("test_file.json", &["test_file.json"]),
            // empty
            ("", &["", ""]),
        ];

        for (joined, segments) in cases {
            let cloud = Path::from(joined);
            let built = Path::from_iter(segments.iter().copied());
            assert_eq!(built, cloud);
        }
    }

    #[test]
    fn parts_after_prefix_behavior() {
        let existing_path = Path::from("apple/bear/cow/dog/egg.json");

        // Prefix with one directory
        let remaining: Vec<_> = existing_path
            .prefix_match(&Path::from("apple"))
            .unwrap()
            .collect();
        assert_eq!(remaining, as_parts(&["bear", "cow", "dog", "egg.json"]));

        // Prefix with two directories
        let remaining: Vec<_> = existing_path
            .prefix_match(&Path::from("apple/bear"))
            .unwrap()
            .collect();
        assert_eq!(remaining, as_parts(&["cow", "dog", "egg.json"]));

        // Not a prefix
        assert!(existing_path.prefix_match(&Path::from("cow")).is_none());

        // Prefix with a partial directory
        assert!(existing_path.prefix_match(&Path::from("ap")).is_none());

        // Prefix matches but there aren't any parts after it
        let same = Path::from("apple/bear/cow/dog");
        assert_eq!(same.prefix_match(&same).unwrap().count(), 0);
        assert_eq!(Path::default().parts().count(), 0);
    }

    #[test]
    fn prefix_matches() {
        let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something"]);

        // self starts with self
        assert!(
            haystack.prefix_matches(&haystack),
            "{haystack:?} should have started with {haystack:?}"
        );

        // a longer prefix doesn't match
        let needle = haystack.child("longer now");
        assert!(
            !haystack.prefix_matches(&needle),
            "{haystack:?} shouldn't have started with {needle:?}"
        );

        // one dir prefix, two dir prefix and the empty prefix all match
        let one_dir = Path::from_iter(["foo/bar"]);
        let two_dirs = one_dir.child("baz%2Ftest");
        for needle in [one_dir, two_dirs, Path::from("")] {
            assert!(
                haystack.prefix_matches(&needle),
                "{haystack:?} should have started with {needle:?}"
            );
        }

        // a partial dir, and one dir followed by a partial dir, don't match
        let partial = [
            Path::from_iter(["f"]),
            Path::from_iter(["foo/bar", "baz"]),
        ];
        for needle in partial {
            assert!(
                !haystack.prefix_matches(&needle),
                "{haystack:?} should not have started with {needle:?}"
            );
        }
    }

    #[test]
    fn prefix_matches_with_file_name() {
        let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something", "foo.segment"]);

        let needles = [
            // All directories match and file name is a prefix
            Path::from_iter(["foo/bar", "baz%2Ftest", "something", "foo"]),
            // All directories match but file name is not a prefix
            Path::from_iter(["foo/bar", "baz%2Ftest", "something", "e"]),
            // Not all directories match; file name is a prefix of the next
            // directory; this does not match
            Path::from_iter(["foo/bar", "baz%2Ftest", "s"]),
            // Not all directories match; file name is NOT a prefix of the
            // next directory; no match
            Path::from_iter(["foo/bar", "baz%2Ftest", "p"]),
        ];

        for needle in needles {
            assert!(
                !haystack.prefix_matches(&needle),
                "{haystack:?} should not have started with {needle:?}"
            );
        }
    }

    #[test]
    fn path_containing_spaces() {
        let from_parts = Path::from_iter(["foo bar", "baz"]);
        let from_str = Path::from("foo bar/baz");
        let parsed = Path::parse("foo bar/baz").unwrap();

        assert_eq!(from_parts.raw, "foo bar/baz");
        assert_eq!(from_parts.raw, from_str.raw);
        assert_eq!(from_str.raw, parsed.raw);
    }

    #[test]
    fn from_url_path() {
        // Inputs that decode and parse, paired with the expected raw string.
        let accepted = [
            ("foo%20bar", "foo bar"),
            ("foo%2F%252E%252E%2Fbar", "foo/%2E%2E/bar"),
            ("foo/%252E%252E/bar", "foo/%2E%2E/bar"),
            ("%48%45%4C%4C%4F", "HELLO"),
        ];
        for (input, expected) in accepted {
            assert_eq!(Path::from_url_path(input).unwrap().raw, expected);
        }

        let bad_segment = Path::from_url_path("foo/%2E%2E/bar").unwrap_err();
        assert!(matches!(bad_segment, Error::BadSegment { .. }));

        let not_unicode = Path::from_url_path("foo/%FF/as").unwrap_err();
        assert!(matches!(not_unicode, Error::NonUnicode { .. }));
    }

    #[test]
    fn filename_from_path() {
        let cases = [
            ("foo/bar", Some("bar")),
            ("foo/bar.baz", Some("bar.baz")),
            ("foo.bar/baz", Some("baz")),
        ];
        for (input, expected) in cases {
            assert_eq!(Path::from(input).filename(), expected);
        }
    }

    #[test]
    fn file_extension() {
        let cases = [
            ("foo/bar", None),
            ("foo/bar.baz", Some("baz")),
            ("foo.bar/baz", None),
            ("foo.bar/baz.qux", Some("qux")),
        ];
        for (input, expected) in cases {
            assert_eq!(Path::from(input).extension(), expected);
        }
    }

    #[test]
    fn test_path_to_local() {
        let scratch = NamedTempFile::new().unwrap();

        let converted = Path::from_filesystem_path(scratch.path()).unwrap();
        let round_tripped = path_to_local(&converted).unwrap();

        assert_eq!(round_tripped, canonicalize(scratch.path()).unwrap());
    }
}