1use std::{fmt::Formatter, path::PathBuf};
4
5use itertools::Itertools;
6use percent_encoding::percent_decode;
7use thiserror::Error;
8use url::Url;
9
/// The string that separates the segments of a [`Path`].
pub const DELIMITER: &str = "/";

/// The first byte of [`DELIMITER`].
pub const DELIMITER_BYTE: u8 = {
    let bytes = DELIMITER.as_bytes();
    bytes[0]
};
15
16mod parts;
17
18pub use parts::{InvalidPart, PathPart};
19
20#[derive(Debug, Error)]
21#[error(transparent)]
22pub enum Error {
23 #[error("Path \"{path}\" contained empty path segment")]
24 EmptySegment { path: String },
25 #[error("Error parsing Path \"{path}\": {source}")]
26 BadSegment { path: String, source: InvalidPart },
27 #[error("Failed to canonicalize path \"{path}\": {source}")]
28 Canonicalize {
29 path: std::path::PathBuf,
30 source: std::io::Error,
31 },
32 #[error("Unable to convert path \"{path}\" to URL")]
33 InvalidPath { path: PathBuf },
34 #[error("Unable to convert url \"{url}\" to Path")]
35 InvalidUrl { url: Url },
36 #[error("Path \"{path}\" contained non-unicode characters: {source}")]
37 NonUnicode {
38 path: String,
39 source: std::str::Utf8Error,
40 },
41 #[error("Path {path} does not start with prefix {prefix}")]
42 PrefixMismatch { path: String, prefix: String },
43}
44
/// A path made of segments joined by [`DELIMITER`].
///
/// The `Default` value is the empty path.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Path {
    /// The segments joined by [`DELIMITER`].
    raw: String,
}
49
50#[cfg(not(target_arch = "wasm32"))]
51impl Path {
52 pub fn from_filesystem_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
53 let absolute = std::fs::canonicalize(&path).map_err(|err| Error::Canonicalize {
54 path: path.as_ref().to_path_buf(),
55 source: err,
56 })?;
57
58 Self::from_absolute_path(absolute)
59 }
60
61 pub fn from_absolute_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
62 Self::from_absolute_path_with_base(path, None)
63 }
64
65 pub(crate) fn from_absolute_path_with_base(
66 path: impl AsRef<std::path::Path>,
67 base: Option<&url::Url>,
68 ) -> Result<Self, Error> {
69 let url = absolute_path_to_url(path)?;
70 let path = match base {
71 Some(prefix) => {
72 url.path()
73 .strip_prefix(prefix.path())
74 .ok_or_else(|| Error::PrefixMismatch {
75 path: url.path().to_string(),
76 prefix: prefix.to_string(),
77 })?
78 }
79 None => url.path(),
80 };
81
82 Self::from_url_path(path)
84 }
85}
86
#[cfg(target_arch = "wasm32")]
impl Path {
    /// Builds a [`Path`] from an OPFS path by parsing its UTF-8 text with [`Path::parse`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidPath`] when `path` is not valid UTF-8 (instead of
    /// panicking), otherwise any error from [`Path::parse`].
    pub fn from_opfs_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
        let path = path.as_ref();
        // `to_str` yields `None` for non-UTF-8 paths; surface that as an error
        // because this function already returns `Result`.
        let utf8 = path.to_str().ok_or_else(|| Error::InvalidPath {
            path: path.to_path_buf(),
        })?;
        Self::parse(utf8)
    }
}
93
94impl Path {
95 pub fn new(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
96 #[cfg(target_arch = "wasm32")]
97 {
98 Self::from_opfs_path(path)
99 }
100 #[cfg(not(target_arch = "wasm32"))]
101 Self::from_filesystem_path(path)
102 }
103
104 pub fn parse(path: impl AsRef<str>) -> Result<Self, Error> {
105 let path = path.as_ref();
106
107 let stripped = path.strip_prefix(DELIMITER).unwrap_or(path);
108 if stripped.is_empty() {
109 return Ok(Default::default());
110 }
111
112 let stripped = stripped.strip_suffix(DELIMITER).unwrap_or(stripped);
113
114 for segment in stripped.split(DELIMITER) {
115 if segment.is_empty() {
116 return Err(Error::EmptySegment {
117 path: path.to_string(),
118 });
119 }
120 PathPart::parse(segment).map_err(|err| Error::BadSegment {
121 path: path.to_string(),
122 source: err,
123 })?;
124 }
125
126 Ok(Self {
127 raw: stripped.to_string(),
128 })
129 }
130
131 pub fn from_url_path(path: impl AsRef<str>) -> Result<Self, Error> {
132 let path = path.as_ref();
133 let decoded = percent_decode(path.as_bytes())
134 .decode_utf8()
135 .map_err(|err| Error::NonUnicode {
136 path: path.to_string(),
137 source: err,
138 })?;
139
140 Self::parse(decoded)
141 }
142
143 pub fn parts(&self) -> impl Iterator<Item = PathPart<'_>> {
144 self.raw
145 .split_terminator(DELIMITER)
146 .map(|s| PathPart { raw: s.into() })
147 }
148
149 pub fn filename(&self) -> Option<&str> {
150 match self.raw.is_empty() {
151 true => None,
152 false => self.raw.rsplit(DELIMITER).next(),
153 }
154 }
155
156 pub fn extension(&self) -> Option<&str> {
157 self.filename()
158 .and_then(|f| f.rsplit_once('.'))
159 .and_then(|(_, extension)| {
160 if extension.is_empty() {
161 None
162 } else {
163 Some(extension)
164 }
165 })
166 }
167
168 pub fn prefix_match(&self, prefix: &Self) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
169 let mut stripped = self.raw.strip_prefix(&prefix.raw)?;
170 if !stripped.is_empty() && !prefix.raw.is_empty() {
171 stripped = stripped.strip_prefix(DELIMITER)?;
172 }
173 let iter = stripped
174 .split_terminator(DELIMITER)
175 .map(|x| PathPart { raw: x.into() });
176 Some(iter)
177 }
178
179 pub fn prefix_matches(&self, prefix: &Self) -> bool {
180 self.prefix_match(prefix).is_some()
181 }
182
183 pub fn child<'a>(&self, child: impl Into<PathPart<'a>>) -> Self {
184 let raw = match self.raw.is_empty() {
185 true => format!("{}", child.into().raw),
186 false => format!("{}{}{}", self.raw, DELIMITER, child.into().raw),
187 };
188
189 Self { raw }
190 }
191}
192
#[cfg(feature = "object_store")]
impl From<Path> for object_store::path::Path {
    /// Converts into an `object_store` path that has the same raw representation.
    fn from(value: Path) -> Self {
        let raw: &str = value.as_ref();
        // `raw` is already in encoded form. `From<&str>` treats every segment as an
        // unencoded name and percent-encodes it again (a `%` becomes `%25`), which
        // changes the object the path refers to. `parse` keeps the text as is.
        object_store::path::Path::parse(raw)
            // Keep the conversion infallible: fall back to the encoding conversion
            // if `object_store` rejects the raw text.
            .unwrap_or_else(|_| object_store::path::Path::from(raw))
    }
}
199
#[cfg(feature = "object_store")]
impl From<object_store::path::Path> for Path {
    /// Converts from an `object_store` path, keeping its raw representation.
    fn from(value: object_store::path::Path) -> Self {
        let raw: &str = value.as_ref();
        // The incoming text is already in encoded form. `From<&str>` would
        // percent-encode every segment a second time (a `%` becomes `%25`), so parse
        // it instead and only fall back to the encoding conversion if parsing fails.
        Self::parse(raw).unwrap_or_else(|_| Self::from(raw))
    }
}
206
207impl AsRef<str> for Path {
208 fn as_ref(&self) -> &str {
209 &self.raw
210 }
211}
212
213impl From<&str> for Path {
214 fn from(path: &str) -> Self {
215 Self::from_iter(path.split(DELIMITER))
216 }
217}
218
219impl From<String> for Path {
220 fn from(path: String) -> Self {
221 Self::from_iter(path.split(DELIMITER))
222 }
223}
224
225impl From<Path> for String {
226 fn from(path: Path) -> Self {
227 path.raw
228 }
229}
230
231impl std::fmt::Display for Path {
232 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
233 self.raw.fmt(f)
234 }
235}
236
237impl<'a, I> FromIterator<I> for Path
238where
239 I: Into<PathPart<'a>>,
240{
241 fn from_iter<T: IntoIterator<Item = I>>(iter: T) -> Self {
242 let raw = T::into_iter(iter)
243 .map(|s| s.into())
244 .filter(|s| !s.raw.is_empty())
245 .map(|s| s.raw)
246 .join(DELIMITER);
247
248 Self { raw }
249 }
250}
251
252#[cfg(not(target_arch = "wasm32"))]
253pub(crate) fn absolute_path_to_url(path: impl AsRef<std::path::Path>) -> Result<Url, Error> {
254 Url::from_file_path(&path).map_err(|_| Error::InvalidPath {
255 path: path.as_ref().into(),
256 })
257}
258
259#[cfg(not(target_arch = "wasm32"))]
260pub fn path_to_local(location: &Path) -> Result<PathBuf, Error> {
261 let mut url = Url::parse("file:///").unwrap();
262 url.path_segments_mut()
263 .expect("url path")
264 .pop_if_empty()
267 .extend(location.parts());
268
269 let path = url.to_file_path().map_err(|_| Error::InvalidUrl { url })?;
270
271 #[cfg(target_os = "windows")]
272 let path = {
273 let path = path.to_string_lossy();
274
275 let mut out = String::new();
277 let drive = &path[..2]; let filepath = &path[2..].replace(':', "%3A"); out.push_str(drive);
280 out.push_str(filepath);
281 PathBuf::from(out)
282 };
283
284 Ok(path)
285}
286
#[cfg(test)]
#[cfg(not(target_arch = "wasm32"))]
mod tests {
    use std::fs::canonicalize;

    use tempfile::NamedTempFile;

    use super::*;

    /// A single segment produces a path without any trailing delimiter.
    #[test]
    fn cloud_prefix_with_trailing_delimiter() {
        let single_segment = Path::from_iter(["test"]);
        let raw: &str = single_segment.as_ref();
        assert_eq!(raw, "test");
    }

    /// Building from segments percent-encodes delimiters and percent signs that
    /// appear inside a segment.
    #[test]
    fn push_encodes() {
        let encoded = Path::from_iter(["foo/bar", "baz%2Ftest"]);
        let raw: &str = encoded.as_ref();
        assert_eq!(raw, "foo%2Fbar/baz%252Ftest");
    }

    /// `parse` ignores one leading and one trailing delimiter and rejects empty segments.
    #[test]
    fn test_parse() {
        for root in ["/", ""] {
            assert_eq!(Path::parse(root).unwrap().as_ref(), "");
        }

        assert!(matches!(
            Path::parse("//"),
            Err(Error::EmptySegment { .. })
        ));

        for input in ["/foo/bar/", "foo/bar/", "foo/bar"] {
            assert_eq!(Path::parse(input).unwrap().as_ref(), "foo/bar");
        }

        assert!(matches!(
            Path::parse("foo///bar"),
            Err(Error::EmptySegment { .. })
        ));
    }

    /// A path built from a joined string equals the one built from its segments.
    #[test]
    fn convert_raw_before_partial_eq() {
        let cases: [(&str, &[&str]); 5] = [
            ("test_dir/test_file.json", &["test_dir", "test_file.json"]),
            ("test_dir/test_file", &["test_dir", "test_file"]),
            ("test_dir/", &["test_dir"]),
            ("test_file.json", &["test_file.json"]),
            ("", &["", ""]),
        ];

        for (joined, segments) in cases {
            let cloud = Path::from(joined);
            let built = Path::from_iter(segments.iter().copied());

            assert_eq!(built, cloud);
        }
    }

    /// `prefix_match` yields the segments after a matching prefix and `None` otherwise.
    #[test]
    fn parts_after_prefix_behavior() {
        let existing_path = Path::from("apple/bear/cow/dog/egg.json");

        let one_deep = Path::from("apple");
        let expected_parts: Vec<PathPart<'_>> = ["bear", "cow", "dog", "egg.json"]
            .iter()
            .map(|segment| (*segment).into())
            .collect();
        let parts: Vec<_> = existing_path.prefix_match(&one_deep).unwrap().collect();
        assert_eq!(parts, expected_parts);

        let two_deep = Path::from("apple/bear");
        let expected_parts: Vec<PathPart<'_>> = ["cow", "dog", "egg.json"]
            .iter()
            .map(|segment| (*segment).into())
            .collect();
        let parts: Vec<_> = existing_path.prefix_match(&two_deep).unwrap().collect();
        assert_eq!(parts, expected_parts);

        // Neither a later segment nor a partial first segment counts as a prefix.
        for non_prefix in ["cow", "ap"] {
            let prefix = Path::from(non_prefix);
            assert!(existing_path.prefix_match(&prefix).is_none());
        }

        let existing = Path::from("apple/bear/cow/dog");

        assert_eq!(existing.prefix_match(&existing).unwrap().count(), 0);
        assert_eq!(Path::default().parts().count(), 0);
    }

    /// `prefix_matches` only accepts prefixes that end on a segment boundary.
    #[test]
    fn prefix_matches() {
        let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something"]);
        assert!(
            haystack.prefix_matches(&haystack),
            "{haystack:?} should have started with {haystack:?}"
        );

        let needle = haystack.child("longer now");
        assert!(
            !haystack.prefix_matches(&needle),
            "{haystack:?} shouldn't have started with {needle:?}"
        );

        let first_segment = Path::from_iter(["foo/bar"]);
        let first_two_segments = first_segment.child("baz%2Ftest");
        for needle in [&first_segment, &first_two_segments] {
            assert!(
                haystack.prefix_matches(needle),
                "{haystack:?} should have started with {needle:?}"
            );
        }

        let partial_segments = [
            Path::from_iter(["f"]),
            Path::from_iter(["foo/bar", "baz"]),
        ];
        for needle in &partial_segments {
            assert!(
                !haystack.prefix_matches(needle),
                "{haystack:?} should not have started with {needle:?}"
            );
        }

        let needle = Path::from("");
        assert!(
            haystack.prefix_matches(&needle),
            "{haystack:?} should have started with {needle:?}"
        );
    }

    /// A prefix whose last segment only partially matches is not a prefix.
    #[test]
    fn prefix_matches_with_file_name() {
        let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something", "foo.segment"]);

        let needles = [
            Path::from_iter(["foo/bar", "baz%2Ftest", "something", "foo"]),
            Path::from_iter(["foo/bar", "baz%2Ftest", "something", "e"]),
            Path::from_iter(["foo/bar", "baz%2Ftest", "s"]),
            Path::from_iter(["foo/bar", "baz%2Ftest", "p"]),
        ];

        for needle in &needles {
            assert!(
                !haystack.prefix_matches(needle),
                "{haystack:?} should not have started with {needle:?}"
            );
        }
    }

    /// Spaces are kept as they are by every constructor.
    #[test]
    fn path_containing_spaces() {
        let from_segments = Path::from_iter(["foo bar", "baz"]);
        let from_str = Path::from("foo bar/baz");
        let parsed = Path::parse("foo bar/baz").unwrap();

        assert_eq!(from_segments.raw, "foo bar/baz");
        assert_eq!(from_segments.raw, from_str.raw);
        assert_eq!(from_str.raw, parsed.raw);
    }

    /// `from_url_path` decodes one level of percent-encoding before parsing.
    #[test]
    fn from_url_path() {
        let space = Path::from_url_path("foo%20bar").unwrap();
        assert_eq!(space.raw, "foo bar");

        let decoded_dot_dot = Path::from_url_path("foo/%2E%2E/bar").unwrap_err();
        assert!(matches!(decoded_dot_dot, Error::BadSegment { .. }));

        let encoded_delimiters = Path::from_url_path("foo%2F%252E%252E%2Fbar").unwrap();
        assert_eq!(encoded_delimiters.raw, "foo/%2E%2E/bar");

        let double_encoded = Path::from_url_path("foo/%252E%252E/bar").unwrap();
        assert_eq!(double_encoded.raw, "foo/%2E%2E/bar");

        let letters = Path::from_url_path("%48%45%4C%4C%4F").unwrap();
        assert_eq!(letters.raw, "HELLO");

        let invalid_utf8 = Path::from_url_path("foo/%FF/as").unwrap_err();
        assert!(matches!(invalid_utf8, Error::NonUnicode { .. }));
    }

    /// `filename` returns the last segment.
    #[test]
    fn filename_from_path() {
        let cases = [
            ("foo/bar", Some("bar")),
            ("foo/bar.baz", Some("bar.baz")),
            ("foo.bar/baz", Some("baz")),
        ];

        for (input, expected) in cases {
            let path = Path::from(input);
            assert_eq!(path.filename(), expected);
        }
    }

    /// `extension` only looks at the last segment.
    #[test]
    fn file_extension() {
        let cases = [
            ("foo/bar", None),
            ("foo/bar.baz", Some("baz")),
            ("foo.bar/baz", None),
            ("foo.bar/baz.qux", Some("qux")),
        ];

        for (input, expected) in cases {
            let path = Path::from(input);
            assert_eq!(path.extension(), expected);
        }
    }

    /// A filesystem path converted to a [`Path`] and back yields its canonical form.
    #[test]
    fn test_path_to_local() {
        let temp_file = NamedTempFile::new().unwrap();

        let converted = Path::from_filesystem_path(temp_file.path()).unwrap();
        let local = path_to_local(&converted).unwrap();

        assert_eq!(local, canonicalize(temp_file.path()).unwrap());
    }
}