netidx_core/path.rs
1//! Hierarchical path handling and manipulation.
2use crate::pack::{Pack, PackError};
3use arcstr::{literal, ArcStr};
4use bytes::{Buf, BufMut};
5use escaping::Escape;
6use std::{
7 borrow::{Borrow, Cow},
8 cell::RefCell,
9 cmp::{Eq, Ord, PartialEq, PartialOrd},
10 convert::{AsRef, From},
11 fmt,
12 iter::{DoubleEndedIterator, Iterator},
13 ops::Deref,
14 result::Result,
15 str::{self, FromStr},
16 sync::LazyLock,
17};
18
19pub const SEP: char = '/';
20pub const ROOT: &str = "/";
21pub const PATH_ESC: LazyLock<Escape> =
22 LazyLock::new(|| Escape::new('\\', &['\\', '/'], &[], None).unwrap());
23
24fn is_canonical(s: &str) -> bool {
25 for _ in Path::parts(s).filter(|p| *p == "") {
26 return false;
27 }
28 true
29}
30
31fn canonize(s: &str) -> String {
32 let mut res = String::with_capacity(s.len());
33 if s.len() > 0 {
34 if s.starts_with(SEP) {
35 res.push(SEP)
36 }
37 let mut first = true;
38 for p in Path::parts(s).filter(|p| *p != "") {
39 if first {
40 first = false;
41 } else {
42 res.push(SEP)
43 }
44 res.push_str(p);
45 }
46 }
47 res
48}
49
50/// A path in the netidx namespace.
51///
52/// Paths are immutable and reference counted.
53/// Path components are seperated by /, which may be escaped
54/// with \. / and \ are the only special characters in path, any other
55/// unicode character may be used. Path lengths are not limited on the
56/// local machine, but may be restricted by maximum message size on
57/// the wire.
58#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
59pub struct Path(ArcStr);
60
61impl Pack for Path {
62 fn encoded_len(&self) -> usize {
63 <ArcStr as Pack>::encoded_len(&self.0)
64 }
65
66 fn encode(&self, buf: &mut impl BufMut) -> Result<(), PackError> {
67 <ArcStr as Pack>::encode(&self.0, buf)
68 }
69
70 fn decode(buf: &mut impl Buf) -> Result<Self, PackError> {
71 Ok(Path::from(<ArcStr as Pack>::decode(buf)?))
72 }
73}
74
75impl fmt::Display for Path {
76 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
77 self.0.fmt(f)
78 }
79}
80
81impl AsRef<str> for Path {
82 fn as_ref(&self) -> &str {
83 &*self.0
84 }
85}
86
87impl Borrow<str> for Path {
88 fn borrow(&self) -> &str {
89 &*self.0
90 }
91}
92
93impl Deref for Path {
94 type Target = str;
95
96 fn deref(&self) -> &Self::Target {
97 &self.0
98 }
99}
100
101impl From<String> for Path {
102 fn from(s: String) -> Path {
103 if is_canonical(&s) {
104 Path(ArcStr::from(s))
105 } else {
106 Path(ArcStr::from(canonize(&s)))
107 }
108 }
109}
110
111impl From<&'static str> for Path {
112 fn from(s: &'static str) -> Path {
113 if is_canonical(s) {
114 Path(ArcStr::from(s))
115 } else {
116 Path(ArcStr::from(canonize(s)))
117 }
118 }
119}
120
121impl<'a> From<&'a String> for Path {
122 fn from(s: &String) -> Path {
123 if is_canonical(s.as_str()) {
124 Path(ArcStr::from(s.clone()))
125 } else {
126 Path(ArcStr::from(canonize(s.as_str())))
127 }
128 }
129}
130
131impl From<ArcStr> for Path {
132 fn from(s: ArcStr) -> Path {
133 if is_canonical(&*s) {
134 Path(s)
135 } else {
136 Path(ArcStr::from(canonize(&*s)))
137 }
138 }
139}
140
141impl From<&ArcStr> for Path {
142 fn from(s: &ArcStr) -> Path {
143 if is_canonical(s) {
144 Path(s.clone())
145 } else {
146 Path(ArcStr::from(canonize(s)))
147 }
148 }
149}
150
151impl<C: Borrow<str>> FromIterator<C> for Path {
152 fn from_iter<T: IntoIterator<Item = C>>(iter: T) -> Self {
153 thread_local! {
154 static BUF: RefCell<String> = RefCell::new(String::new());
155 }
156 BUF.with_borrow_mut(|buf| {
157 buf.clear();
158 buf.push(SEP);
159 for c in iter {
160 PATH_ESC.escape_to(c.borrow(), buf);
161 buf.push(SEP)
162 }
163 if buf.len() > 1 {
164 buf.pop(); // remove trailing sep
165 }
166 Self(ArcStr::from(buf.as_str()))
167 })
168 }
169}
170
171impl FromStr for Path {
172 type Err = anyhow::Error;
173
174 fn from_str(s: &str) -> Result<Self, Self::Err> {
175 if is_canonical(s) {
176 Ok(Path(ArcStr::from(s)))
177 } else {
178 Ok(Path(ArcStr::from(canonize(s))))
179 }
180 }
181}
182
183impl Into<ArcStr> for Path {
184 fn into(self) -> ArcStr {
185 self.0
186 }
187}
188
/// Iterator over the dirnames of a path, created by `Path::dirnames`.
pub enum DirNames<'a> {
    /// Iterating over the root path. The flag is true while `/` has not
    /// been yielded yet.
    Root(bool),
    /// Iterating over any other path.
    Path {
        /// The slice forward iteration searches for the next separator.
        cur: &'a str,
        /// The path being iterated. Reverse iteration shortens it from
        /// the end.
        all: &'a str,
        /// Byte offset into `all` that tracks how far forward iteration
        /// has progressed.
        base: usize,
    },
}
193
194impl<'a> Iterator for DirNames<'a> {
195 type Item = &'a str;
196
197 fn next(&mut self) -> Option<Self::Item> {
198 match self {
199 DirNames::Path { cur, all, base } => {
200 if *base >= all.len() {
201 None
202 } else {
203 match Path::find_sep(cur) {
204 None => {
205 *base = all.len();
206 Some(all)
207 }
208 Some(p) => {
209 *base += p + 1;
210 *cur = &all[*base..];
211 Some(&all[0..*base - 1])
212 }
213 }
214 }
215 }
216 DirNames::Root(true) => {
217 *self = DirNames::Root(false);
218 Some("/")
219 }
220 DirNames::Root(false) => None,
221 }
222 }
223}
224
225impl<'a> DoubleEndedIterator for DirNames<'a> {
226 fn next_back(&mut self) -> Option<Self::Item> {
227 match self {
228 DirNames::Path { cur: _, all, base: _ } => match Path::dirname(*all) {
229 Some(dn) => {
230 let res = *all;
231 *all = dn;
232 Some(res)
233 }
234 None => {
235 if all == &ROOT {
236 *self = DirNames::Root(false);
237 Some("/")
238 } else {
239 let res = *all;
240 *all = &ROOT;
241 Some(res)
242 }
243 }
244 },
245 DirNames::Root(true) => {
246 *self = DirNames::Root(false);
247 Some("/")
248 }
249 DirNames::Root(false) => None,
250 }
251 }
252}
253
254impl Path {
255 pub const SEP: char = SEP;
256 pub const ROOT: &str = ROOT;
257
258 /// create a path from a non static str by copying the contents of the str
259 pub fn from_str(s: &str) -> Self {
260 if is_canonical(s) {
261 Path(ArcStr::from(s))
262 } else {
263 Path(ArcStr::from(canonize(s)))
264 }
265 }
266
267 /// returns /
268 pub fn root() -> Path {
269 Path(literal!("/"))
270 }
271
272 /// returns true if the path starts with /, false otherwise
273 pub fn is_absolute<T: AsRef<str> + ?Sized>(p: &T) -> bool {
274 p.as_ref().starts_with(SEP)
275 }
276
277 /// true if this path is a parent to the specified path. A path is it's own parent.
278 ///
279 /// # Examples
280 /// ```
281 /// use netidx_core::path::Path;
282 /// assert!(Path::is_parent("/", "/foo/bar/baz"));
283 /// assert!(Path::is_parent("/foo/bar", "/foo/bar/baz"));
284 /// assert!(!Path::is_parent("/foo/bar", "/foo/bareth/bazeth"));
285 /// assert!(Path::is_parent("/foo/bar", "/foo/bar"));
286 /// ```
287 pub fn is_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
288 parent: &T,
289 other: &U,
290 ) -> bool {
291 let parent = parent.as_ref();
292 let other = other.as_ref();
293 parent == "/"
294 || (other.starts_with(parent)
295 && (other.len() == parent.len()
296 || other.as_bytes()[parent.len()] == SEP as u8))
297 }
298
299 /// true if this path is the parent to the specified path, and is
300 /// exactly 1 level above the specfied path.
301 ///
302 /// # Examples
303 /// ```
304 /// use netidx_core::path::Path;
305 /// assert!(!Path::is_immediate_parent("/", "/foo/bar/baz"));
306 /// assert!(Path::is_immediate_parent("/foo/bar", "/foo/bar/baz"));
307 /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bareth/bazeth"));
308 /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bar"));
309 /// assert!(!Path::is_immediate_parent("/", "/"));
310 /// ```
311 pub fn is_immediate_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
312 parent: &T,
313 other: &U,
314 ) -> bool {
315 let parent = if parent.as_ref() == "/" { None } else { Some(parent.as_ref()) };
316 other.as_ref().len() > 0
317 && other.as_ref() != "/"
318 && Path::dirname(other) == parent
319 }
320
321 /// strips prefix from path at the separator boundry, including
322 /// the separator. Returns None if prefix is not a parent of path
323 /// (even if it happens to be a prefix).
324 pub fn strip_prefix<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
325 prefix: &T,
326 path: &'a U,
327 ) -> Option<&'a str> {
328 if Path::is_parent(prefix, path) {
329 path.as_ref()
330 .strip_prefix(prefix.as_ref())
331 .map(|s| s.strip_prefix("/").unwrap_or(s))
332 } else {
333 None
334 }
335 }
336
337 /// finds the longest common parent of the two specified paths, /
338 /// in the case they are completely disjoint.
339 pub fn lcp<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
340 path0: &'a T,
341 path1: &'a U,
342 ) -> &'a str {
343 let (mut p0, p1) = if path0.as_ref().len() <= path1.as_ref().len() {
344 (path0.as_ref(), path1.as_ref())
345 } else {
346 (path1.as_ref(), path0.as_ref())
347 };
348 loop {
349 if Path::is_parent(p0, p1) {
350 return p0;
351 } else {
352 match Path::dirname(p0) {
353 Some(p) => p0 = p,
354 None => return "/",
355 }
356 }
357 }
358 }
359
360 /// This will escape the path seperator and the escape character
361 /// in a path part. If you want to be sure that e.g. `append` will
362 /// only append 1 level, then you should call this function on
363 /// your part before appending it.
364 ///
365 /// # Examples
366 /// ```
367 /// use netidx_core::path::Path;
368 /// assert_eq!("foo\\/bar", &*Path::escape("foo/bar"));
369 /// assert_eq!("\\\\hello world", &*Path::escape("\\hello world"));
370 /// ```
371 pub fn escape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
372 PATH_ESC.escape(s)
373 }
374
375 /// This will unescape the path seperator and the escape character
376 /// in a path part.
377 ///
378 /// # Examples
379 /// ```
380 /// use netidx_core::path::Path;
381 /// assert_eq!("foo/bar", &*Path::unescape("foo\\/bar"));
382 /// assert_eq!("\\hello world", &*Path::unescape("\\\\hello world"));
383 /// ```
384 pub fn unescape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
385 PATH_ESC.unescape(s)
386 }
387
388 /// return a new path with the specified string appended as a new
389 /// part separated by the pathsep char.
390 ///
391 /// # Examples
392 /// ```
393 /// use netidx_core::path::Path;
394 /// let p = Path::root().append("bar").append("baz");
395 /// assert_eq!(&*p, "/bar/baz");
396 ///
397 /// let p = Path::root().append("/bar").append("//baz//////foo/");
398 /// assert_eq!(&*p, "/bar/baz/foo");
399 /// ```
400 pub fn append<T: AsRef<str> + ?Sized>(&self, other: &T) -> Self {
401 let other = other.as_ref();
402 if other.len() == 0 {
403 self.clone()
404 } else {
405 let mut res = String::with_capacity(self.as_ref().len() + other.len());
406 res.push_str(self.as_ref());
407 res.push(SEP);
408 res.push_str(other);
409 Path::from(res)
410 }
411 }
412
413 /// alias for append
414 pub fn join<T: AsRef<str> + ?Sized>(&self, other: &T) -> Self {
415 self.append(other)
416 }
417
418 /// return an iterator over the parts of the path. The path
419 /// separator may be escaped with \. and a literal \ may be
420 /// represented as \\.
421 ///
422 /// # Examples
423 /// ```
424 /// use netidx_core::path::Path;
425 /// let p = Path::from("/foo/bar/baz");
426 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
427 ///
428 /// let p = Path::from(r"/foo\/bar/baz");
429 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\/bar", "baz"]);
430 ///
431 /// let p = Path::from(r"/foo\\/bar/baz");
432 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\", "bar", "baz"]);
433 ///
434 /// let p = Path::from(r"/foo\\\/bar/baz");
435 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\\/bar", "baz"]);
436 /// ```
437 pub fn parts<T: AsRef<str> + ?Sized>(s: &T) -> impl Iterator<Item = &str> {
438 let s = s.as_ref();
439 let skip = if s == "/" {
440 2
441 } else if s.starts_with("/") {
442 1
443 } else {
444 0
445 };
446 let e = PATH_ESC.clone();
447 e.split(s, SEP).skip(skip)
448 }
449
450 /// Return an iterator over all the dirnames in the path starting
451 /// from the root and ending with the entire path.
452 ///
453 /// # Examples
454 /// ```
455 /// use netidx_core::path::Path;
456 /// let p = Path::from("/some/path/ending/in/foo");
457 /// let mut bn = Path::dirnames(&p);
458 /// assert_eq!(bn.next(), Some("/"));
459 /// assert_eq!(bn.next(), Some("/some"));
460 /// assert_eq!(bn.next(), Some("/some/path"));
461 /// assert_eq!(bn.next(), Some("/some/path/ending"));
462 /// assert_eq!(bn.next(), Some("/some/path/ending/in"));
463 /// assert_eq!(bn.next(), Some("/some/path/ending/in/foo"));
464 /// assert_eq!(bn.next(), None);
465 /// let mut bn = Path::dirnames(&p);
466 /// assert_eq!(bn.next_back(), Some("/some/path/ending/in/foo"));
467 /// assert_eq!(bn.next_back(), Some("/some/path/ending/in"));
468 /// assert_eq!(bn.next_back(), Some("/some/path/ending"));
469 /// assert_eq!(bn.next_back(), Some("/some/path"));
470 /// assert_eq!(bn.next_back(), Some("/some"));
471 /// assert_eq!(bn.next_back(), Some("/"));
472 /// assert_eq!(bn.next_back(), None);
473 /// ```
474 pub fn dirnames<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> DirNames<'a> {
475 let s = s.as_ref();
476 if s == "/" {
477 DirNames::Root(true)
478 } else {
479 DirNames::Path { cur: s, all: s, base: 1 }
480 }
481 }
482
483 /// Return the number of levels in the path.
484 ///
485 /// # Examples
486 /// ```
487 /// use netidx_core::path::Path;
488 /// let p = Path::from("/foo/bar/baz");
489 /// assert_eq!(Path::levels(&p), 3);
490 /// ```
491 pub fn levels<T: AsRef<str> + ?Sized>(s: &T) -> usize {
492 let mut p = 0;
493 for _ in Path::parts(s) {
494 p += 1
495 }
496 p
497 }
498
499 /// return the path without the last part, or return None if the
500 /// path is empty or /.
501 ///
502 /// # Examples
503 /// ```
504 /// use netidx_core::path::Path;
505 /// let p = Path::from("/foo/bar/baz");
506 /// assert_eq!(Path::dirname(&p), Some("/foo/bar"));
507 ///
508 /// let p = Path::root();
509 /// assert_eq!(Path::dirname(&p), None);
510 ///
511 /// let p = Path::from("/foo");
512 /// assert_eq!(Path::dirname(&p), None);
513 /// ```
514 pub fn dirname<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Option<&'a str> {
515 let s = s.as_ref();
516 Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i]) })
517 }
518
519 pub fn dirname_with_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
520 let s = s.as_ref();
521 Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i + 1]) })
522 }
523
524 /// return the last part of the path, or return None if the path
525 /// is empty.
526 ///
527 /// # Examples
528 /// ```
529 /// use netidx_core::path::Path;
530 /// let p = Path::from("/foo/bar/baz");
531 /// assert_eq!(Path::basename(&p), Some("baz"));
532 ///
533 /// let p = Path::from("foo");
534 /// assert_eq!(Path::basename(&p), Some("foo"));
535 ///
536 /// let p = Path::from("foo/bar");
537 /// assert_eq!(Path::basename(&p), Some("bar"));
538 ///
539 /// let p = Path::from("");
540 /// assert_eq!(Path::basename(&p), None);
541 ///
542 /// let p = Path::from("/");
543 /// assert_eq!(Path::basename(&p), None);
544 /// ```
545 pub fn basename<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
546 let s = s.as_ref();
547 match Path::rfind_sep(s) {
548 None => {
549 if s.len() > 0 {
550 Some(s)
551 } else {
552 None
553 }
554 }
555 Some(i) => {
556 if s.len() <= 1 {
557 None
558 } else {
559 Some(&s[i + 1..s.len()])
560 }
561 }
562 }
563 }
564
565 fn find_sep_int<F: Fn(&str) -> Option<usize>>(mut s: &str, f: F) -> Option<usize> {
566 if s.len() == 0 {
567 None
568 } else {
569 loop {
570 match f(s) {
571 None => return None,
572 Some(i) => {
573 if !PATH_ESC.is_escaped(s, i) {
574 return Some(i);
575 } else {
576 s = &s[0..i];
577 }
578 }
579 }
580 }
581 }
582 }
583
584 /// return the position of the last path separator in the path, or
585 /// None if there isn't one.
586 ///
587 /// # Examples
588 /// ```
589 /// use netidx_core::path::Path;
590 /// let p = Path::from("/foo/bar/baz");
591 /// assert_eq!(Path::rfind_sep(&p), Some(8));
592 ///
593 /// let p = Path::from("");
594 /// assert_eq!(Path::rfind_sep(&p), None);
595 /// ```
596 pub fn rfind_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
597 let s = s.as_ref();
598 Path::find_sep_int(s, |s| s.rfind(SEP))
599 }
600
601 /// return the position of the first path separator in the path, or
602 /// None if there isn't one.
603 ///
604 /// # Examples
605 /// ```
606 /// use netidx_core::path::Path;
607 /// let p = Path::from("foo/bar/baz");
608 /// assert_eq!(Path::find_sep(&p), Some(3));
609 ///
610 /// let p = Path::from("");
611 /// assert_eq!(Path::find_sep(&p), None);
612 /// ```
613 pub fn find_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
614 let s = s.as_ref();
615 Path::find_sep_int(s, |s| s.find(SEP))
616 }
617}