netidx_core/path.rs
1use crate::pack::{Pack, PackError};
2use arcstr::ArcStr;
3use bytes::{Buf, BufMut};
4use escaping::Escape;
5use std::{
6 borrow::{Borrow, Cow},
7 cell::RefCell,
8 cmp::{Eq, Ord, PartialEq, PartialOrd},
9 convert::{AsRef, From},
10 fmt,
11 iter::{DoubleEndedIterator, Iterator},
12 ops::Deref,
13 result::Result,
14 str::{self, FromStr},
15 sync::LazyLock,
16};
17
18pub const SEP: char = '/';
19pub const ROOT: &str = "/";
20pub const PATH_ESC: LazyLock<Escape> =
21 LazyLock::new(|| Escape::new('\\', &['\\', '/'], &[], None).unwrap());
22
23fn is_canonical(s: &str) -> bool {
24 for _ in Path::parts(s).filter(|p| *p == "") {
25 return false;
26 }
27 true
28}
29
30fn canonize(s: &str) -> String {
31 let mut res = String::with_capacity(s.len());
32 if s.len() > 0 {
33 if s.starts_with(SEP) {
34 res.push(SEP)
35 }
36 let mut first = true;
37 for p in Path::parts(s).filter(|p| *p != "") {
38 if first {
39 first = false;
40 } else {
41 res.push(SEP)
42 }
43 res.push_str(p);
44 }
45 }
46 res
47}
48
49/// A path in the namespace. Paths are immutable and reference
50/// counted. Path components are seperated by /, which may be escaped
51/// with \. / and \ are the only special characters in path, any other
52/// unicode character may be used. Path lengths are not limited on the
53/// local machine, but may be restricted by maximum message size on
54/// the wire.
55#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
56pub struct Path(ArcStr);
57
58impl Pack for Path {
59 fn encoded_len(&self) -> usize {
60 <ArcStr as Pack>::encoded_len(&self.0)
61 }
62
63 fn encode(&self, buf: &mut impl BufMut) -> Result<(), PackError> {
64 <ArcStr as Pack>::encode(&self.0, buf)
65 }
66
67 fn decode(buf: &mut impl Buf) -> Result<Self, PackError> {
68 Ok(Path::from(<ArcStr as Pack>::decode(buf)?))
69 }
70}
71
72impl fmt::Display for Path {
73 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74 self.0.fmt(f)
75 }
76}
77
78impl AsRef<str> for Path {
79 fn as_ref(&self) -> &str {
80 &*self.0
81 }
82}
83
84impl Borrow<str> for Path {
85 fn borrow(&self) -> &str {
86 &*self.0
87 }
88}
89
90impl Deref for Path {
91 type Target = str;
92
93 fn deref(&self) -> &Self::Target {
94 &self.0
95 }
96}
97
98impl From<String> for Path {
99 fn from(s: String) -> Path {
100 if is_canonical(&s) {
101 Path(ArcStr::from(s))
102 } else {
103 Path(ArcStr::from(canonize(&s)))
104 }
105 }
106}
107
108impl From<&'static str> for Path {
109 fn from(s: &'static str) -> Path {
110 if is_canonical(s) {
111 Path(ArcStr::from(s))
112 } else {
113 Path(ArcStr::from(canonize(s)))
114 }
115 }
116}
117
118impl<'a> From<&'a String> for Path {
119 fn from(s: &String) -> Path {
120 if is_canonical(s.as_str()) {
121 Path(ArcStr::from(s.clone()))
122 } else {
123 Path(ArcStr::from(canonize(s.as_str())))
124 }
125 }
126}
127
128impl From<ArcStr> for Path {
129 fn from(s: ArcStr) -> Path {
130 if is_canonical(&*s) {
131 Path(s)
132 } else {
133 Path(ArcStr::from(canonize(&*s)))
134 }
135 }
136}
137
138impl From<&ArcStr> for Path {
139 fn from(s: &ArcStr) -> Path {
140 if is_canonical(s) {
141 Path(s.clone())
142 } else {
143 Path(ArcStr::from(canonize(s)))
144 }
145 }
146}
147
148impl<C: Borrow<str>> FromIterator<C> for Path {
149 fn from_iter<T: IntoIterator<Item = C>>(iter: T) -> Self {
150 thread_local! {
151 static BUF: RefCell<String> = RefCell::new(String::new());
152 }
153 BUF.with_borrow_mut(|buf| {
154 buf.clear();
155 buf.push(SEP);
156 for c in iter {
157 PATH_ESC.escape_to(c.borrow(), buf);
158 buf.push(SEP)
159 }
160 if buf.len() > 1 {
161 buf.pop(); // remove trailing sep
162 }
163 Self(ArcStr::from(buf.as_str()))
164 })
165 }
166}
167
168impl FromStr for Path {
169 type Err = anyhow::Error;
170
171 fn from_str(s: &str) -> Result<Self, Self::Err> {
172 if is_canonical(s) {
173 Ok(Path(ArcStr::from(s)))
174 } else {
175 Ok(Path(ArcStr::from(canonize(s))))
176 }
177 }
178}
179
180impl Into<ArcStr> for Path {
181 fn into(self) -> ArcStr {
182 self.0
183 }
184}
185
/// The iterator returned by `Path::dirnames`.
pub enum DirNames<'a> {
    /// The path being walked is `/`. The flag is true until `/` has
    /// been yielded.
    Root(bool),
    /// Any other path. `all` is the text the yielded dirnames are
    /// sliced from (iterating from the back shortens it), `cur` and
    /// `base` are the position bookkeeping of iteration from the front.
    Path { cur: &'a str, all: &'a str, base: usize },
}
190
191impl<'a> Iterator for DirNames<'a> {
192 type Item = &'a str;
193
194 fn next(&mut self) -> Option<Self::Item> {
195 match self {
196 DirNames::Path { cur, all, base } => {
197 if *base >= all.len() {
198 None
199 } else {
200 match Path::find_sep(cur) {
201 None => {
202 *base = all.len();
203 Some(all)
204 }
205 Some(p) => {
206 *base += p + 1;
207 *cur = &all[*base..];
208 Some(&all[0..*base - 1])
209 }
210 }
211 }
212 }
213 DirNames::Root(true) => {
214 *self = DirNames::Root(false);
215 Some("/")
216 }
217 DirNames::Root(false) => None,
218 }
219 }
220}
221
222impl<'a> DoubleEndedIterator for DirNames<'a> {
223 fn next_back(&mut self) -> Option<Self::Item> {
224 match self {
225 DirNames::Path { cur: _, all, base: _ } => match Path::dirname(*all) {
226 Some(dn) => {
227 let res = *all;
228 *all = dn;
229 Some(res)
230 }
231 None => {
232 if all == &ROOT {
233 *self = DirNames::Root(false);
234 Some("/")
235 } else {
236 let res = *all;
237 *all = &ROOT;
238 Some(res)
239 }
240 }
241 },
242 DirNames::Root(true) => {
243 *self = DirNames::Root(false);
244 Some("/")
245 }
246 DirNames::Root(false) => None,
247 }
248 }
249}
250
251impl Path {
252 pub const SEP: char = SEP;
253 pub const ROOT: &str = ROOT;
254
255 /// create a path from a non static str by copying the contents of the str
256 pub fn from_str(s: &str) -> Self {
257 if is_canonical(s) {
258 Path(ArcStr::from(s))
259 } else {
260 Path(ArcStr::from(canonize(s)))
261 }
262 }
263
264 /// returns /
265 pub fn root() -> Path {
266 // CR estokes: need a good solution for using SEP here
267 Path::from("/")
268 }
269
270 /// returns true if the path starts with /, false otherwise
271 pub fn is_absolute<T: AsRef<str> + ?Sized>(p: &T) -> bool {
272 p.as_ref().starts_with(SEP)
273 }
274
275 /// true if this path is a parent to the specified path. A path is it's own parent.
276 ///
277 /// # Examples
278 /// ```
279 /// use netidx_core::path::Path;
280 /// assert!(Path::is_parent("/", "/foo/bar/baz"));
281 /// assert!(Path::is_parent("/foo/bar", "/foo/bar/baz"));
282 /// assert!(!Path::is_parent("/foo/bar", "/foo/bareth/bazeth"));
283 /// assert!(Path::is_parent("/foo/bar", "/foo/bar"));
284 /// ```
285 pub fn is_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
286 parent: &T,
287 other: &U,
288 ) -> bool {
289 let parent = parent.as_ref();
290 let other = other.as_ref();
291 parent == "/"
292 || (other.starts_with(parent)
293 && (other.len() == parent.len()
294 || other.as_bytes()[parent.len()] == SEP as u8))
295 }
296
297 /// true if this path is the parent to the specified path, and is
298 /// exactly 1 level above the specfied path.
299 ///
300 /// # Examples
301 /// ```
302 /// use netidx_core::path::Path;
303 /// assert!(!Path::is_immediate_parent("/", "/foo/bar/baz"));
304 /// assert!(Path::is_immediate_parent("/foo/bar", "/foo/bar/baz"));
305 /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bareth/bazeth"));
306 /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bar"));
307 /// assert!(!Path::is_immediate_parent("/", "/"));
308 /// ```
309 pub fn is_immediate_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
310 parent: &T,
311 other: &U,
312 ) -> bool {
313 let parent = if parent.as_ref() == "/" { None } else { Some(parent.as_ref()) };
314 other.as_ref().len() > 0
315 && other.as_ref() != "/"
316 && Path::dirname(other) == parent
317 }
318
319 /// strips prefix from path at the separator boundry, including
320 /// the separator. Returns None if prefix is not a parent of path
321 /// (even if it happens to be a prefix).
322 pub fn strip_prefix<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
323 prefix: &T,
324 path: &'a U,
325 ) -> Option<&'a str> {
326 if Path::is_parent(prefix, path) {
327 path.as_ref()
328 .strip_prefix(prefix.as_ref())
329 .map(|s| s.strip_prefix("/").unwrap_or(s))
330 } else {
331 None
332 }
333 }
334
335 /// finds the longest common parent of the two specified paths, /
336 /// in the case they are completely disjoint.
337 pub fn lcp<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
338 path0: &'a T,
339 path1: &'a U,
340 ) -> &'a str {
341 let (mut p0, p1) = if path0.as_ref().len() <= path1.as_ref().len() {
342 (path0.as_ref(), path1.as_ref())
343 } else {
344 (path1.as_ref(), path0.as_ref())
345 };
346 loop {
347 if Path::is_parent(p0, p1) {
348 return p0;
349 } else {
350 match Path::dirname(p0) {
351 Some(p) => p0 = p,
352 None => return "/",
353 }
354 }
355 }
356 }
357
358 /// This will escape the path seperator and the escape character
359 /// in a path part. If you want to be sure that e.g. `append` will
360 /// only append 1 level, then you should call this function on
361 /// your part before appending it.
362 ///
363 /// # Examples
364 /// ```
365 /// use netidx_core::path::Path;
366 /// assert_eq!("foo\\/bar", &*Path::escape("foo/bar"));
367 /// assert_eq!("\\\\hello world", &*Path::escape("\\hello world"));
368 /// ```
369 pub fn escape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
370 PATH_ESC.escape(s)
371 }
372
373 /// This will unescape the path seperator and the escape character
374 /// in a path part.
375 ///
376 /// # Examples
377 /// ```
378 /// use netidx_core::path::Path;
379 /// assert_eq!("foo/bar", &*Path::unescape("foo\\/bar"));
380 /// assert_eq!("\\hello world", &*Path::unescape("\\\\hello world"));
381 /// ```
382 pub fn unescape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
383 PATH_ESC.unescape(s)
384 }
385
386 /// return a new path with the specified string appended as a new
387 /// part separated by the pathsep char.
388 ///
389 /// # Examples
390 /// ```
391 /// use netidx_core::path::Path;
392 /// let p = Path::root().append("bar").append("baz");
393 /// assert_eq!(&*p, "/bar/baz");
394 ///
395 /// let p = Path::root().append("/bar").append("//baz//////foo/");
396 /// assert_eq!(&*p, "/bar/baz/foo");
397 /// ```
398 pub fn append<T: AsRef<str> + ?Sized>(&self, other: &T) -> Self {
399 let other = other.as_ref();
400 if other.len() == 0 {
401 self.clone()
402 } else {
403 let mut res = String::with_capacity(self.as_ref().len() + other.len());
404 res.push_str(self.as_ref());
405 res.push(SEP);
406 res.push_str(other);
407 Path::from(res)
408 }
409 }
410
411 /// return an iterator over the parts of the path. The path
412 /// separator may be escaped with \. and a literal \ may be
413 /// represented as \\.
414 ///
415 /// # Examples
416 /// ```
417 /// use netidx_core::path::Path;
418 /// let p = Path::from("/foo/bar/baz");
419 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
420 ///
421 /// let p = Path::from(r"/foo\/bar/baz");
422 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\/bar", "baz"]);
423 ///
424 /// let p = Path::from(r"/foo\\/bar/baz");
425 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\", "bar", "baz"]);
426 ///
427 /// let p = Path::from(r"/foo\\\/bar/baz");
428 /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\\/bar", "baz"]);
429 /// ```
430 pub fn parts<T: AsRef<str> + ?Sized>(s: &T) -> impl Iterator<Item = &str> {
431 let s = s.as_ref();
432 let skip = if s == "/" {
433 2
434 } else if s.starts_with("/") {
435 1
436 } else {
437 0
438 };
439 let e = PATH_ESC.clone();
440 e.split(s, SEP).skip(skip)
441 }
442
443 /// Return an iterator over all the dirnames in the path starting
444 /// from the root and ending with the entire path.
445 ///
446 /// # Examples
447 /// ```
448 /// use netidx_core::path::Path;
449 /// let p = Path::from("/some/path/ending/in/foo");
450 /// let mut bn = Path::dirnames(&p);
451 /// assert_eq!(bn.next(), Some("/"));
452 /// assert_eq!(bn.next(), Some("/some"));
453 /// assert_eq!(bn.next(), Some("/some/path"));
454 /// assert_eq!(bn.next(), Some("/some/path/ending"));
455 /// assert_eq!(bn.next(), Some("/some/path/ending/in"));
456 /// assert_eq!(bn.next(), Some("/some/path/ending/in/foo"));
457 /// assert_eq!(bn.next(), None);
458 /// let mut bn = Path::dirnames(&p);
459 /// assert_eq!(bn.next_back(), Some("/some/path/ending/in/foo"));
460 /// assert_eq!(bn.next_back(), Some("/some/path/ending/in"));
461 /// assert_eq!(bn.next_back(), Some("/some/path/ending"));
462 /// assert_eq!(bn.next_back(), Some("/some/path"));
463 /// assert_eq!(bn.next_back(), Some("/some"));
464 /// assert_eq!(bn.next_back(), Some("/"));
465 /// assert_eq!(bn.next_back(), None);
466 /// ```
467 pub fn dirnames<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> DirNames<'a> {
468 let s = s.as_ref();
469 if s == "/" {
470 DirNames::Root(true)
471 } else {
472 DirNames::Path { cur: s, all: s, base: 1 }
473 }
474 }
475
476 /// Return the number of levels in the path.
477 ///
478 /// # Examples
479 /// ```
480 /// use netidx_core::path::Path;
481 /// let p = Path::from("/foo/bar/baz");
482 /// assert_eq!(Path::levels(&p), 3);
483 /// ```
484 pub fn levels<T: AsRef<str> + ?Sized>(s: &T) -> usize {
485 let mut p = 0;
486 for _ in Path::parts(s) {
487 p += 1
488 }
489 p
490 }
491
492 /// return the path without the last part, or return None if the
493 /// path is empty or /.
494 ///
495 /// # Examples
496 /// ```
497 /// use netidx_core::path::Path;
498 /// let p = Path::from("/foo/bar/baz");
499 /// assert_eq!(Path::dirname(&p), Some("/foo/bar"));
500 ///
501 /// let p = Path::root();
502 /// assert_eq!(Path::dirname(&p), None);
503 ///
504 /// let p = Path::from("/foo");
505 /// assert_eq!(Path::dirname(&p), None);
506 /// ```
507 pub fn dirname<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Option<&'a str> {
508 let s = s.as_ref();
509 Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i]) })
510 }
511
512 pub fn dirname_with_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
513 let s = s.as_ref();
514 Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i + 1]) })
515 }
516
517 /// return the last part of the path, or return None if the path
518 /// is empty.
519 ///
520 /// # Examples
521 /// ```
522 /// use netidx_core::path::Path;
523 /// let p = Path::from("/foo/bar/baz");
524 /// assert_eq!(Path::basename(&p), Some("baz"));
525 ///
526 /// let p = Path::from("foo");
527 /// assert_eq!(Path::basename(&p), Some("foo"));
528 ///
529 /// let p = Path::from("foo/bar");
530 /// assert_eq!(Path::basename(&p), Some("bar"));
531 ///
532 /// let p = Path::from("");
533 /// assert_eq!(Path::basename(&p), None);
534 ///
535 /// let p = Path::from("/");
536 /// assert_eq!(Path::basename(&p), None);
537 /// ```
538 pub fn basename<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
539 let s = s.as_ref();
540 match Path::rfind_sep(s) {
541 None => {
542 if s.len() > 0 {
543 Some(s)
544 } else {
545 None
546 }
547 }
548 Some(i) => {
549 if s.len() <= 1 {
550 None
551 } else {
552 Some(&s[i + 1..s.len()])
553 }
554 }
555 }
556 }
557
558 fn find_sep_int<F: Fn(&str) -> Option<usize>>(mut s: &str, f: F) -> Option<usize> {
559 if s.len() == 0 {
560 None
561 } else {
562 loop {
563 match f(s) {
564 None => return None,
565 Some(i) => {
566 if !PATH_ESC.is_escaped(s, i) {
567 return Some(i);
568 } else {
569 s = &s[0..i];
570 }
571 }
572 }
573 }
574 }
575 }
576
577 /// return the position of the last path separator in the path, or
578 /// None if there isn't one.
579 ///
580 /// # Examples
581 /// ```
582 /// use netidx_core::path::Path;
583 /// let p = Path::from("/foo/bar/baz");
584 /// assert_eq!(Path::rfind_sep(&p), Some(8));
585 ///
586 /// let p = Path::from("");
587 /// assert_eq!(Path::rfind_sep(&p), None);
588 /// ```
589 pub fn rfind_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
590 let s = s.as_ref();
591 Path::find_sep_int(s, |s| s.rfind(SEP))
592 }
593
594 /// return the position of the first path separator in the path, or
595 /// None if there isn't one.
596 ///
597 /// # Examples
598 /// ```
599 /// use netidx_core::path::Path;
600 /// let p = Path::from("foo/bar/baz");
601 /// assert_eq!(Path::find_sep(&p), Some(3));
602 ///
603 /// let p = Path::from("");
604 /// assert_eq!(Path::find_sep(&p), None);
605 /// ```
606 pub fn find_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
607 let s = s.as_ref();
608 Path::find_sep_int(s, |s| s.find(SEP))
609 }
610}