netidx_core/
path.rs

1use crate::pack::{Pack, PackError};
2use arcstr::ArcStr;
3use bytes::{Buf, BufMut};
4use escaping::Escape;
5use std::{
6    borrow::{Borrow, Cow},
7    cell::RefCell,
8    cmp::{Eq, Ord, PartialEq, PartialOrd},
9    convert::{AsRef, From},
10    fmt,
11    iter::{DoubleEndedIterator, Iterator},
12    ops::Deref,
13    result::Result,
14    str::{self, FromStr},
15    sync::LazyLock,
16};
17
18pub const SEP: char = '/';
19pub const ROOT: &str = "/";
20pub const PATH_ESC: LazyLock<Escape> =
21    LazyLock::new(|| Escape::new('\\', &['\\', '/'], &[], None).unwrap());
22
23fn is_canonical(s: &str) -> bool {
24    for _ in Path::parts(s).filter(|p| *p == "") {
25        return false;
26    }
27    true
28}
29
30fn canonize(s: &str) -> String {
31    let mut res = String::with_capacity(s.len());
32    if s.len() > 0 {
33        if s.starts_with(SEP) {
34            res.push(SEP)
35        }
36        let mut first = true;
37        for p in Path::parts(s).filter(|p| *p != "") {
38            if first {
39                first = false;
40            } else {
41                res.push(SEP)
42            }
43            res.push_str(p);
44        }
45    }
46    res
47}
48
/// A path in the namespace. Paths are immutable and reference
/// counted.  Path components are separated by /, which may be escaped
/// with \. / and \ are the only special characters in path, any other
/// unicode character may be used. Path lengths are not limited on the
/// local machine, but may be restricted by maximum message size on
/// the wire.
#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Path(ArcStr);
57
58impl Pack for Path {
59    fn encoded_len(&self) -> usize {
60        <ArcStr as Pack>::encoded_len(&self.0)
61    }
62
63    fn encode(&self, buf: &mut impl BufMut) -> Result<(), PackError> {
64        <ArcStr as Pack>::encode(&self.0, buf)
65    }
66
67    fn decode(buf: &mut impl Buf) -> Result<Self, PackError> {
68        Ok(Path::from(<ArcStr as Pack>::decode(buf)?))
69    }
70}
71
/// Formatting a path is delegated to the wrapped string.
impl fmt::Display for Path {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}
77
78impl AsRef<str> for Path {
79    fn as_ref(&self) -> &str {
80        &*self.0
81    }
82}
83
84impl Borrow<str> for Path {
85    fn borrow(&self) -> &str {
86        &*self.0
87    }
88}
89
90impl Deref for Path {
91    type Target = str;
92
93    fn deref(&self) -> &Self::Target {
94        &self.0
95    }
96}
97
98impl From<String> for Path {
99    fn from(s: String) -> Path {
100        if is_canonical(&s) {
101            Path(ArcStr::from(s))
102        } else {
103            Path(ArcStr::from(canonize(&s)))
104        }
105    }
106}
107
108impl From<&'static str> for Path {
109    fn from(s: &'static str) -> Path {
110        if is_canonical(s) {
111            Path(ArcStr::from(s))
112        } else {
113            Path(ArcStr::from(canonize(s)))
114        }
115    }
116}
117
118impl<'a> From<&'a String> for Path {
119    fn from(s: &String) -> Path {
120        if is_canonical(s.as_str()) {
121            Path(ArcStr::from(s.clone()))
122        } else {
123            Path(ArcStr::from(canonize(s.as_str())))
124        }
125    }
126}
127
128impl From<ArcStr> for Path {
129    fn from(s: ArcStr) -> Path {
130        if is_canonical(&*s) {
131            Path(s)
132        } else {
133            Path(ArcStr::from(canonize(&*s)))
134        }
135    }
136}
137
138impl From<&ArcStr> for Path {
139    fn from(s: &ArcStr) -> Path {
140        if is_canonical(s) {
141            Path(s.clone())
142        } else {
143            Path(ArcStr::from(canonize(s)))
144        }
145    }
146}
147
148impl<C: Borrow<str>> FromIterator<C> for Path {
149    fn from_iter<T: IntoIterator<Item = C>>(iter: T) -> Self {
150        thread_local! {
151            static BUF: RefCell<String> = RefCell::new(String::new());
152        }
153        BUF.with_borrow_mut(|buf| {
154            buf.clear();
155            buf.push(SEP);
156            for c in iter {
157                PATH_ESC.escape_to(c.borrow(), buf);
158                buf.push(SEP)
159            }
160            if buf.len() > 1 {
161                buf.pop(); // remove trailing sep
162            }
163            Self(ArcStr::from(buf.as_str()))
164        })
165    }
166}
167
168impl FromStr for Path {
169    type Err = anyhow::Error;
170
171    fn from_str(s: &str) -> Result<Self, Self::Err> {
172        if is_canonical(s) {
173            Ok(Path(ArcStr::from(s)))
174        } else {
175            Ok(Path(ArcStr::from(canonize(s))))
176        }
177    }
178}
179
180impl Into<ArcStr> for Path {
181    fn into(self) -> ArcStr {
182        self.0
183    }
184}
185
/// Iterator over the dirnames of a path, as returned by
/// `Path::dirnames`.
pub enum DirNames<'a> {
    /// Used when the path is `/`. The flag is true until the single
    /// item `/` has been yielded.
    Root(bool),
    /// Used for every other path. `all` is the path being iterated
    /// (iterating from the back shortens it as items are yielded);
    /// `cur` and `base` are the forward iteration's position in `all`.
    Path { cur: &'a str, all: &'a str, base: usize },
}
190
191impl<'a> Iterator for DirNames<'a> {
192    type Item = &'a str;
193
194    fn next(&mut self) -> Option<Self::Item> {
195        match self {
196            DirNames::Path { cur, all, base } => {
197                if *base >= all.len() {
198                    None
199                } else {
200                    match Path::find_sep(cur) {
201                        None => {
202                            *base = all.len();
203                            Some(all)
204                        }
205                        Some(p) => {
206                            *base += p + 1;
207                            *cur = &all[*base..];
208                            Some(&all[0..*base - 1])
209                        }
210                    }
211                }
212            }
213            DirNames::Root(true) => {
214                *self = DirNames::Root(false);
215                Some("/")
216            }
217            DirNames::Root(false) => None,
218        }
219    }
220}
221
222impl<'a> DoubleEndedIterator for DirNames<'a> {
223    fn next_back(&mut self) -> Option<Self::Item> {
224        match self {
225            DirNames::Path { cur: _, all, base: _ } => match Path::dirname(*all) {
226                Some(dn) => {
227                    let res = *all;
228                    *all = dn;
229                    Some(res)
230                }
231                None => {
232                    if all == &ROOT {
233                        *self = DirNames::Root(false);
234                        Some("/")
235                    } else {
236                        let res = *all;
237                        *all = &ROOT;
238                        Some(res)
239                    }
240                }
241            },
242            DirNames::Root(true) => {
243                *self = DirNames::Root(false);
244                Some("/")
245            }
246            DirNames::Root(false) => None,
247        }
248    }
249}
250
251impl Path {
    /// The path separator character; same value as the module level `SEP`.
    pub const SEP: char = SEP;
    /// The root path as a string; same value as the module level `ROOT`.
    pub const ROOT: &str = ROOT;
254
255    /// create a path from a non static str by copying the contents of the str
256    pub fn from_str(s: &str) -> Self {
257        if is_canonical(s) {
258            Path(ArcStr::from(s))
259        } else {
260            Path(ArcStr::from(canonize(s)))
261        }
262    }
263
264    /// returns /
265    pub fn root() -> Path {
266        // CR estokes: need a good solution for using SEP here
267        Path::from("/")
268    }
269
270    /// returns true if the path starts with /, false otherwise
271    pub fn is_absolute<T: AsRef<str> + ?Sized>(p: &T) -> bool {
272        p.as_ref().starts_with(SEP)
273    }
274
275    /// true if this path is a parent to the specified path. A path is it's own parent.
276    ///
277    /// # Examples
278    /// ```
279    /// use netidx_core::path::Path;
280    /// assert!(Path::is_parent("/", "/foo/bar/baz"));
281    /// assert!(Path::is_parent("/foo/bar", "/foo/bar/baz"));
282    /// assert!(!Path::is_parent("/foo/bar", "/foo/bareth/bazeth"));
283    /// assert!(Path::is_parent("/foo/bar", "/foo/bar"));
284    /// ```
285    pub fn is_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
286        parent: &T,
287        other: &U,
288    ) -> bool {
289        let parent = parent.as_ref();
290        let other = other.as_ref();
291        parent == "/"
292            || (other.starts_with(parent)
293                && (other.len() == parent.len()
294                    || other.as_bytes()[parent.len()] == SEP as u8))
295    }
296
297    /// true if this path is the parent to the specified path, and is
298    /// exactly 1 level above the specfied path.
299    ///
300    /// # Examples
301    /// ```
302    /// use netidx_core::path::Path;
303    /// assert!(!Path::is_immediate_parent("/", "/foo/bar/baz"));
304    /// assert!(Path::is_immediate_parent("/foo/bar", "/foo/bar/baz"));
305    /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bareth/bazeth"));
306    /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bar"));
307    /// assert!(!Path::is_immediate_parent("/", "/"));
308    /// ```
309    pub fn is_immediate_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
310        parent: &T,
311        other: &U,
312    ) -> bool {
313        let parent = if parent.as_ref() == "/" { None } else { Some(parent.as_ref()) };
314        other.as_ref().len() > 0
315            && other.as_ref() != "/"
316            && Path::dirname(other) == parent
317    }
318
319    /// strips prefix from path at the separator boundry, including
320    /// the separator. Returns None if prefix is not a parent of path
321    /// (even if it happens to be a prefix).
322    pub fn strip_prefix<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
323        prefix: &T,
324        path: &'a U,
325    ) -> Option<&'a str> {
326        if Path::is_parent(prefix, path) {
327            path.as_ref()
328                .strip_prefix(prefix.as_ref())
329                .map(|s| s.strip_prefix("/").unwrap_or(s))
330        } else {
331            None
332        }
333    }
334
335    /// finds the longest common parent of the two specified paths, /
336    /// in the case they are completely disjoint.
337    pub fn lcp<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
338        path0: &'a T,
339        path1: &'a U,
340    ) -> &'a str {
341        let (mut p0, p1) = if path0.as_ref().len() <= path1.as_ref().len() {
342            (path0.as_ref(), path1.as_ref())
343        } else {
344            (path1.as_ref(), path0.as_ref())
345        };
346        loop {
347            if Path::is_parent(p0, p1) {
348                return p0;
349            } else {
350                match Path::dirname(p0) {
351                    Some(p) => p0 = p,
352                    None => return "/",
353                }
354            }
355        }
356    }
357
    /// This will escape the path separator and the escape character
    /// in a path part. If you want to be sure that e.g. `append` will
    /// only append 1 level, then you should call this function on
    /// your part before appending it.
    ///
    /// # Examples
    /// ```
    /// use netidx_core::path::Path;
    /// assert_eq!("foo\\/bar", &*Path::escape("foo/bar"));
    /// assert_eq!("\\\\hello world", &*Path::escape("\\hello world"));
    /// ```
    pub fn escape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
        PATH_ESC.escape(s)
    }
372
    /// This will unescape the path separator and the escape character
    /// in a path part.
    ///
    /// # Examples
    /// ```
    /// use netidx_core::path::Path;
    /// assert_eq!("foo/bar", &*Path::unescape("foo\\/bar"));
    /// assert_eq!("\\hello world", &*Path::unescape("\\\\hello world"));
    /// ```
    pub fn unescape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
        PATH_ESC.unescape(s)
    }
385
386    /// return a new path with the specified string appended as a new
387    /// part separated by the pathsep char.
388    ///
389    /// # Examples
390    /// ```
391    /// use netidx_core::path::Path;
392    /// let p = Path::root().append("bar").append("baz");
393    /// assert_eq!(&*p, "/bar/baz");
394    ///
395    /// let p = Path::root().append("/bar").append("//baz//////foo/");
396    /// assert_eq!(&*p, "/bar/baz/foo");
397    /// ```
398    pub fn append<T: AsRef<str> + ?Sized>(&self, other: &T) -> Self {
399        let other = other.as_ref();
400        if other.len() == 0 {
401            self.clone()
402        } else {
403            let mut res = String::with_capacity(self.as_ref().len() + other.len());
404            res.push_str(self.as_ref());
405            res.push(SEP);
406            res.push_str(other);
407            Path::from(res)
408        }
409    }
410
411    /// return an iterator over the parts of the path. The path
412    /// separator may be escaped with \. and a literal \ may be
413    /// represented as \\.
414    ///
415    /// # Examples
416    /// ```
417    /// use netidx_core::path::Path;
418    /// let p = Path::from("/foo/bar/baz");
419    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
420    ///
421    /// let p = Path::from(r"/foo\/bar/baz");
422    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\/bar", "baz"]);
423    ///
424    /// let p = Path::from(r"/foo\\/bar/baz");
425    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\", "bar", "baz"]);
426    ///
427    /// let p = Path::from(r"/foo\\\/bar/baz");
428    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\\/bar", "baz"]);
429    /// ```
430    pub fn parts<T: AsRef<str> + ?Sized>(s: &T) -> impl Iterator<Item = &str> {
431        let s = s.as_ref();
432        let skip = if s == "/" {
433            2
434        } else if s.starts_with("/") {
435            1
436        } else {
437            0
438        };
439        let e = PATH_ESC.clone();
440        e.split(s, SEP).skip(skip)
441    }
442
443    /// Return an iterator over all the dirnames in the path starting
444    /// from the root and ending with the entire path.
445    ///
446    /// # Examples
447    /// ```
448    /// use netidx_core::path::Path;
449    /// let p = Path::from("/some/path/ending/in/foo");
450    /// let mut bn = Path::dirnames(&p);
451    /// assert_eq!(bn.next(), Some("/"));
452    /// assert_eq!(bn.next(), Some("/some"));
453    /// assert_eq!(bn.next(), Some("/some/path"));
454    /// assert_eq!(bn.next(), Some("/some/path/ending"));
455    /// assert_eq!(bn.next(), Some("/some/path/ending/in"));
456    /// assert_eq!(bn.next(), Some("/some/path/ending/in/foo"));
457    /// assert_eq!(bn.next(), None);
458    /// let mut bn = Path::dirnames(&p);
459    /// assert_eq!(bn.next_back(), Some("/some/path/ending/in/foo"));
460    /// assert_eq!(bn.next_back(), Some("/some/path/ending/in"));
461    /// assert_eq!(bn.next_back(), Some("/some/path/ending"));
462    /// assert_eq!(bn.next_back(), Some("/some/path"));
463    /// assert_eq!(bn.next_back(), Some("/some"));
464    /// assert_eq!(bn.next_back(), Some("/"));
465    /// assert_eq!(bn.next_back(), None);
466    /// ```
467    pub fn dirnames<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> DirNames<'a> {
468        let s = s.as_ref();
469        if s == "/" {
470            DirNames::Root(true)
471        } else {
472            DirNames::Path { cur: s, all: s, base: 1 }
473        }
474    }
475
476    /// Return the number of levels in the path.
477    ///
478    /// # Examples
479    /// ```
480    /// use netidx_core::path::Path;
481    /// let p = Path::from("/foo/bar/baz");
482    /// assert_eq!(Path::levels(&p), 3);
483    /// ```
484    pub fn levels<T: AsRef<str> + ?Sized>(s: &T) -> usize {
485        let mut p = 0;
486        for _ in Path::parts(s) {
487            p += 1
488        }
489        p
490    }
491
492    /// return the path without the last part, or return None if the
493    /// path is empty or /.
494    ///
495    /// # Examples
496    /// ```
497    /// use netidx_core::path::Path;
498    /// let p = Path::from("/foo/bar/baz");
499    /// assert_eq!(Path::dirname(&p), Some("/foo/bar"));
500    ///
501    /// let p = Path::root();
502    /// assert_eq!(Path::dirname(&p), None);
503    ///
504    /// let p = Path::from("/foo");
505    /// assert_eq!(Path::dirname(&p), None);
506    /// ```
507    pub fn dirname<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Option<&'a str> {
508        let s = s.as_ref();
509        Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i]) })
510    }
511
512    pub fn dirname_with_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
513        let s = s.as_ref();
514        Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i + 1]) })
515    }
516
517    /// return the last part of the path, or return None if the path
518    /// is empty.
519    ///
520    /// # Examples
521    /// ```
522    /// use netidx_core::path::Path;
523    /// let p = Path::from("/foo/bar/baz");
524    /// assert_eq!(Path::basename(&p), Some("baz"));
525    ///
526    /// let p = Path::from("foo");
527    /// assert_eq!(Path::basename(&p), Some("foo"));
528    ///
529    /// let p = Path::from("foo/bar");
530    /// assert_eq!(Path::basename(&p), Some("bar"));
531    ///
532    /// let p = Path::from("");
533    /// assert_eq!(Path::basename(&p), None);
534    ///
535    /// let p = Path::from("/");
536    /// assert_eq!(Path::basename(&p), None);
537    /// ```
538    pub fn basename<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
539        let s = s.as_ref();
540        match Path::rfind_sep(s) {
541            None => {
542                if s.len() > 0 {
543                    Some(s)
544                } else {
545                    None
546                }
547            }
548            Some(i) => {
549                if s.len() <= 1 {
550                    None
551                } else {
552                    Some(&s[i + 1..s.len()])
553                }
554            }
555        }
556    }
557
558    fn find_sep_int<F: Fn(&str) -> Option<usize>>(mut s: &str, f: F) -> Option<usize> {
559        if s.len() == 0 {
560            None
561        } else {
562            loop {
563                match f(s) {
564                    None => return None,
565                    Some(i) => {
566                        if !PATH_ESC.is_escaped(s, i) {
567                            return Some(i);
568                        } else {
569                            s = &s[0..i];
570                        }
571                    }
572                }
573            }
574        }
575    }
576
577    /// return the position of the last path separator in the path, or
578    /// None if there isn't one.
579    ///
580    /// # Examples
581    /// ```
582    /// use netidx_core::path::Path;
583    /// let p = Path::from("/foo/bar/baz");
584    /// assert_eq!(Path::rfind_sep(&p), Some(8));
585    ///
586    /// let p = Path::from("");
587    /// assert_eq!(Path::rfind_sep(&p), None);
588    /// ```
589    pub fn rfind_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
590        let s = s.as_ref();
591        Path::find_sep_int(s, |s| s.rfind(SEP))
592    }
593
594    /// return the position of the first path separator in the path, or
595    /// None if there isn't one.
596    ///
597    /// # Examples
598    /// ```
599    /// use netidx_core::path::Path;
600    /// let p = Path::from("foo/bar/baz");
601    /// assert_eq!(Path::find_sep(&p), Some(3));
602    ///
603    /// let p = Path::from("");
604    /// assert_eq!(Path::find_sep(&p), None);
605    /// ```
606    pub fn find_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
607        let s = s.as_ref();
608        Path::find_sep_int(s, |s| s.find(SEP))
609    }
610}