netidx_core/
path.rs

1//! Hierarchical path handling and manipulation.
2use crate::pack::{Pack, PackError};
3use arcstr::{literal, ArcStr};
4use bytes::{Buf, BufMut};
5use escaping::Escape;
6use std::{
7    borrow::{Borrow, Cow},
8    cell::RefCell,
9    cmp::{Eq, Ord, PartialEq, PartialOrd},
10    convert::{AsRef, From},
11    fmt,
12    iter::{DoubleEndedIterator, Iterator},
13    ops::Deref,
14    result::Result,
15    str::{self, FromStr},
16    sync::LazyLock,
17};
18
19pub const SEP: char = '/';
20pub const ROOT: &str = "/";
21pub const PATH_ESC: LazyLock<Escape> =
22    LazyLock::new(|| Escape::new('\\', &['\\', '/'], &[], None).unwrap());
23
24fn is_canonical(s: &str) -> bool {
25    for _ in Path::parts(s).filter(|p| *p == "") {
26        return false;
27    }
28    true
29}
30
31fn canonize(s: &str) -> String {
32    let mut res = String::with_capacity(s.len());
33    if s.len() > 0 {
34        if s.starts_with(SEP) {
35            res.push(SEP)
36        }
37        let mut first = true;
38        for p in Path::parts(s).filter(|p| *p != "") {
39            if first {
40                first = false;
41            } else {
42                res.push(SEP)
43            }
44            res.push_str(p);
45        }
46    }
47    res
48}
49
/// A path in the netidx namespace.
///
/// Paths are immutable and reference counted.
/// Path components are separated by /, which may be escaped
/// with \. / and \ are the only special characters in a path; any other
/// unicode character may be used. Path lengths are not limited on the
/// local machine, but may be restricted by maximum message size on
/// the wire.
///
/// The `From` and `FromStr` conversions in this module store the canonical
/// form of their input, that is, with empty components removed.
#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Path(ArcStr);
60
61impl Pack for Path {
62    fn encoded_len(&self) -> usize {
63        <ArcStr as Pack>::encoded_len(&self.0)
64    }
65
66    fn encode(&self, buf: &mut impl BufMut) -> Result<(), PackError> {
67        <ArcStr as Pack>::encode(&self.0, buf)
68    }
69
70    fn decode(buf: &mut impl Buf) -> Result<Self, PackError> {
71        Ok(Path::from(<ArcStr as Pack>::decode(buf)?))
72    }
73}
74
75impl fmt::Display for Path {
76    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
77        self.0.fmt(f)
78    }
79}
80
81impl AsRef<str> for Path {
82    fn as_ref(&self) -> &str {
83        &*self.0
84    }
85}
86
87impl Borrow<str> for Path {
88    fn borrow(&self) -> &str {
89        &*self.0
90    }
91}
92
93impl Deref for Path {
94    type Target = str;
95
96    fn deref(&self) -> &Self::Target {
97        &self.0
98    }
99}
100
101impl From<String> for Path {
102    fn from(s: String) -> Path {
103        if is_canonical(&s) {
104            Path(ArcStr::from(s))
105        } else {
106            Path(ArcStr::from(canonize(&s)))
107        }
108    }
109}
110
111impl From<&'static str> for Path {
112    fn from(s: &'static str) -> Path {
113        if is_canonical(s) {
114            Path(ArcStr::from(s))
115        } else {
116            Path(ArcStr::from(canonize(s)))
117        }
118    }
119}
120
121impl<'a> From<&'a String> for Path {
122    fn from(s: &String) -> Path {
123        if is_canonical(s.as_str()) {
124            Path(ArcStr::from(s.clone()))
125        } else {
126            Path(ArcStr::from(canonize(s.as_str())))
127        }
128    }
129}
130
131impl From<ArcStr> for Path {
132    fn from(s: ArcStr) -> Path {
133        if is_canonical(&*s) {
134            Path(s)
135        } else {
136            Path(ArcStr::from(canonize(&*s)))
137        }
138    }
139}
140
141impl From<&ArcStr> for Path {
142    fn from(s: &ArcStr) -> Path {
143        if is_canonical(s) {
144            Path(s.clone())
145        } else {
146            Path(ArcStr::from(canonize(s)))
147        }
148    }
149}
150
151impl<C: Borrow<str>> FromIterator<C> for Path {
152    fn from_iter<T: IntoIterator<Item = C>>(iter: T) -> Self {
153        thread_local! {
154            static BUF: RefCell<String> = RefCell::new(String::new());
155        }
156        BUF.with_borrow_mut(|buf| {
157            buf.clear();
158            buf.push(SEP);
159            for c in iter {
160                PATH_ESC.escape_to(c.borrow(), buf);
161                buf.push(SEP)
162            }
163            if buf.len() > 1 {
164                buf.pop(); // remove trailing sep
165            }
166            Self(ArcStr::from(buf.as_str()))
167        })
168    }
169}
170
171impl FromStr for Path {
172    type Err = anyhow::Error;
173
174    fn from_str(s: &str) -> Result<Self, Self::Err> {
175        if is_canonical(s) {
176            Ok(Path(ArcStr::from(s)))
177        } else {
178            Ok(Path(ArcStr::from(canonize(s))))
179        }
180    }
181}
182
183impl Into<ArcStr> for Path {
184    fn into(self) -> ArcStr {
185        self.0
186    }
187}
188
/// Iterator over the dirnames of a path, as returned by `Path::dirnames`.
pub enum DirNames<'a> {
    /// The path being walked is `/` itself. The flag is `true` until `/`
    /// has been yielded.
    Root(bool),
    /// Any other path. `all` is the path being walked (reverse iteration
    /// shortens it as it goes), `cur` is the remainder that forward
    /// iteration has not scanned yet, and `base` is the byte position in
    /// `all` that forward iteration has advanced to.
    Path { cur: &'a str, all: &'a str, base: usize },
}
193
194impl<'a> Iterator for DirNames<'a> {
195    type Item = &'a str;
196
197    fn next(&mut self) -> Option<Self::Item> {
198        match self {
199            DirNames::Path { cur, all, base } => {
200                if *base >= all.len() {
201                    None
202                } else {
203                    match Path::find_sep(cur) {
204                        None => {
205                            *base = all.len();
206                            Some(all)
207                        }
208                        Some(p) => {
209                            *base += p + 1;
210                            *cur = &all[*base..];
211                            Some(&all[0..*base - 1])
212                        }
213                    }
214                }
215            }
216            DirNames::Root(true) => {
217                *self = DirNames::Root(false);
218                Some("/")
219            }
220            DirNames::Root(false) => None,
221        }
222    }
223}
224
225impl<'a> DoubleEndedIterator for DirNames<'a> {
226    fn next_back(&mut self) -> Option<Self::Item> {
227        match self {
228            DirNames::Path { cur: _, all, base: _ } => match Path::dirname(*all) {
229                Some(dn) => {
230                    let res = *all;
231                    *all = dn;
232                    Some(res)
233                }
234                None => {
235                    if all == &ROOT {
236                        *self = DirNames::Root(false);
237                        Some("/")
238                    } else {
239                        let res = *all;
240                        *all = &ROOT;
241                        Some(res)
242                    }
243                }
244            },
245            DirNames::Root(true) => {
246                *self = DirNames::Root(false);
247                Some("/")
248            }
249            DirNames::Root(false) => None,
250        }
251    }
252}
253
254impl Path {
    /// The path separator character; the same value as the module level `SEP`.
    pub const SEP: char = SEP;
    /// The root path as a string; the same value as the module level `ROOT`.
    pub const ROOT: &str = ROOT;
257
258    /// create a path from a non static str by copying the contents of the str
259    pub fn from_str(s: &str) -> Self {
260        if is_canonical(s) {
261            Path(ArcStr::from(s))
262        } else {
263            Path(ArcStr::from(canonize(s)))
264        }
265    }
266
    /// Returns the root path, `/`.
    pub fn root() -> Path {
        Path(literal!("/"))
    }
271
272    /// returns true if the path starts with /, false otherwise
273    pub fn is_absolute<T: AsRef<str> + ?Sized>(p: &T) -> bool {
274        p.as_ref().starts_with(SEP)
275    }
276
277    /// true if this path is a parent to the specified path. A path is it's own parent.
278    ///
279    /// # Examples
280    /// ```
281    /// use netidx_core::path::Path;
282    /// assert!(Path::is_parent("/", "/foo/bar/baz"));
283    /// assert!(Path::is_parent("/foo/bar", "/foo/bar/baz"));
284    /// assert!(!Path::is_parent("/foo/bar", "/foo/bareth/bazeth"));
285    /// assert!(Path::is_parent("/foo/bar", "/foo/bar"));
286    /// ```
287    pub fn is_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
288        parent: &T,
289        other: &U,
290    ) -> bool {
291        let parent = parent.as_ref();
292        let other = other.as_ref();
293        parent == "/"
294            || (other.starts_with(parent)
295                && (other.len() == parent.len()
296                    || other.as_bytes()[parent.len()] == SEP as u8))
297    }
298
299    /// true if this path is the parent to the specified path, and is
300    /// exactly 1 level above the specfied path.
301    ///
302    /// # Examples
303    /// ```
304    /// use netidx_core::path::Path;
305    /// assert!(!Path::is_immediate_parent("/", "/foo/bar/baz"));
306    /// assert!(Path::is_immediate_parent("/foo/bar", "/foo/bar/baz"));
307    /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bareth/bazeth"));
308    /// assert!(!Path::is_immediate_parent("/foo/bar", "/foo/bar"));
309    /// assert!(!Path::is_immediate_parent("/", "/"));
310    /// ```
311    pub fn is_immediate_parent<T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
312        parent: &T,
313        other: &U,
314    ) -> bool {
315        let parent = if parent.as_ref() == "/" { None } else { Some(parent.as_ref()) };
316        other.as_ref().len() > 0
317            && other.as_ref() != "/"
318            && Path::dirname(other) == parent
319    }
320
321    /// strips prefix from path at the separator boundry, including
322    /// the separator. Returns None if prefix is not a parent of path
323    /// (even if it happens to be a prefix).
324    pub fn strip_prefix<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
325        prefix: &T,
326        path: &'a U,
327    ) -> Option<&'a str> {
328        if Path::is_parent(prefix, path) {
329            path.as_ref()
330                .strip_prefix(prefix.as_ref())
331                .map(|s| s.strip_prefix("/").unwrap_or(s))
332        } else {
333            None
334        }
335    }
336
337    /// finds the longest common parent of the two specified paths, /
338    /// in the case they are completely disjoint.
339    pub fn lcp<'a, T: AsRef<str> + ?Sized, U: AsRef<str> + ?Sized>(
340        path0: &'a T,
341        path1: &'a U,
342    ) -> &'a str {
343        let (mut p0, p1) = if path0.as_ref().len() <= path1.as_ref().len() {
344            (path0.as_ref(), path1.as_ref())
345        } else {
346            (path1.as_ref(), path0.as_ref())
347        };
348        loop {
349            if Path::is_parent(p0, p1) {
350                return p0;
351            } else {
352                match Path::dirname(p0) {
353                    Some(p) => p0 = p,
354                    None => return "/",
355                }
356            }
357        }
358    }
359
    /// Escape the path separator and the escape character in a path part,
    /// using `PATH_ESC`. If you want to be sure that e.g. `append` will
    /// only append 1 level, then you should call this function on
    /// your part before appending it.
    ///
    /// # Examples
    /// ```
    /// use netidx_core::path::Path;
    /// assert_eq!("foo\\/bar", &*Path::escape("foo/bar"));
    /// assert_eq!("\\\\hello world", &*Path::escape("\\hello world"));
    /// ```
    pub fn escape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
        PATH_ESC.escape(s)
    }
374
    /// Unescape the path separator and the escape character in a path
    /// part, using `PATH_ESC`. This is the inverse of `escape`.
    ///
    /// # Examples
    /// ```
    /// use netidx_core::path::Path;
    /// assert_eq!("foo/bar", &*Path::unescape("foo\\/bar"));
    /// assert_eq!("\\hello world", &*Path::unescape("\\\\hello world"));
    /// ```
    pub fn unescape<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Cow<'a, str> {
        PATH_ESC.unescape(s)
    }
387
388    /// return a new path with the specified string appended as a new
389    /// part separated by the pathsep char.
390    ///
391    /// # Examples
392    /// ```
393    /// use netidx_core::path::Path;
394    /// let p = Path::root().append("bar").append("baz");
395    /// assert_eq!(&*p, "/bar/baz");
396    ///
397    /// let p = Path::root().append("/bar").append("//baz//////foo/");
398    /// assert_eq!(&*p, "/bar/baz/foo");
399    /// ```
400    pub fn append<T: AsRef<str> + ?Sized>(&self, other: &T) -> Self {
401        let other = other.as_ref();
402        if other.len() == 0 {
403            self.clone()
404        } else {
405            let mut res = String::with_capacity(self.as_ref().len() + other.len());
406            res.push_str(self.as_ref());
407            res.push(SEP);
408            res.push_str(other);
409            Path::from(res)
410        }
411    }
412
    /// Alias for `append`: returns a new path with `other` appended as a
    /// new part.
    pub fn join<T: AsRef<str> + ?Sized>(&self, other: &T) -> Self {
        self.append(other)
    }
417
    /// Return an iterator over the parts of the path. The path
    /// separator may be escaped with \, and a literal \ may be
    /// represented as \\. Parts are returned as they appear in the path,
    /// i.e. still escaped.
    ///
    /// # Examples
    /// ```
    /// use netidx_core::path::Path;
    /// let p = Path::from("/foo/bar/baz");
    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
    ///
    /// let p = Path::from(r"/foo\/bar/baz");
    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\/bar", "baz"]);
    ///
    /// let p = Path::from(r"/foo\\/bar/baz");
    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\", "bar", "baz"]);
    ///
    /// let p = Path::from(r"/foo\\\/bar/baz");
    /// assert_eq!(Path::parts(&p).collect::<Vec<_>>(), vec![r"foo\\\/bar", "baz"]);
    /// ```
    pub fn parts<T: AsRef<str> + ?Sized>(s: &T) -> impl Iterator<Item = &str> {
        let s = s.as_ref();
        // Number of leading pieces of the split to drop: two for the bare
        // root, one for any other absolute path, none for a relative path.
        // NOTE(review): presumably the split yields an empty piece before a
        // leading separator (and a second one after it for "/") - confirm
        // against `Escape::split`.
        let skip = if s == "/" {
            2
        } else if s.starts_with("/") {
            1
        } else {
            0
        };
        // split on unescaped separators using a copy of the path escaper
        let e = PATH_ESC.clone();
        e.split(s, SEP).skip(skip)
    }
449
450    /// Return an iterator over all the dirnames in the path starting
451    /// from the root and ending with the entire path.
452    ///
453    /// # Examples
454    /// ```
455    /// use netidx_core::path::Path;
456    /// let p = Path::from("/some/path/ending/in/foo");
457    /// let mut bn = Path::dirnames(&p);
458    /// assert_eq!(bn.next(), Some("/"));
459    /// assert_eq!(bn.next(), Some("/some"));
460    /// assert_eq!(bn.next(), Some("/some/path"));
461    /// assert_eq!(bn.next(), Some("/some/path/ending"));
462    /// assert_eq!(bn.next(), Some("/some/path/ending/in"));
463    /// assert_eq!(bn.next(), Some("/some/path/ending/in/foo"));
464    /// assert_eq!(bn.next(), None);
465    /// let mut bn = Path::dirnames(&p);
466    /// assert_eq!(bn.next_back(), Some("/some/path/ending/in/foo"));
467    /// assert_eq!(bn.next_back(), Some("/some/path/ending/in"));
468    /// assert_eq!(bn.next_back(), Some("/some/path/ending"));
469    /// assert_eq!(bn.next_back(), Some("/some/path"));
470    /// assert_eq!(bn.next_back(), Some("/some"));
471    /// assert_eq!(bn.next_back(), Some("/"));
472    /// assert_eq!(bn.next_back(), None);
473    /// ```
474    pub fn dirnames<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> DirNames<'a> {
475        let s = s.as_ref();
476        if s == "/" {
477            DirNames::Root(true)
478        } else {
479            DirNames::Path { cur: s, all: s, base: 1 }
480        }
481    }
482
483    /// Return the number of levels in the path.
484    ///
485    /// # Examples
486    /// ```
487    /// use netidx_core::path::Path;
488    /// let p = Path::from("/foo/bar/baz");
489    /// assert_eq!(Path::levels(&p), 3);
490    /// ```
491    pub fn levels<T: AsRef<str> + ?Sized>(s: &T) -> usize {
492        let mut p = 0;
493        for _ in Path::parts(s) {
494            p += 1
495        }
496        p
497    }
498
499    /// return the path without the last part, or return None if the
500    /// path is empty or /.
501    ///
502    /// # Examples
503    /// ```
504    /// use netidx_core::path::Path;
505    /// let p = Path::from("/foo/bar/baz");
506    /// assert_eq!(Path::dirname(&p), Some("/foo/bar"));
507    ///
508    /// let p = Path::root();
509    /// assert_eq!(Path::dirname(&p), None);
510    ///
511    /// let p = Path::from("/foo");
512    /// assert_eq!(Path::dirname(&p), None);
513    /// ```
514    pub fn dirname<'a, T: AsRef<str> + ?Sized>(s: &'a T) -> Option<&'a str> {
515        let s = s.as_ref();
516        Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i]) })
517    }
518
519    pub fn dirname_with_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
520        let s = s.as_ref();
521        Path::rfind_sep(s).and_then(|i| if i == 0 { None } else { Some(&s[0..i + 1]) })
522    }
523
524    /// return the last part of the path, or return None if the path
525    /// is empty.
526    ///
527    /// # Examples
528    /// ```
529    /// use netidx_core::path::Path;
530    /// let p = Path::from("/foo/bar/baz");
531    /// assert_eq!(Path::basename(&p), Some("baz"));
532    ///
533    /// let p = Path::from("foo");
534    /// assert_eq!(Path::basename(&p), Some("foo"));
535    ///
536    /// let p = Path::from("foo/bar");
537    /// assert_eq!(Path::basename(&p), Some("bar"));
538    ///
539    /// let p = Path::from("");
540    /// assert_eq!(Path::basename(&p), None);
541    ///
542    /// let p = Path::from("/");
543    /// assert_eq!(Path::basename(&p), None);
544    /// ```
545    pub fn basename<T: AsRef<str> + ?Sized>(s: &T) -> Option<&str> {
546        let s = s.as_ref();
547        match Path::rfind_sep(s) {
548            None => {
549                if s.len() > 0 {
550                    Some(s)
551                } else {
552                    None
553                }
554            }
555            Some(i) => {
556                if s.len() <= 1 {
557                    None
558                } else {
559                    Some(&s[i + 1..s.len()])
560                }
561            }
562        }
563    }
564
565    fn find_sep_int<F: Fn(&str) -> Option<usize>>(mut s: &str, f: F) -> Option<usize> {
566        if s.len() == 0 {
567            None
568        } else {
569            loop {
570                match f(s) {
571                    None => return None,
572                    Some(i) => {
573                        if !PATH_ESC.is_escaped(s, i) {
574                            return Some(i);
575                        } else {
576                            s = &s[0..i];
577                        }
578                    }
579                }
580            }
581        }
582    }
583
584    /// return the position of the last path separator in the path, or
585    /// None if there isn't one.
586    ///
587    /// # Examples
588    /// ```
589    /// use netidx_core::path::Path;
590    /// let p = Path::from("/foo/bar/baz");
591    /// assert_eq!(Path::rfind_sep(&p), Some(8));
592    ///
593    /// let p = Path::from("");
594    /// assert_eq!(Path::rfind_sep(&p), None);
595    /// ```
596    pub fn rfind_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
597        let s = s.as_ref();
598        Path::find_sep_int(s, |s| s.rfind(SEP))
599    }
600
601    /// return the position of the first path separator in the path, or
602    /// None if there isn't one.
603    ///
604    /// # Examples
605    /// ```
606    /// use netidx_core::path::Path;
607    /// let p = Path::from("foo/bar/baz");
608    /// assert_eq!(Path::find_sep(&p), Some(3));
609    ///
610    /// let p = Path::from("");
611    /// assert_eq!(Path::find_sep(&p), None);
612    /// ```
613    pub fn find_sep<T: AsRef<str> + ?Sized>(s: &T) -> Option<usize> {
614        let s = s.as_ref();
615        Path::find_sep_int(s, |s| s.find(SEP))
616    }
617}