aranya_libc/
path.rs

1//! Null-terminated path handling.
2
3use alloc::vec::Vec;
4use core::{
5    cmp::Ordering,
6    ffi::{CStr, c_char},
7    fmt,
8    mem::MaybeUninit,
9    ops::Deref,
10    ptr, slice, str,
11};
12
13/// The input to `Path` is missing a null byte.
14#[derive(Debug, Eq, PartialEq, thiserror::Error)]
15#[error("missing null byte")]
16pub struct MissingNullByte(());
17
/// A borrowed, unsized view of a file path.
#[repr(transparent)]
pub struct Path(
    /// The raw bytes of the path.
    ///
    /// (Almost) every public API expects null-terminated input,
    /// yet this slice is NOT guaranteed to be null terminated.
    /// The reason is interop with `std::path::Path` and
    /// `std::path::PathBuf`: they only expose `std::ffi::OsStr`
    /// and `std::ffi::OsString`, neither of which contains a
    /// null byte.
    ///
    /// NB: when the slice does contain a null byte, that byte is
    /// always the *very last* one.
    [u8],
);
32
33impl Path {
34    /// Create a [`Path`] from possibly null-terminated bytes.
35    pub fn new<S: AsRef<[u8]> + ?Sized>(path: &S) -> &Self {
36        Self::try_new(path.as_ref()).into()
37    }
38
39    /// Returns `Ok` if `path` has a null byte, `Err` if it does
40    /// not or if the null byte is not within `isize::MAX` bytes.
41    ///
42    /// `path` is truncated at the first null byte, if any.
43    fn try_new<S: AsRef<[u8]> + ?Sized>(path: &S) -> Result<&Self, &Self> {
44        let path = path.as_ref();
45        memchr::memchr(0, path)
46            .and_then(|idx| {
47                let end = isize::try_from(idx).ok()?.checked_add(1)?.try_into().ok()?;
48                path.get(..end)
49            })
50            .map(Self::from_raw_bytes)
51            .ok_or(Self::from_raw_bytes(path))
52    }
53
54    /// Creates a `Path` from the bytes as-is.
55    fn from_raw_bytes(path: &[u8]) -> &Self {
56        // SAFETY: `&[u8]` and `&Self` have the same
57        // memory layout.
58        unsafe { &*(ptr::from_ref::<[u8]>(path) as *const Self) }
59    }
60
61    /// Create a `Path` from bytes that end with a null
62    /// terminator.
63    ///
64    /// In debug mode this panics if `path` does not with with
65    /// a null terminator.
66    fn from_null_terminated_bytes(path: &[u8]) -> &Self {
67        debug_assert!(path.ends_with(&[0]) || path.is_empty());
68
69        Self::from_raw_bytes(path)
70    }
71
72    /// Create a [`Path`] from a raw pointer.
73    ///
74    /// # Safety
75    ///
76    /// - `ptr` must not be null.
77    /// - `ptr` must be null terminated.
78    /// - `ptr` must be valid for reads up to the null
79    ///   terminator.
80    /// - `ptr` must not be mutated for the duration of `'a`.
81    /// - The null terminator must be within `isize::MAX` bytes
82    ///   from `ptr`.
83    pub unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a Self {
84        debug_assert!(!ptr.is_null());
85
86        // SAFETY: See the function's safety docs.
87        let len = unsafe { libc::strlen(ptr) };
88        debug_assert!(len < (isize::MAX - 1) as usize);
89
90        // SAFETY: See the function's safety docs.
91        let path = unsafe {
92            // `len+1` does not overflow since the null
93            // terminator must be within `isize::MAX` bytes from
94            // `ptr`.
95            #[allow(clippy::arithmetic_side_effects)]
96            slice::from_raw_parts(ptr.cast(), len + 1)
97        };
98        Self::from_null_terminated_bytes(path)
99    }
100
101    /// Create a [`Path`] from null-terminated bytes.
102    ///
103    /// # Errors
104    ///
105    /// `path` must contain at least one null byte.
106    pub fn from_bytes_until_null(path: &[u8]) -> Result<&Self, MissingNullByte> {
107        Self::try_new(path).map_err(|_| MissingNullByte(()))
108    }
109
110    /// Create a [`Path`] from possibly null-terminated bytes.
111    pub fn from_bytes(path: &[u8]) -> &Self {
112        Self::new(path)
113    }
114
115    /// Create a [`Path`] from a [`CStr`].
116    pub fn from_cstr(path: &CStr) -> &Self {
117        Self::from_null_terminated_bytes(path.to_bytes_with_nul())
118    }
119
120    /// Returns the path as `&[u8]`.
121    pub const fn as_bytes(&self) -> &[u8] {
122        &self.0
123    }
124
125    /// Returns the path as `MaybeUtf8`.
126    fn display(&self) -> MaybeUtf8<'_> {
127        MaybeUtf8(self.as_bytes_without_null())
128    }
129
130    /// Returns the path as `&[u8]` *without* the trailing null
131    /// byte, if any.
132    fn as_bytes_without_null(&self) -> &[u8] {
133        self.as_bytes()
134            .strip_suffix(&[0])
135            .unwrap_or(self.as_bytes())
136    }
137
138    /// Reports whether the path is absolute.
139    pub fn is_abs(&self) -> bool {
140        self.as_bytes().starts_with(b"/")
141    }
142
143    /// Creates an owned [`PathBuf`] with `path` joined to
144    /// `self`.
145    pub fn join<P: AsRef<Self>>(&self, path: P) -> PathBuf {
146        PathBuf::from_iter([self, path.as_ref()])
147    }
148
149    /// Converts the `Path` into a [`PathBuf`].
150    pub fn to_path_buf(&self) -> PathBuf {
151        PathBuf::from(self)
152    }
153
154    /// Invokes `f` with `Path` converted to a null-terminated
155    /// C-style string.
156    pub fn with_cstr<R>(&self, f: &dyn Fn(*const c_char) -> R) -> R {
157        // See https://github.com/rust-lang/rust/blob/7a5867425959b4b5d69334fa6f02150dc2a5d128/library/std/src/sys/pal/common/small_c_string.rs
158        let path = self.as_bytes();
159        if path.ends_with(&[0]) {
160            f(path.as_ptr().cast())
161        } else {
162            self.with_cstr_no_null(f)
163        }
164    }
165
166    #[cold]
167    #[inline(never)]
168    fn with_cstr_no_null<R>(&self, f: &dyn Fn(*const c_char) -> R) -> R {
169        let path = self.as_bytes();
170
171        // This size is taken from the stdlib's implementation,
172        // which was chosen in order to avoid a probe frame.
173        const MAX_STACK: usize = 384;
174        if path.len() < MAX_STACK {
175            let mut buf = MaybeUninit::<[u8; MAX_STACK]>::uninit();
176            // SAFETY: `buf` is obviously a valid pointer and
177            // we've checked that `path.len() < buf.len()`.
178            unsafe {
179                buf.as_mut_ptr()
180                    .cast::<u8>()
181                    .copy_from_nonoverlapping(path.as_ptr(), path.len());
182                // Add the null terminator.
183                buf.as_mut_ptr().cast::<u8>().add(path.len()).write(0);
184            }
185            f(buf.as_ptr().cast())
186        } else {
187            let path = self.to_path_buf();
188            f(path.as_ptr())
189        }
190    }
191}
192
193impl Eq for Path {}
194impl PartialEq for Path {
195    fn eq(&self, other: &Self) -> bool {
196        self.as_bytes_without_null() == other.as_bytes_without_null()
197    }
198}
199impl Ord for Path {
200    fn cmp(&self, other: &Self) -> Ordering {
201        Ord::cmp(self.as_bytes_without_null(), other.as_bytes_without_null())
202    }
203}
204impl PartialOrd for Path {
205    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
206        Some(Ord::cmp(self, other))
207    }
208}
209
210// From stdlib v1.77.
211macro_rules! impl_cmp {
212    (<$($life:lifetime),*> $lhs:ty, $rhs: ty) => {
213        impl<$($life),*> PartialEq<$rhs> for $lhs {
214            fn eq(&self, other: &$rhs) -> bool {
215                <Path as PartialEq>::eq(self, other)
216            }
217        }
218
219        impl<$($life),*> PartialEq<$lhs> for $rhs {
220            fn eq(&self, other: &$lhs) -> bool {
221                <Path as PartialEq>::eq(self, other)
222            }
223        }
224
225        impl<$($life),*> PartialOrd<$rhs> for $lhs {
226            fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
227                <Path as PartialOrd>::partial_cmp(self, other)
228            }
229        }
230
231        impl<$($life),*> PartialOrd<$lhs> for $rhs {
232            fn partial_cmp(&self, other: &$lhs) -> Option<Ordering> {
233                <Path as PartialOrd>::partial_cmp(self, other)
234            }
235        }
236    };
237}
238impl_cmp!(<> PathBuf, Path);
239impl_cmp!(<'a> PathBuf, &'a Path);
240
241// From stdlib v1.77.
242macro_rules! impl_cmp_raw{
243    (<$($life:lifetime),*> $lhs:ty, $rhs: ty) => {
244        impl<$($life),*> PartialEq<$rhs> for $lhs {
245            fn eq(&self, other: &$rhs) -> bool {
246                <Path as PartialEq>::eq(self, Path::new(other))
247            }
248        }
249
250        impl<$($life),*> PartialEq<$lhs> for $rhs {
251            fn eq(&self, other: &$lhs) -> bool {
252                <Path as PartialEq>::eq(Path::new(self), other)
253            }
254        }
255
256        impl<$($life),*> PartialOrd<$rhs> for $lhs {
257            fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
258                <Path as PartialOrd>::partial_cmp(self, Path::new(other))
259            }
260        }
261
262        impl<$($life),*> PartialOrd<$lhs> for $rhs {
263            fn partial_cmp(&self, other: &$lhs) -> Option<Ordering> {
264                <Path as PartialOrd>::partial_cmp(Path::new(self), other)
265            }
266        }
267    };
268}
269impl_cmp_raw!(<> PathBuf, str);
270impl_cmp_raw!(<'a> PathBuf, &'a str);
271impl_cmp_raw!(<> Path, str);
272impl_cmp_raw!(<'a> Path, &'a str);
273impl_cmp_raw!(<'a> &'a Path, str);
274impl_cmp_raw!(<> PathBuf, [u8]);
275impl_cmp_raw!(<'a> PathBuf, &'a [u8]);
276impl_cmp_raw!(<> Path, [u8]);
277impl_cmp_raw!(<'a> Path, &'a [u8]);
278impl_cmp_raw!(<'a> &'a Path, [u8]);
279
280impl AsRef<Self> for Path {
281    fn as_ref(&self) -> &Self {
282        self
283    }
284}
285
/// Borrows a `std::path::PathBuf` as a [`Path`] by going through
/// the conversion for `std::path::Path`.
#[cfg(any(test, feature = "std"))]
impl AsRef<Path> for std::path::PathBuf {
    fn as_ref(&self) -> &Path {
        <std::path::Path as AsRef<Path>>::as_ref(self.as_path())
    }
}
292
/// Borrows a `std::path::Path` as a [`Path`] over its encoded
/// bytes.
#[cfg(any(test, feature = "std"))]
impl AsRef<Path> for std::path::Path {
    fn as_ref(&self) -> &Path {
        // NB: as of Rust 1.77, `OsStr::as_encoded_bytes` returns
        // the raw bytes for Unixy platforms.
        let raw: &[u8] = self.as_os_str().as_encoded_bytes();
        Path::new(raw)
    }
}
301
302impl<'a> From<&'a CStr> for &'a Path {
303    fn from(path: &'a CStr) -> Self {
304        Path::from_cstr(path)
305    }
306}
307
308impl<'a> TryFrom<&'a [u8]> for &'a Path {
309    type Error = MissingNullByte;
310
311    fn try_from(path: &'a [u8]) -> Result<Self, Self::Error> {
312        Path::from_bytes_until_null(path)
313    }
314}
315
316impl<'a> From<Result<&'a Path, &'a Path>> for &'a Path {
317    fn from(res: Result<&'a Path, &'a Path>) -> &'a Path {
318        match res {
319            Ok(v) | Err(v) => v,
320        }
321    }
322}
323
324impl fmt::Display for Path {
325    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
326        self.display().fmt(f)
327    }
328}
329
330impl fmt::Debug for Path {
331    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
332        f.debug_tuple("Path").field(&self.display()).finish()
333    }
334}
335
/// Formatting adapter for a byte slice that might be UTF-8.
///
/// Valid UTF-8 is formatted as a `str`; anything else falls back
/// to the `Debug` rendering of the raw bytes.
struct MaybeUtf8<'a>(&'a [u8]);

impl MaybeUtf8<'_> {
    /// Returns the bytes as `&str` when they are valid UTF-8;
    /// otherwise hands the raw bytes back as the error.
    fn try_as_str(&self) -> Result<&str, &[u8]> {
        match str::from_utf8(self.0) {
            Ok(text) => Ok(text),
            Err(_) => Err(self.0),
        }
    }
}

impl fmt::Display for MaybeUtf8<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.try_as_str() {
            // Forward to `str` so width/padding flags still apply.
            Ok(text) => fmt::Display::fmt(text, f),
            Err(raw) => write!(f, "{raw:?}"),
        }
    }
}

impl fmt::Debug for MaybeUtf8<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.try_as_str() {
            // `str`'s `Debug` adds the quotes and escapes.
            Ok(text) => fmt::Debug::fmt(text, f),
            Err(raw) => write!(f, "{raw:?}"),
        }
    }
}
363
/// An owned, growable [`Path`].
#[derive(PartialEq, Eq)]
pub struct PathBuf {
    /// The path's bytes.
    ///
    /// NB: unlike `Path`, this *always* has a null terminator
    /// once a path has been pushed; only a freshly created,
    /// still-empty buffer has none.
    buf: Vec<u8>,
}
371
372impl PathBuf {
373    const fn new() -> Self {
374        Self { buf: Vec::new() }
375    }
376
377    /// This is a method on `PathBuf` instead of `Path` because
378    /// `PathBuf` always has a null terminator.
379    fn as_ptr(&self) -> *const c_char {
380        self.buf.as_ptr().cast()
381    }
382
383    fn as_path(&self) -> &Path {
384        Path::from_null_terminated_bytes(self.buf.as_slice())
385    }
386
387    /// See `std::path::PathBuf::join`.
388    fn push<P: AsRef<Path>>(&mut self, path: P) {
389        // Get rid of the trailing null byte to simplify the code
390        // below.
391        if let Some(v) = self.buf.pop() {
392            debug_assert_eq!(v, 0);
393        }
394
395        let path = path.as_ref();
396        if path.is_abs() {
397            self.buf = path.as_bytes().to_vec();
398        } else {
399            if !self.buf.is_empty() && !self.buf.ends_with(b"/") {
400                self.buf.push(b'/');
401            }
402            self.buf.extend_from_slice(path.as_bytes());
403        }
404        if !self.buf.ends_with(&[0]) {
405            self.buf.push(0);
406        }
407        debug_assert!(self.buf.ends_with(&[0]));
408    }
409}
410
411impl AsRef<Path> for PathBuf {
412    fn as_ref(&self) -> &Path {
413        self.as_path()
414    }
415}
416
417impl Deref for PathBuf {
418    type Target = Path;
419
420    fn deref(&self) -> &Self::Target {
421        self.as_path()
422    }
423}
424
425impl<'a> From<&'a Path> for PathBuf {
426    fn from(path: &'a Path) -> Self {
427        let mut buf = Self::new();
428        buf.push(path);
429        buf
430    }
431}
432
433impl<P> FromIterator<P> for PathBuf
434where
435    P: AsRef<Path>,
436{
437    fn from_iter<I>(iter: I) -> Self
438    where
439        I: IntoIterator<Item = P>,
440    {
441        let mut buf = Self::new();
442        buf.extend(iter);
443        buf
444    }
445}
446
447impl<P> Extend<P> for PathBuf
448where
449    P: AsRef<Path>,
450{
451    fn extend<I: IntoIterator<Item = P>>(&mut self, iter: I) {
452        for elem in iter {
453            self.push(elem.as_ref());
454        }
455    }
456}
457
458impl fmt::Display for PathBuf {
459    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
460        fmt::Display::fmt(&**self, f)
461    }
462}
463
464impl fmt::Debug for PathBuf {
465    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
466        fmt::Debug::fmt(&**self, f)
467    }
468}
469
#[cfg(test)]
mod tests {
    use core::ptr;

    use super::*;

    /// Builds a `&Path` directly from a literal's bytes, without
    /// going through any of `Path`'s constructors.
    macro_rules! path {
        ($lit:literal) => {{
            let bytes: &[u8] = $lit.as_ref();
            // SAFETY: `&Path` has the same size as `&[u8]`.
            unsafe { &*(ptr::from_ref::<[u8]>(bytes) as *const Path) }
        }};
    }

    /// Test [`Path::from_bytes_until_null`].
    #[test]
    fn test_path_from_bytes_until_null() {
        let cases: &[(&[u8], _)] = &[
            (b"/foo", Err(MissingNullByte(()))),
            (b"/foo\0", Ok(path!("/foo\0"))),
            (b"/foo\0\0", Ok(path!("/foo\0"))),
            (b"/foo\0\0bar\0", Ok(path!("/foo\0"))),
        ];
        for (i, (input, expected)) in cases.iter().enumerate() {
            assert_eq!(Path::from_bytes_until_null(input), *expected, "#{i}");
        }
    }

    /// Test [`Path::from_ptr`].
    #[test]
    fn test_path_from_ptr() {
        let cases: &[(&[u8], _)] = &[
            (b"/foo\0", path!("/foo\0")),
            (b"/foo\0\0", path!("/foo\0")),
            (b"/foo\0\0bar\0", path!("/foo\0")),
        ];
        for (i, (input, expected)) in cases.iter().enumerate() {
            // SAFETY: all inputs contain a null byte within
            // `isize::MAX`.
            let got = unsafe { Path::from_ptr(input.as_ptr().cast()) };
            assert_eq!(got, *expected, "#{i}");
        }
    }

    /// Test [`Path::from_cstr`].
    #[test]
    fn test_path_from_cstr() {
        let cases: &[(&[u8], _)] = &[
            (b"/foo\0", path!("/foo\0")),
            (b"/foo\0\0", path!("/foo\0")),
            (b"/foo\0\0bar\0", path!("/foo\0")),
        ];
        for (i, (input, expected)) in cases.iter().enumerate() {
            let cstr = CStr::from_bytes_until_nul(input).unwrap();
            assert_eq!(Path::from_cstr(cstr), *expected, "#{i}");
        }
    }

    /// Test that equality between paths, strings, bytes and
    /// `PathBuf`s ignores a trailing null byte, in both operand
    /// orders.
    #[test]
    fn test_path_partial_eq() {
        let cases = [
            (path!("/foo"), "/foo"),
            (path!("/foo\0"), "/foo"),
            (path!("/foo\0"), "/foo\0"),
            (path!("/foo"), "/foo\0"),
        ];
        for (i, (a, b)) in cases.into_iter().enumerate() {
            // `&Path` against `&str`.
            assert_eq!(a, b, "#{i}: (a,b) str");
            assert_eq!(b, a, "#{i}: (b,a) str");

            // `&Path` against `&[u8]`.
            assert_eq!(a, b.as_bytes(), "#{i}: (a,b) bytes");
            assert_eq!(b.as_bytes(), a, "#{i}: (b,a) bytes");

            // `PathBuf` against `&str`.
            assert_eq!(a.to_path_buf(), b, "#{i}: (a,b) PathBuf");
            assert_eq!(b, a.to_path_buf(), "#{i}: (b,a) PathBuf");
        }
    }

    /// Test [`Path::join`].
    #[test]
    fn test_path_join() {
        let cases: &[(&[&str], &Path)] = &[
            (&["foo"], path!("foo")),
            (&["foo", "bar"], path!("foo/bar")),
            (&["foo", "bar", "baz"], path!("foo/bar/baz")),
            (&["foo/", "bar/", "baz"], path!("foo/bar/baz")),
            (&["foo/", "bar/", "baz/"], path!("foo/bar/baz/")),
            (&["foo/", "bar/", "/baz/"], path!("/baz/")),
            (&["foo/", "/bar/", "/baz/"], path!("/baz/")),
            (&["/foo/", "/bar/", "/baz/"], path!("/baz/")),
            (&["/foo/", "/bar/", "baz//"], path!("/bar/baz//")),
            (&["foo/", "bar", "", "/", "", "baz//"], path!("/baz//")),
        ];
        for (i, (components, expected)) in cases.iter().enumerate() {
            let joined: PathBuf = components.iter().map(Path::new).collect();
            assert_eq!(joined, *expected, "#{i}");
        }
    }
}