realpath_ext/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2
3mod slicevec;
4mod util;
5
6use slicevec::SliceVec;
7use util::{ComponentIter, ComponentStack, SymlinkCounter};
8
// Baseline path-length limit used to size the default buffers in this file.
// On Unix-family targets the value comes from the libc crate.
#[cfg(target_family = "unix")]
const PATH_MAX: usize = libc::PATH_MAX as usize;
// On WASI a fixed value of 4096 is used instead of a libc constant.
#[cfg(target_os = "wasi")]
const PATH_MAX: usize = 4096;
13
/// Lexically "normalize" the given path, returning an owned `PathBuf`.
///
/// This is the allocating convenience wrapper around [`normpath_raw()`]: it allocates an output
/// buffer as long as the input, delegates to that function, and converts a returned OS error
/// code into a [`std::io::Error`]. See [`normpath_raw()`] for the normalization rules and the
/// possible errors.
#[cfg(feature = "std")]
pub fn normpath<P: AsRef<std::path::Path>>(path: P) -> std::io::Result<std::path::PathBuf> {
    #[cfg(target_family = "unix")]
    use std::os::unix::prelude::*;
    #[cfg(target_os = "wasi")]
    use std::os::wasi::prelude::*;

    let raw = path.as_ref().as_os_str().as_bytes();

    // The output buffer is allocated with exactly the input's length.
    let mut out = vec![0; raw.len()];

    match normpath_raw(raw, &mut out) {
        Ok(n) => {
            // Keep only the bytes that were actually written.
            out.truncate(n);
            Ok(std::ffi::OsString::from_vec(out).into())
        }
        Err(eno) => Err(std::io::Error::from_raw_os_error(eno)),
    }
}
34
35/// "Normalize" the given path.
36///
37/// Other than the differences described below, the `path` and `buf` arguments to this function,
38/// and the return values, have the same meaning as for [`realpath_raw()`].
39///
40/// This function was designed after Python's `os.path.normpath()`. It will remove `.` elements,
41/// condense extra slashes, and collapse `..` entries. Think of it as a version of
42/// [`realpath_raw()`] that doesn't actually touch the filesystem. (As a consequence of this, if
43/// the given `path` is relative, the returned path will also be relative.)
44///
45/// Note that because this function doesn't actually touch the filesystem, the returned path may
46/// not refer to the correct file! Certain combinations of `..` and/or symbolic links can cause
47/// this; the only way to get the definitive canonicalized path is to use [`realpath_raw()`].
48///
49/// Example usage:
50///
51/// ```
52/// # use realpath_ext::normpath_raw;
53/// let mut buf = [0; libc::PATH_MAX as usize];
54/// let n = normpath_raw(b"/a/b/./c/../", &mut buf).unwrap();
55/// assert_eq!(&buf[..n], b"/a/b");
56/// ```
57///
58/// # Errors
59///
60/// This function may fail with the following errors:
61///
62/// - `ENAMETOOLONG`: The given `buf` is not long enough to store the normalized path.
63/// - `ENOENT`: The given `path` is empty.
64/// - `EINVAL`: The given `path` contains a NUL byte (not allowed in \*nix paths).
65pub fn normpath_raw(path: &[u8], buf: &mut [u8]) -> Result<usize, i32> {
66    let mut buf = SliceVec::empty(buf);
67
68    for component in ComponentIter::new(path)? {
69        if component == b"/" || component == b"//" {
70            buf.replace(component)?;
71        } else if component == b".." {
72            buf.make_parent_path()?;
73        } else {
74            if !matches!(buf.as_ref(), b"/" | b"//" | b"") {
75                buf.push(b'/')?;
76            }
77            buf.extend_from_slice(component)?;
78        }
79    }
80
81    if buf.is_empty() {
82        buf.push(b'.')?;
83    }
84
85    Ok(buf.len())
86}
87
bitflags::bitflags! {
    /// Flags that modify path resolution.
    ///
    /// These flags were modeled after the options to the GNU `realpath` program. Each flag
    /// occupies its own bit of the underlying `u32`.
    pub struct RealpathFlags: u32 {
        /// Allow any component of the given path to be missing, inaccessible, or not a directory
        /// when it should be.
        ///
        /// With this flag, `ENOENT`, `EACCES` and `ENOTDIR` errors encountered while resolving a
        /// component are ignored, and the trailing-slash directory check is skipped.
        const ALLOW_MISSING = 0x01;
        /// Allow the last component of the given path to be missing.
        ///
        /// Only `ENOENT` is tolerated, and only when no further components remain to be
        /// resolved.
        const ALLOW_LAST_MISSING = 0x02;
        /// Do not resolve symbolic links as they are encountered.
        ///
        /// Each component is still probed to make sure it exists, but a symbolic link is then
        /// treated like any other path component.
        ///
        /// Note that if this option is passed, the returned path may not refer to the correct file!
        /// Certain combinations of `..` and/or symbolic links can cause this.
        const IGNORE_SYMLINKS = 0x04;
    }
}
105
/// A "builder" that allows customizing options to [`realpath()`].
///
/// `realpath(path, flags)` is equivalent to `RealpathBuilder::new().flags(flags).realpath(path)`.
#[cfg(feature = "std")]
#[derive(Clone)]
pub struct RealpathBuilder {
    // Largest output buffer `realpath()` will grow to before giving up with `ENAMETOOLONG`.
    max_len: usize,
    // Resolution flags passed through to the underlying resolver.
    flags: RealpathFlags,
}
115
#[cfg(feature = "std")]
impl RealpathBuilder {
    /// Construct a builder with the default settings.
    ///
    /// The returned builder has its `flags` empty. `max_len` starts at 32768 on WASI, and at
    /// `PATH_MAX` on other OSes.
    #[inline]
    pub fn new() -> Self {
        let max_len = if cfg!(target_os = "wasi") {
            32768
        } else {
            PATH_MAX
        };

        Self {
            max_len,
            flags: RealpathFlags::empty(),
        }
    }

    /// Set the maximum path length allowed before failing with `ENAMETOOLONG`.
    ///
    /// Generally speaking, this is only useful if the OS supports paths longer than `PATH_MAX` (for
    /// example, this is the case on WASI).
    ///
    /// Note: [`Self::realpath()`] starts with a buffer of at most `PATH_MAX` bytes, and doubles it
    /// (up to this length) each time resolution fails with `ENAMETOOLONG`.
    #[inline]
    pub fn max_len(&mut self, max_len: usize) -> &mut Self {
        self.max_len = max_len;
        self
    }

    /// Set the flags used to modify path resolution.
    ///
    /// See [`RealpathFlags`] for the meaning of each flag.
    #[inline]
    pub fn flags(&mut self, flags: RealpathFlags) -> &mut Self {
        self.flags = flags;
        self
    }

    /// Canonicalize the given path.
    ///
    /// On failure, the OS error code produced during resolution is wrapped in a
    /// [`std::io::Error`].
    pub fn realpath<P: AsRef<std::path::Path>>(
        &self,
        path: P,
    ) -> std::io::Result<std::path::PathBuf> {
        #[cfg(target_family = "unix")]
        use std::os::unix::prelude::*;
        #[cfg(target_os = "wasi")]
        use std::os::wasi::prelude::*;

        let raw_path = path.as_ref().as_os_str().as_bytes();

        // Start small; the scratch buffer always gets 100 extra bytes of headroom.
        let initial = self.max_len.min(PATH_MAX);
        let mut out = vec![0; initial];
        let mut scratch = vec![0; initial + 100];

        loop {
            let eno = match realpath_raw_inner(raw_path, &mut out, &mut scratch, self.flags) {
                Ok(n) => {
                    out.truncate(n);
                    return Ok(std::ffi::OsString::from_vec(out).into());
                }
                Err(eno) => eno,
            };

            // Only `ENAMETOOLONG` is worth retrying, and only while there is room to grow.
            if eno != libc::ENAMETOOLONG || out.len() >= self.max_len {
                return Err(std::io::Error::from_raw_os_error(eno));
            }

            // Double both buffers, capped at the configured maximum.
            let grown = out.len().saturating_mul(2).min(self.max_len);
            out.resize(grown, 0);
            scratch.resize(grown + 100, 0);
        }
    }
}
193
#[cfg(feature = "std")]
impl Default for RealpathBuilder {
    /// Identical to [`RealpathBuilder::new()`].
    #[inline]
    fn default() -> Self {
        RealpathBuilder::new()
    }
}
201
/// Canonicalize the given path, returning an owned `PathBuf`.
///
/// This is effectively an allocating wrapper around [`realpath_raw()`]; see that function's
/// documentation for the resolution rules and the possible errors.
///
/// Note that on non-WASI OSes, this function is limited to resolving paths of `PATH_MAX` bytes.
/// Use [`RealpathBuilder`] to raise that limit.
#[cfg(feature = "std")]
pub fn realpath<P: AsRef<std::path::Path>>(
    path: P,
    flags: RealpathFlags,
) -> std::io::Result<std::path::PathBuf> {
    // A default builder with only the flags overridden.
    let mut builder = RealpathBuilder::new();
    builder.flags(flags);
    builder.realpath(path)
}
216
/// A "builder" that allows customizing options to `realpath_raw()`.
///
/// `realpath_raw(path, buf, flags)` is equivalent to
/// `RealpathRawBuilder::new().flags(flags).realpath_raw(path, buf)`.
///
/// The lifetime `'a` is that of the optional caller-supplied temporary buffer.
pub struct RealpathRawBuilder<'a> {
    // Resolution flags passed through to the underlying resolver.
    flags: RealpathFlags,
    // Caller-supplied scratch space; `None` means a stack buffer is used instead.
    tmp: Option<&'a mut [u8]>,
}
225
226impl<'a> RealpathRawBuilder<'a> {
227    /// Create a new "builder".
228    ///
229    /// The returned builder has its `flags` empty, and `temp_buffer` set to `None`.
230    #[inline]
231    pub fn new() -> Self {
232        Self {
233            flags: RealpathFlags::empty(),
234            tmp: None,
235        }
236    }
237
238    /// Set the flags used to modify path resolution.
239    ///
240    /// See [`RealpathFlags`] for more information.
241    #[inline]
242    pub fn flags(&mut self, flags: RealpathFlags) -> &mut Self {
243        self.flags = flags;
244        self
245    }
246
247    /// Set the temporary buffer used to store intermediate results.
248    ///
249    /// It's recommended to make this buffer somewhat larger than the `buf` passed to
250    /// [`Self::realpath_raw()`], since the current algorithm requires a bit of overhead in the
251    /// temporary buffer.
252    ///
253    /// If `tmp` is `None` (default), a temporary buffer of length `PATH_MAX + 100` will be
254    /// allocated on the stack.
255    #[inline]
256    pub fn temp_buffer(&mut self, tmp: Option<&'a mut [u8]>) -> &mut Self {
257        self.tmp = tmp;
258        self
259    }
260
261    /// Canonicalize the path given by `path` into the buffer given by `buf`.
262    ///
263    /// `path`, `buf`, and the return value have the same meanings as for [`realpath_raw()`].
264    #[inline]
265    pub fn realpath_raw(&mut self, path: &[u8], buf: &mut [u8]) -> Result<usize, i32> {
266        if let Some(tmp) = self.tmp.as_mut() {
267            realpath_raw_inner(path, buf, tmp, self.flags)
268        } else {
269            realpath_raw(path, buf, self.flags)
270        }
271    }
272}
273
274impl Default for RealpathRawBuilder<'_> {
275    #[inline]
276    fn default() -> Self {
277        Self::new()
278    }
279}
280
281/// Canonicalize the given path.
282///
283/// This function resolves the path specified by `path`, storing the result in `buf`. On success,
284/// the length of the resolved path is returned; on error, an OS error code is returned.
285///
286/// If `flags` is specified as `RealpathFlags::empty()`, this is roughly equivalent to the libc's
287/// `realpath()`. Otherwise, the given `flags` modify aspects of path resolution.
288///
289/// This function does not allocate any memory. It will only call the following C functions:
290/// - `sysconf(_SC_SYMLOOP_MAX)`
291/// - `readlink()`
292/// - `stat()` (only if it needs to be verified that the path is a directory)
293/// - `getcwd()` (only if the given `path` is relative and does not contain a reference to an
294///   absolute symbolic link)
295///
296/// Example usage:
297///
298/// ```
299/// # use realpath_ext::{RealpathFlags, realpath_raw};
300/// let mut buf = [0; libc::PATH_MAX as usize];
301/// let n = realpath_raw(b"///", &mut buf, RealpathFlags::empty()).unwrap();
302/// assert_eq!(&buf[..n], b"/");
303/// ```
304///
305/// The returned path will ALWAYS be absolute.
306///
307/// # Errors
308///
309/// This function may fail with the following errors:
310///
311/// - `ENAMETOOLONG`: Either:
312///    1. The given `buf` is not long enough to store the canonicalized path, or
313///    2. The current working directory cannot be represented in a buffer of length `PATH_MAX`, or
314///    3. An intermediate result created by combining any symbolic link paths exceeded the system
315///       `PATH_MAX`. (Note that the actual limit is slightly higher than `PATH_MAX` to account for
316///       storage overhead; this should not be relied upon.)
317/// - `EINVAL`: The given `path` contains a NUL byte (not allowed in \*nix paths).
/// - `ELOOP`: Too many symbolic links were encountered during resolution.
319///
320///   This function will use `sysconf()` to check the system's `SYMLOOP_MAX` value to determine
321///   the limit. If that fails (for example, it always fails on glibc), this function will fall
322///   back on a limit of 40 (which is Linux's limit).
323/// - `ENOENT`/`EACCES`/`ENOTDIR`: The given `path` (or a component of it) does not exist, is
324///   inaccessible, or is not a directory (respectively).
325///
326///   `ENOENT` and `EACCES` may also be returned if `getcwd()` had to be called (see above for the
327///   conditions in which this may be necessary) and the path to the current directory cannot be
328///   obtained.
329///
330///   (Note that these errors may be ignored depending on the specified `flags`.)
331/// - `EIO`: An I/O error occurred while interacting with the filesystem.
332pub fn realpath_raw(path: &[u8], buf: &mut [u8], flags: RealpathFlags) -> Result<usize, i32> {
333    let mut tmp = [0u8; PATH_MAX + 100];
334    realpath_raw_inner(path, buf, &mut tmp, flags)
335}
336
337fn realpath_raw_inner(
338    path: &[u8],
339    buf: &mut [u8],
340    tmp: &mut [u8],
341    flags: RealpathFlags,
342) -> Result<usize, i32> {
343    let mut stack = ComponentStack::new(tmp);
344
345    let mut path_it = ComponentIter::new(path)?;
346
347    let mut buf = SliceVec::empty(buf);
348
349    let mut links = SymlinkCounter::new();
350
351    while let Some(component) = stack.next().or_else(|| path_it.next()) {
352        debug_assert_ne!(buf.as_ref(), b".");
353
354        if component == b"/" || component == b"//" {
355            buf.replace(component)?;
356        } else if component == b".." {
357            buf.make_parent_path()?;
358        } else {
359            let oldlen = buf.len();
360
361            if !matches!(buf.as_ref(), b"/" | b"//" | b"") {
362                buf.push(b'/')?;
363            }
364            buf.extend_from_slice(component)?;
365            buf.push(b'\0')?;
366
367            let res = if flags.contains(RealpathFlags::IGNORE_SYMLINKS) {
368                // If IGNORE_SYMLINKS was passed, call readlink() to make sure it exists, but then
369                // act like it isn't a symlink if it is
370                Err(unsafe { util::readlink_empty(buf.as_ptr()) }
371                    .err()
372                    .unwrap_or(libc::EINVAL))
373            } else {
374                unsafe { stack.push_readlink(buf.as_ptr()) }
375            };
376
377            match res {
378                Ok(()) => {
379                    links.advance()?;
380                    debug_assert!(buf.len() > oldlen);
381                    buf.truncate(oldlen);
382                }
383
384                // Not a symlink; just remove the trailing NUL
385                Err(libc::EINVAL) => {
386                    buf.pop();
387                }
388
389                // In these conditions, components of the path are allowed to not exist/not be
390                // accessible/not be a directory
391                Err(libc::ENOENT) | Err(libc::EACCES) | Err(libc::ENOTDIR)
392                    if flags.contains(RealpathFlags::ALLOW_MISSING) =>
393                {
394                    buf.pop();
395                }
396
397                Err(libc::ENOENT)
398                    if flags.contains(RealpathFlags::ALLOW_LAST_MISSING)
399                        && stack.is_empty()
400                        && path_it.is_empty() =>
401                {
402                    buf.pop();
403                }
404
405                Err(eno) => return Err(eno),
406            }
407        }
408    }
409
410    /// If required, check that `buf` refers to a directory.
411    fn maybe_check_isdir(path: &[u8], buf: &mut SliceVec, flags: RealpathFlags) -> Result<(), i32> {
412        if (path.ends_with(b"/") || path.ends_with(b"/."))
413            && !flags.contains(RealpathFlags::ALLOW_MISSING)
414        {
415            buf.push(b'\0')?;
416            match unsafe { util::check_isdir(buf.as_ptr()) } {
417                Ok(()) => (),
418                Err(libc::ENOENT) if flags.contains(RealpathFlags::ALLOW_LAST_MISSING) => (),
419                Err(eno) => return Err(eno),
420            }
421            buf.pop();
422        }
423
424        Ok(())
425    }
426
427    let mut tmp = SliceVec::empty(stack.clear());
428
429    if buf.as_ref() == b"" {
430        util::getcwd(&mut buf)?;
431        // We know `buf` refers to a directory
432    } else if buf.as_ref() == b".." {
433        util::getcwd(&mut buf)?;
434        buf.make_parent_path()?;
435        // We know `buf` refers to a directory
436    } else if buf.starts_with(b"../") {
437        let mut n = count_leading_dotdot(&buf);
438        if &buf[(n * 3)..] == b".." {
439            buf.clear();
440            n += 1;
441            // We know `buf` refers to a directory
442        } else {
443            maybe_check_isdir(path, &mut buf, flags)?;
444            buf.remove_range(0..(n * 3 - 1));
445        }
446
447        util::getcwd(&mut tmp)?;
448
449        for _ in 0..n {
450            tmp.make_parent_path()?;
451        }
452
453        buf.insert_from_slice(0, &tmp)?;
454    } else if !buf.starts_with(b"/") {
455        debug_assert!(!buf.starts_with(b"./"));
456        debug_assert_ne!(buf.as_ref(), b".");
457
458        maybe_check_isdir(path, &mut buf, flags)?;
459
460        tmp.clear();
461        util::getcwd(&mut tmp)?;
462        debug_assert!(tmp.len() > 0);
463        tmp.push(b'/')?;
464        buf.insert_from_slice(0, &tmp)?;
465    } else if !matches!(buf.as_ref(), b"/" | b"//") {
466        // We don't have to check "/" or "//", but we do have to check other paths
467        maybe_check_isdir(path, &mut buf, flags)?;
468    }
469
470    Ok(buf.len())
471}
472
/// Count how many complete `../` prefixes `s` begins with.
///
/// A trailing `..` without a slash is not counted.
fn count_leading_dotdot(s: &[u8]) -> usize {
    // Walk the input three bytes at a time and stop at the first group that isn't `../`.
    s.chunks(3).take_while(|group| *group == b"../").count()
}
481
#[cfg(test)]
mod tests {
    use super::*;

    /// Only complete `../` prefixes are counted.
    #[test]
    fn test_count_leading_dotdot() {
        let cases: [(&[u8], usize); 5] = [
            (b"", 0),
            (b"..", 0),
            (b"../a", 1),
            (b"../../a", 2),
            (b"../a/../b", 1),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(count_leading_dotdot(input), *expected);
        }
    }

    /// Successful normalizations, followed by the two documented error cases.
    #[test]
    fn test_normpath_raw() {
        fn check(input: &[u8], expected: &[u8]) {
            let mut buf = [0; 100];
            let n = normpath_raw(input, &mut buf).unwrap();
            assert_eq!(&buf[..n], expected);
        }

        check(b"/", b"/");
        check(b".", b".");
        check(b"a", b"a");
        check(b"a/..", b".");
        check(b"//a/./b/../c/", b"//a/c");

        let mut buf = [0; 100];
        assert_eq!(normpath_raw(b"", &mut buf).unwrap_err(), libc::ENOENT);
        assert_eq!(normpath_raw(b"\0", &mut buf).unwrap_err(), libc::EINVAL);
    }

    /// The allocating wrapper must agree with the raw function.
    #[cfg(feature = "std")]
    #[test]
    fn test_normpath() {
        let ok_cases = [
            ("/", "/"),
            (".", "."),
            ("a/..", "."),
            ("//a/./b/../c/", "//a/c"),
        ];
        for (input, expected) in ok_cases.iter() {
            assert_eq!(normpath(*input).unwrap().as_os_str(), *expected);
        }

        let err_cases = [("", libc::ENOENT), ("\0", libc::EINVAL)];
        for (input, eno) in err_cases.iter() {
            assert_eq!(normpath(*input).unwrap_err().raw_os_error(), Some(*eno));
        }
    }
}