realpath_ext/lib.rs
1#![cfg_attr(not(feature = "std"), no_std)]
2
3mod slicevec;
4mod util;
5
6use slicevec::SliceVec;
7use util::{ComponentIter, ComponentStack, SymlinkCounter};
8
9#[cfg(target_family = "unix")]
10const PATH_MAX: usize = libc::PATH_MAX as usize;
11#[cfg(target_os = "wasi")]
12const PATH_MAX: usize = 4096;
13
/// Lexically normalize `path` and return the result as an owned path.
///
/// This convenience wrapper allocates a scratch buffer as long as the input, delegates the actual
/// work to [`normpath_raw()`], and converts the bytes it produced back into a path. See
/// [`normpath_raw()`] for the normalization rules.
///
/// # Errors
///
/// Any OS error code reported by [`normpath_raw()`] is returned wrapped in a [`std::io::Error`].
#[cfg(feature = "std")]
pub fn normpath<P: AsRef<std::path::Path>>(path: P) -> std::io::Result<std::path::PathBuf> {
    #[cfg(target_family = "unix")]
    use std::os::unix::prelude::*;
    #[cfg(target_os = "wasi")]
    use std::os::wasi::prelude::*;

    let raw = path.as_ref().as_os_str().as_bytes();

    // The output buffer is given exactly as many bytes as the input has.
    let mut out = vec![0u8; raw.len()];

    match normpath_raw(raw, &mut out) {
        Ok(written) => {
            // Drop the unused tail so only the normalized bytes become the path.
            out.truncate(written);
            Ok(std::path::PathBuf::from(std::ffi::OsString::from_vec(out)))
        }
        Err(eno) => Err(std::io::Error::from_raw_os_error(eno)),
    }
}
34
35/// "Normalize" the given path.
36///
37/// Other than the differences described below, the `path` and `buf` arguments to this function,
38/// and the return values, have the same meaning as for [`realpath_raw()`].
39///
40/// This function was designed after Python's `os.path.normpath()`. It will remove `.` elements,
41/// condense extra slashes, and collapse `..` entries. Think of it as a version of
42/// [`realpath_raw()`] that doesn't actually touch the filesystem. (As a consequence of this, if
43/// the given `path` is relative, the returned path will also be relative.)
44///
45/// Note that because this function doesn't actually touch the filesystem, the returned path may
46/// not refer to the correct file! Certain combinations of `..` and/or symbolic links can cause
47/// this; the only way to get the definitive canonicalized path is to use [`realpath_raw()`].
48///
49/// Example usage:
50///
51/// ```
52/// # use realpath_ext::normpath_raw;
53/// let mut buf = [0; libc::PATH_MAX as usize];
54/// let n = normpath_raw(b"/a/b/./c/../", &mut buf).unwrap();
55/// assert_eq!(&buf[..n], b"/a/b");
56/// ```
57///
58/// # Errors
59///
60/// This function may fail with the following errors:
61///
62/// - `ENAMETOOLONG`: The given `buf` is not long enough to store the normalized path.
63/// - `ENOENT`: The given `path` is empty.
64/// - `EINVAL`: The given `path` contains a NUL byte (not allowed in \*nix paths).
65pub fn normpath_raw(path: &[u8], buf: &mut [u8]) -> Result<usize, i32> {
66 let mut buf = SliceVec::empty(buf);
67
68 for component in ComponentIter::new(path)? {
69 if component == b"/" || component == b"//" {
70 buf.replace(component)?;
71 } else if component == b".." {
72 buf.make_parent_path()?;
73 } else {
74 if !matches!(buf.as_ref(), b"/" | b"//" | b"") {
75 buf.push(b'/')?;
76 }
77 buf.extend_from_slice(component)?;
78 }
79 }
80
81 if buf.is_empty() {
82 buf.push(b'.')?;
83 }
84
85 Ok(buf.len())
86}
87
88bitflags::bitflags! {
89 /// Flags that modify path resolution.
90 ///
91 /// These flags were modeled after the options to the GNU `realpath` program.
92 pub struct RealpathFlags: u32 {
93 /// Allow any component of the given path to be missing, inaccessible, or not a directory
94 /// when it should be.
95 const ALLOW_MISSING = 0x01;
96 /// Allow the last component of the given path to be missing.
97 const ALLOW_LAST_MISSING = 0x02;
98 /// Do not resolve symbolic links as they are encountered.
99 ///
100 /// Note that if this option is passed, the returned path may not refer to the correct file!
101 /// Certain combinations of `..` and/or symbolic links can cause this.
102 const IGNORE_SYMLINKS = 0x04;
103 }
104}
105
/// A "builder" that allows customizing options to [`realpath()`].
///
/// `realpath(path, flags)` is equivalent to `RealpathBuilder::new().flags(flags).realpath(path)`.
#[cfg(feature = "std")]
#[derive(Clone)]
pub struct RealpathBuilder {
    /// Largest buffer length that will be tried before giving up with `ENAMETOOLONG`.
    max_len: usize,
    /// Flags that modify path resolution.
    flags: RealpathFlags,
}

#[cfg(feature = "std")]
impl RealpathBuilder {
    /// Create a new "builder".
    ///
    /// The returned builder has its `flags` empty. `max_len` will be set to 32768 on WASI, and
    /// `PATH_MAX` on other OSes.
    #[inline]
    pub fn new() -> Self {
        Self {
            max_len: if cfg!(target_os = "wasi") {
                32768
            } else {
                PATH_MAX
            },
            flags: RealpathFlags::empty(),
        }
    }

    /// Set the maximum path length allowed before failing with `ENAMETOOLONG`.
    ///
    /// Generally speaking, this is only useful if the OS supports paths longer than `PATH_MAX` (for
    /// example, this is the case on WASI).
    ///
    /// Note: [`Self::realpath()`] starts with a buffer of at most `PATH_MAX` bytes, then doubles
    /// it (up to this length) and retries whenever resolution fails with `ENAMETOOLONG`.
    #[inline]
    pub fn max_len(&mut self, max_len: usize) -> &mut Self {
        self.max_len = max_len;
        self
    }

    /// Set the flags used to modify path resolution.
    ///
    /// See [`RealpathFlags`] for more information.
    #[inline]
    pub fn flags(&mut self, flags: RealpathFlags) -> &mut Self {
        self.flags = flags;
        self
    }

    /// Canonicalize the given path.
    ///
    /// # Errors
    ///
    /// Any OS error code produced during resolution is returned as a [`std::io::Error`].
    /// `ENAMETOOLONG` is only reported once the buffer has already grown to `max_len`.
    pub fn realpath<P: AsRef<std::path::Path>>(
        &self,
        path: P,
    ) -> std::io::Result<std::path::PathBuf> {
        #[cfg(target_family = "unix")]
        use std::os::unix::prelude::*;
        #[cfg(target_os = "wasi")]
        use std::os::wasi::prelude::*;

        // The input bytes never change between retries, so convert them once up front.
        let path = path.as_ref().as_os_str().as_bytes();

        let len = PATH_MAX.min(self.max_len);
        let mut buf = vec![0u8; len];
        // The scratch buffer is kept 100 bytes larger than the output buffer.
        let mut tmp = vec![0u8; len + 100];

        loop {
            match realpath_raw_inner(path, &mut buf, &mut tmp, self.flags) {
                Ok(len) => {
                    buf.truncate(len);
                    return Ok(std::ffi::OsString::from_vec(buf).into());
                }

                Err(libc::ENAMETOOLONG) if buf.len() < self.max_len => {
                    // Resize until we hit the maximum limit
                    let new_len = buf.len().saturating_mul(2).min(self.max_len);
                    buf.resize(new_len, 0);
                    tmp.resize(new_len + 100, 0);
                }

                Err(eno) => return Err(std::io::Error::from_raw_os_error(eno)),
            }
        }
    }
}

#[cfg(feature = "std")]
impl Default for RealpathBuilder {
    /// Equivalent to [`RealpathBuilder::new()`].
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}
201
/// Resolve `path` to its canonical form, returning an owned path.
///
/// This is effectively [`realpath_raw()`] with the buffer management handled for you; see that
/// function's documentation for the resolution rules and possible errors.
///
/// Note that on non-WASI OSes, this function is limited to resolving paths of `PATH_MAX` bytes.
/// See [`RealpathBuilder`] for more information.
#[cfg(feature = "std")]
pub fn realpath<P: AsRef<std::path::Path>>(
    path: P,
    flags: RealpathFlags,
) -> std::io::Result<std::path::PathBuf> {
    let mut builder = RealpathBuilder::new();
    builder.flags(flags);
    builder.realpath(path)
}
216
217/// A "builder" that allows customizing options to `realpath_raw()`.
218///
219/// `realpath_raw(path, buf, flags)` is equivalent to
220/// `RealpathRawBuilder::new().flags(flags).realpath_raw(path, buf)`.
221pub struct RealpathRawBuilder<'a> {
222 flags: RealpathFlags,
223 tmp: Option<&'a mut [u8]>,
224}
225
226impl<'a> RealpathRawBuilder<'a> {
227 /// Create a new "builder".
228 ///
229 /// The returned builder has its `flags` empty, and `temp_buffer` set to `None`.
230 #[inline]
231 pub fn new() -> Self {
232 Self {
233 flags: RealpathFlags::empty(),
234 tmp: None,
235 }
236 }
237
238 /// Set the flags used to modify path resolution.
239 ///
240 /// See [`RealpathFlags`] for more information.
241 #[inline]
242 pub fn flags(&mut self, flags: RealpathFlags) -> &mut Self {
243 self.flags = flags;
244 self
245 }
246
247 /// Set the temporary buffer used to store intermediate results.
248 ///
249 /// It's recommended to make this buffer somewhat larger than the `buf` passed to
250 /// [`Self::realpath_raw()`], since the current algorithm requires a bit of overhead in the
251 /// temporary buffer.
252 ///
253 /// If `tmp` is `None` (default), a temporary buffer of length `PATH_MAX + 100` will be
254 /// allocated on the stack.
255 #[inline]
256 pub fn temp_buffer(&mut self, tmp: Option<&'a mut [u8]>) -> &mut Self {
257 self.tmp = tmp;
258 self
259 }
260
261 /// Canonicalize the path given by `path` into the buffer given by `buf`.
262 ///
263 /// `path`, `buf`, and the return value have the same meanings as for [`realpath_raw()`].
264 #[inline]
265 pub fn realpath_raw(&mut self, path: &[u8], buf: &mut [u8]) -> Result<usize, i32> {
266 if let Some(tmp) = self.tmp.as_mut() {
267 realpath_raw_inner(path, buf, tmp, self.flags)
268 } else {
269 realpath_raw(path, buf, self.flags)
270 }
271 }
272}
273
274impl Default for RealpathRawBuilder<'_> {
275 #[inline]
276 fn default() -> Self {
277 Self::new()
278 }
279}
280
281/// Canonicalize the given path.
282///
283/// This function resolves the path specified by `path`, storing the result in `buf`. On success,
284/// the length of the resolved path is returned; on error, an OS error code is returned.
285///
286/// If `flags` is specified as `RealpathFlags::empty()`, this is roughly equivalent to the libc's
287/// `realpath()`. Otherwise, the given `flags` modify aspects of path resolution.
288///
289/// This function does not allocate any memory. It will only call the following C functions:
290/// - `sysconf(_SC_SYMLOOP_MAX)`
291/// - `readlink()`
292/// - `stat()` (only if it needs to be verified that the path is a directory)
293/// - `getcwd()` (only if the given `path` is relative and does not contain a reference to an
294/// absolute symbolic link)
295///
296/// Example usage:
297///
298/// ```
299/// # use realpath_ext::{RealpathFlags, realpath_raw};
300/// let mut buf = [0; libc::PATH_MAX as usize];
301/// let n = realpath_raw(b"///", &mut buf, RealpathFlags::empty()).unwrap();
302/// assert_eq!(&buf[..n], b"/");
303/// ```
304///
305/// The returned path will ALWAYS be absolute.
306///
307/// # Errors
308///
309/// This function may fail with the following errors:
310///
311/// - `ENAMETOOLONG`: Either:
312/// 1. The given `buf` is not long enough to store the canonicalized path, or
313/// 2. The current working directory cannot be represented in a buffer of length `PATH_MAX`, or
314/// 3. An intermediate result created by combining any symbolic link paths exceeded the system
315/// `PATH_MAX`. (Note that the actual limit is slightly higher than `PATH_MAX` to account for
316/// storage overhead; this should not be relied upon.)
317/// - `EINVAL`: The given `path` contains a NUL byte (not allowed in \*nix paths).
318/// - `ELOOP`: Too many symbolic links were encounted during resolution.
319///
320/// This function will use `sysconf()` to check the system's `SYMLOOP_MAX` value to determine
321/// the limit. If that fails (for example, it always fails on glibc), this function will fall
322/// back on a limit of 40 (which is Linux's limit).
323/// - `ENOENT`/`EACCES`/`ENOTDIR`: The given `path` (or a component of it) does not exist, is
324/// inaccessible, or is not a directory (respectively).
325///
326/// `ENOENT` and `EACCES` may also be returned if `getcwd()` had to be called (see above for the
327/// conditions in which this may be necessary) and the path to the current directory cannot be
328/// obtained.
329///
330/// (Note that these errors may be ignored depending on the specified `flags`.)
331/// - `EIO`: An I/O error occurred while interacting with the filesystem.
332pub fn realpath_raw(path: &[u8], buf: &mut [u8], flags: RealpathFlags) -> Result<usize, i32> {
333 let mut tmp = [0u8; PATH_MAX + 100];
334 realpath_raw_inner(path, buf, &mut tmp, flags)
335}
336
337fn realpath_raw_inner(
338 path: &[u8],
339 buf: &mut [u8],
340 tmp: &mut [u8],
341 flags: RealpathFlags,
342) -> Result<usize, i32> {
343 let mut stack = ComponentStack::new(tmp);
344
345 let mut path_it = ComponentIter::new(path)?;
346
347 let mut buf = SliceVec::empty(buf);
348
349 let mut links = SymlinkCounter::new();
350
351 while let Some(component) = stack.next().or_else(|| path_it.next()) {
352 debug_assert_ne!(buf.as_ref(), b".");
353
354 if component == b"/" || component == b"//" {
355 buf.replace(component)?;
356 } else if component == b".." {
357 buf.make_parent_path()?;
358 } else {
359 let oldlen = buf.len();
360
361 if !matches!(buf.as_ref(), b"/" | b"//" | b"") {
362 buf.push(b'/')?;
363 }
364 buf.extend_from_slice(component)?;
365 buf.push(b'\0')?;
366
367 let res = if flags.contains(RealpathFlags::IGNORE_SYMLINKS) {
368 // If IGNORE_SYMLINKS was passed, call readlink() to make sure it exists, but then
369 // act like it isn't a symlink if it is
370 Err(unsafe { util::readlink_empty(buf.as_ptr()) }
371 .err()
372 .unwrap_or(libc::EINVAL))
373 } else {
374 unsafe { stack.push_readlink(buf.as_ptr()) }
375 };
376
377 match res {
378 Ok(()) => {
379 links.advance()?;
380 debug_assert!(buf.len() > oldlen);
381 buf.truncate(oldlen);
382 }
383
384 // Not a symlink; just remove the trailing NUL
385 Err(libc::EINVAL) => {
386 buf.pop();
387 }
388
389 // In these conditions, components of the path are allowed to not exist/not be
390 // accessible/not be a directory
391 Err(libc::ENOENT) | Err(libc::EACCES) | Err(libc::ENOTDIR)
392 if flags.contains(RealpathFlags::ALLOW_MISSING) =>
393 {
394 buf.pop();
395 }
396
397 Err(libc::ENOENT)
398 if flags.contains(RealpathFlags::ALLOW_LAST_MISSING)
399 && stack.is_empty()
400 && path_it.is_empty() =>
401 {
402 buf.pop();
403 }
404
405 Err(eno) => return Err(eno),
406 }
407 }
408 }
409
410 /// If required, check that `buf` refers to a directory.
411 fn maybe_check_isdir(path: &[u8], buf: &mut SliceVec, flags: RealpathFlags) -> Result<(), i32> {
412 if (path.ends_with(b"/") || path.ends_with(b"/."))
413 && !flags.contains(RealpathFlags::ALLOW_MISSING)
414 {
415 buf.push(b'\0')?;
416 match unsafe { util::check_isdir(buf.as_ptr()) } {
417 Ok(()) => (),
418 Err(libc::ENOENT) if flags.contains(RealpathFlags::ALLOW_LAST_MISSING) => (),
419 Err(eno) => return Err(eno),
420 }
421 buf.pop();
422 }
423
424 Ok(())
425 }
426
427 let mut tmp = SliceVec::empty(stack.clear());
428
429 if buf.as_ref() == b"" {
430 util::getcwd(&mut buf)?;
431 // We know `buf` refers to a directory
432 } else if buf.as_ref() == b".." {
433 util::getcwd(&mut buf)?;
434 buf.make_parent_path()?;
435 // We know `buf` refers to a directory
436 } else if buf.starts_with(b"../") {
437 let mut n = count_leading_dotdot(&buf);
438 if &buf[(n * 3)..] == b".." {
439 buf.clear();
440 n += 1;
441 // We know `buf` refers to a directory
442 } else {
443 maybe_check_isdir(path, &mut buf, flags)?;
444 buf.remove_range(0..(n * 3 - 1));
445 }
446
447 util::getcwd(&mut tmp)?;
448
449 for _ in 0..n {
450 tmp.make_parent_path()?;
451 }
452
453 buf.insert_from_slice(0, &tmp)?;
454 } else if !buf.starts_with(b"/") {
455 debug_assert!(!buf.starts_with(b"./"));
456 debug_assert_ne!(buf.as_ref(), b".");
457
458 maybe_check_isdir(path, &mut buf, flags)?;
459
460 tmp.clear();
461 util::getcwd(&mut tmp)?;
462 debug_assert!(tmp.len() > 0);
463 tmp.push(b'/')?;
464 buf.insert_from_slice(0, &tmp)?;
465 } else if !matches!(buf.as_ref(), b"/" | b"//") {
466 // We don't have to check "/" or "//", but we do have to check other paths
467 maybe_check_isdir(path, &mut buf, flags)?;
468 }
469
470 Ok(buf.len())
471}
472
/// Count how many consecutive `../` sequences `s` begins with.
///
/// A trailing `..` that is not followed by a slash is not counted.
fn count_leading_dotdot(s: &[u8]) -> usize {
    // Walk the input three bytes at a time and stop at the first chunk that is not "../".
    s.chunks(3).take_while(|chunk| *chunk == b"../").count()
}
481
#[cfg(test)]
mod tests {
    use super::*;

    /// `count_leading_dotdot()` counts only the `../` run at the very start of the input.
    #[test]
    fn test_count_leading_dotdot() {
        fn check(input: &[u8], expected: usize) {
            assert_eq!(count_leading_dotdot(input), expected);
        }

        check(b"", 0);
        check(b"..", 0);
        check(b"../a", 1);
        check(b"../../a", 2);
        check(b"../a/../b", 1);
    }

    /// `normpath_raw()` normalizes valid paths and rejects empty or NUL-containing input.
    #[test]
    fn test_normpath_raw() {
        // Normalizes `input` into a fixed-size buffer and compares the produced bytes.
        fn check(input: &[u8], expected: &[u8]) {
            let mut buf = [0u8; 100];
            let n = normpath_raw(input, &mut buf).unwrap();
            assert_eq!(&buf[..n], expected);
        }

        check(b"/", b"/");
        check(b".", b".");
        check(b"a", b"a");
        check(b"a/..", b".");
        check(b"//a/./b/../c/", b"//a/c");

        let mut buf = [0u8; 100];
        assert_eq!(normpath_raw(b"", &mut buf), Err(libc::ENOENT));
        assert_eq!(normpath_raw(b"\0", &mut buf), Err(libc::EINVAL));
    }

    /// The allocating `normpath()` wrapper agrees with the raw function.
    #[cfg(feature = "std")]
    #[test]
    fn test_normpath() {
        let cases = [
            ("/", "/"),
            (".", "."),
            ("a/..", "."),
            ("//a/./b/../c/", "//a/c"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(normpath(input).unwrap().as_os_str(), expected);
        }

        let empty_err = normpath("").unwrap_err();
        assert_eq!(empty_err.raw_os_error(), Some(libc::ENOENT));

        let nul_err = normpath("\0").unwrap_err();
        assert_eq!(nul_err.raw_os_error(), Some(libc::EINVAL));
    }
}