joat_path/
path_clean.rs

//! `path-clean` is a Rust port of the `cleanname` procedure from the Plan 9 C library, and is similar to
2//! [`path.Clean`](https://golang.org/pkg/path/#Clean) from the Go standard library. It works as follows:
3//!
4//! 1. Reduce multiple slashes to a single slash.
5//! 2. Eliminate `.` path name elements (the current directory).
//! 3. Eliminate `..` path name elements (the parent directory) and the non-`.`, non-`..` element that precedes them.
7//! 4. Eliminate `..` elements that begin a rooted path, that is, replace `/..` by `/` at the beginning of a path.
8//! 5. Leave intact `..` elements that begin a non-rooted path.
9//!
10//! If the result of this process is an empty string, return the string `"."`, representing the current directory.
11//!
12//! It performs this transform lexically, without touching the filesystem. Therefore it doesn't do
13//! any symlink resolution or absolute path resolution. For more information you can see ["Getting Dot-Dot
14//! Right"](https://9p.io/sys/doc/lexnames.html).
15//!
16//! For convenience, the [`PathClean`] trait is exposed and comes implemented for [`std::path::PathBuf`].
17//!
18//! ```rust
19//! use std::path::PathBuf;
20//! use joat_path::{clean, PathClean};
21//! assert_eq!(clean("hello/world/.."), "hello");
22//! assert_eq!(
23//!     PathBuf::from("/test/../path/").clean(),
24//!     PathBuf::from("/path")
25//! );
26//! ```
27use self::internal::PathCharacteristics;
28use std::path::PathBuf;
29
/// Adds a `clean` method to a path-like type.
///
/// `T` is the type produced by cleaning; the implementation provided in this module for
/// [`PathBuf`] returns another `PathBuf`. When working with plain strings, the free
/// [`clean`] function is the recommended entry point.
pub trait PathClean<T> {
    /// Returns the cleaned form of `self` as a `T`; `self` is only borrowed.
    fn clean(&self) -> T;
}
35
36/// `PathClean` implemented for `PathBuf`
37impl PathClean<Self> for PathBuf {
38    fn clean(&self) -> Self {
39        Self::from(clean(self.to_str().unwrap_or("")))
40    }
41}
42
mod internal {
    /// Separator conventions for one family of paths.
    ///
    /// Everything is an associated item, so an implementor serves purely as a type-level
    /// marker; the free functions in this module are generic over it.
    pub trait PathCharacteristics {
        /// Separator written between segments when a path is rebuilt
        const CANONICAL_SEPARATOR: char;

        /// Returns true if `path` is exactly one separator character, false otherwise
        fn is_separator(path: &str) -> bool;

        /// Returns true if `path` begins with a separator character, false otherwise
        fn starts_with_separator(path: &str) -> bool;

        /// Returns `path` with all trailing separator characters removed
        fn trim_end_matches_separator(path: &str) -> &str;

        /// Breaks `path` apart at every separator character
        fn split_on_separators(path: &str) -> Vec<&str>;
    }

    /// Marker for Unix-style paths
    /// * The only path separator is the forward slash "/"
    pub struct UnixPath;

    impl PathCharacteristics for UnixPath {
        const CANONICAL_SEPARATOR: char = '/';

        fn is_separator(path: &str) -> bool {
            matches!(path, "/")
        }

        fn starts_with_separator(path: &str) -> bool {
            path.starts_with(Self::CANONICAL_SEPARATOR)
        }

        fn trim_end_matches_separator(path: &str) -> &str {
            path.trim_end_matches(Self::CANONICAL_SEPARATOR)
        }

        fn split_on_separators(path: &str) -> Vec<&str> {
            path.split(Self::CANONICAL_SEPARATOR).collect()
        }
    }

    /// Marker for Windows-style paths
    /// * Both the backslash "\\" and the forward slash "/" are accepted as separators
    /// * The backslash is the canonical separator
    pub struct WindowsPath;

    impl WindowsPath {
        /// Every character recognised as a separator in a Windows-style path
        const SEPARATORS: [char; 2] = ['\\', '/'];
    }

    impl PathCharacteristics for WindowsPath {
        const CANONICAL_SEPARATOR: char = '\\';

        fn is_separator(path: &str) -> bool {
            matches!(path, "\\" | "/")
        }

        fn starts_with_separator(path: &str) -> bool {
            path.starts_with(Self::SEPARATORS)
        }

        fn trim_end_matches_separator(path: &str) -> &str {
            path.trim_end_matches(Self::SEPARATORS)
        }

        fn split_on_separators(path: &str) -> Vec<&str> {
            path.split(Self::SEPARATORS).collect()
        }
    }

    /// Return the normalized form of `path` when it is one of the special paths
    ///
    /// A lone separator maps to the canonical separator, `""` and `"."` map to `"."`, and
    /// `".."` maps to itself. Any other path yields `None`.
    ///
    /// # Arguments
    ///
    /// * `path` - Path
    pub fn special_path<P: PathCharacteristics>(path: &str) -> Option<String> {
        let normalized = match path {
            // The separator test comes first, exactly as the guard order requires.
            lone if P::is_separator(lone) => P::CANONICAL_SEPARATOR.to_string(),
            "" | "." => String::from("."),
            ".." => String::from(".."),
            _ => return None,
        };
        Some(normalized)
    }

    /// Report whether `path` is rooted, i.e. begins with a separator
    ///
    /// # Arguments
    ///
    /// * `path` - Path
    pub fn is_root<P>(path: &str) -> bool
    where
        P: PathCharacteristics,
    {
        P::starts_with_separator(path)
    }

    /// Strip every trailing path separator from `path`
    ///
    /// # Arguments
    ///
    /// * `path` - Path
    pub fn trim_end_path<P>(path: &str) -> &str
    where
        P: PathCharacteristics,
    {
        P::trim_end_matches_separator(path)
    }

    /// Break `path` into segments at the separators defined by the path characteristics
    ///
    /// # Arguments
    ///
    /// * `path` - Path
    pub fn split_path_segments<P>(path: &str) -> Vec<&str>
    where
        P: PathCharacteristics,
    {
        P::split_on_separators(path)
    }

    /// Concatenate segments into a path, placing the canonical separator between them
    ///
    /// # Arguments
    ///
    /// * `segments` - Segments
    pub fn join_path_segments<P>(segments: &[&str]) -> String
    where
        P: PathCharacteristics,
    {
        let separator = String::from(P::CANONICAL_SEPARATOR);
        segments.join(separator.as_str())
    }

    /// Build an absolute path by putting the canonical separator in front of `path`
    ///
    /// # Arguments
    ///
    /// * `path` - Path
    pub fn make_absolute<P>(path: &str) -> String
    where
        P: PathCharacteristics,
    {
        let separator = P::CANONICAL_SEPARATOR;
        let mut absolute = String::with_capacity(separator.len_utf8() + path.len());
        absolute.push(separator);
        absolute.push_str(path);
        absolute
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_special_path_unix() {
            let dot = Some(String::from("."));
            assert_eq!(dot, special_path::<UnixPath>(""));
            assert_eq!(dot, special_path::<UnixPath>("."));
            assert_eq!(Some(String::from("..")), special_path::<UnixPath>(".."));
            assert_eq!(Some(String::from("/")), special_path::<UnixPath>("/"));
            assert_eq!(None, special_path::<UnixPath>("\\"));
            assert_eq!(None, special_path::<UnixPath>("aaa"));
        }

        #[test]
        fn test_special_path_windows() {
            let dot = Some(String::from("."));
            let backslash = Some(String::from("\\"));
            assert_eq!(dot, special_path::<WindowsPath>(""));
            assert_eq!(dot, special_path::<WindowsPath>("."));
            assert_eq!(Some(String::from("..")), special_path::<WindowsPath>(".."));
            assert_eq!(backslash, special_path::<WindowsPath>("/"));
            assert_eq!(backslash, special_path::<WindowsPath>("\\"));
            assert_eq!(None, special_path::<WindowsPath>("aaa"));
        }

        #[test]
        fn test_is_root_unix() {
            assert!(is_root::<UnixPath>("/a"));
            assert!(!is_root::<UnixPath>("\\a"));
            assert!(!is_root::<UnixPath>("a"));
        }

        #[test]
        fn test_is_root_windows() {
            assert!(is_root::<WindowsPath>("/a"));
            assert!(is_root::<WindowsPath>("\\a"));
            assert!(!is_root::<WindowsPath>("a"));
        }

        #[test]
        fn test_trim_end_path_unix() {
            for input in &["aaa", "aaa/", "aaa/////"] {
                assert_eq!("aaa", trim_end_path::<UnixPath>(input));
            }
        }

        #[test]
        fn test_trim_end_path_windows() {
            let inputs = [
                "aaa",
                "aaa/",
                "aaa\\",
                "aaa/////",
                "aaa\\\\\\\\\\",
                "aaa/\\/\\/",
            ];
            for input in &inputs {
                assert_eq!("aaa", trim_end_path::<WindowsPath>(input));
            }
        }

        #[test]
        fn test_split_path_segments_first_empty_unix() {
            assert_eq!(
                vec!["", "a", "b", "c"],
                split_path_segments::<UnixPath>("/a/b/c")
            );
        }

        #[test]
        fn test_split_path_segments_last_empty_unix() {
            assert_eq!(
                vec!["", "a", "b", "c", ""],
                split_path_segments::<UnixPath>("/a/b/c/")
            );
        }

        #[test]
        fn test_split_path_segments_empty_unix() {
            assert_eq!(vec![""], split_path_segments::<UnixPath>(""));
        }

        #[test]
        fn test_split_path_segments_multiple_empty_unix() {
            assert_eq!(vec!["", "", ""], split_path_segments::<UnixPath>("//"));
        }

        #[test]
        fn test_split_path_segments_first_empty_unix_backslashes() {
            assert_eq!(
                vec!["", "a\\b\\c"],
                split_path_segments::<UnixPath>("/a\\b\\c")
            );
        }

        #[test]
        fn test_split_path_segments_first_empty_windows_backslashes() {
            assert_eq!(
                vec!["", "a", "b", "c"],
                split_path_segments::<WindowsPath>("\\a\\b\\c")
            );
        }

        #[test]
        fn test_split_path_segments_first_empty_windows_mixture() {
            assert_eq!(
                vec!["", "a", "b", "c"],
                split_path_segments::<WindowsPath>("/a\\b/c")
            );
        }

        #[test]
        fn test_join_path_segments_unix() {
            assert_eq!("", join_path_segments::<UnixPath>(&[]));
            assert_eq!("a/b/c", join_path_segments::<UnixPath>(&["a", "b", "c"]));
        }

        #[test]
        fn test_join_path_segments_windows() {
            assert_eq!("", join_path_segments::<WindowsPath>(&[]));
            assert_eq!(
                "a\\b\\c",
                join_path_segments::<WindowsPath>(&["a", "b", "c"])
            );
        }

        #[test]
        fn test_make_absolute_unix() {
            assert_eq!("/aaa", make_absolute::<UnixPath>("aaa"));
            assert_eq!("//aaa", make_absolute::<UnixPath>("/aaa"));
            assert_eq!("/\\aaa", make_absolute::<UnixPath>("\\aaa"));
        }

        #[test]
        fn test_make_absolute_windows() {
            assert_eq!("\\aaa", make_absolute::<WindowsPath>("aaa"));
            assert_eq!("\\/aaa", make_absolute::<WindowsPath>("/aaa"));
            assert_eq!("\\\\aaa", make_absolute::<WindowsPath>("\\aaa"));
        }
    }
}
320
321/// The core implementation. It performs the following, lexically:
322/// 1. Reduce multiple slashes to a single slash.
323/// 2. Eliminate `.` path name elements (the current directory).
324/// 3. Eliminate `..` path name elements (the parent directory) and the non-`.` non-`..`, element that precedes them.
325/// 4. Eliminate `..` elements that begin a rooted path, that is, replace `/..` by `/` at the beginning of a path.
326/// 5. Leave intact `..` elements that begin a non-rooted path.
327///
328/// If the result of this process is an empty string, return the string `"."`, representing the current directory.
329#[must_use]
330pub fn clean(path: &str) -> String {
331    #[cfg(not(target_os = "windows"))]
332    type PlatformPath = internal::UnixPath;
333    #[cfg(target_os = "windows")]
334    type PlatformPath = internal::WindowsPath;
335
336    clean_core::<PlatformPath>(path)
337}
338
339#[must_use]
340pub fn clean_unix(path: &str) -> String {
341    clean_core::<internal::UnixPath>(path)
342}
343
344#[must_use]
345pub fn clean_windows(path: &str) -> String {
346    clean_core::<internal::WindowsPath>(path)
347}
348
349#[allow(clippy::unnecessary_unwrap)]
350fn clean_core<P: PathCharacteristics>(path: &str) -> String {
351    use internal::{
352        is_root, join_path_segments, make_absolute, special_path, split_path_segments,
353        trim_end_path,
354    };
355
356    if let Some(s) = special_path::<P>(path) {
357        return s;
358    }
359
360    let mut out = vec![];
361    let is_root = is_root::<P>(path);
362
363    let path = trim_end_path::<P>(path);
364    let segments = split_path_segments::<P>(path);
365    let num_segments = segments.len();
366
367    for segment in segments {
368        match segment {
369            "" => continue,
370            "." => {
371                if num_segments == 1 {
372                    out.push(segment);
373                };
374                continue;
375            }
376            ".." => {
377                let previous = out.pop();
378                if previous.is_some() && !can_backtrack(previous.unwrap()) {
379                    out.push(previous.unwrap());
380                    out.push(segment);
381                } else if previous.is_none() && !is_root {
382                    out.push(segment);
383                };
384                continue;
385            }
386            _ => {
387                out.push(segment);
388            }
389        };
390    }
391
392    let out_str_0 = join_path_segments::<P>(&out);
393
394    let out_str_1 = if is_root {
395        make_absolute::<P>(&out_str_0)
396    } else {
397        out_str_0
398    };
399
400    if out_str_1.is_empty() {
401        ".".to_string()
402    } else {
403        out_str_1
404    }
405}
406
/// Returns `true` when `segment` is anything other than `.` or `..`, i.e. when a following
/// `..` is allowed to cancel it out.
fn can_backtrack(segment: &str) -> bool {
    segment != "." && segment != ".."
}
410
#[cfg(test)]
mod tests {
    use super::test_helpers::to_windows;
    use super::{clean_unix, clean_windows, PathClean};

    use std::path::PathBuf;

    /// Runs each `(input, expected)` pair through `clean_unix` as written, and through
    /// `clean_windows` with both sides converted to backslashes.
    fn assert_cleans(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(clean_unix(input), expected);
            assert_eq!(clean_windows(&to_windows(input)), to_windows(expected));
        }
    }

    #[test]
    fn test_empty_path_is_current_dir() {
        assert_cleans(&[("", ".")]);
    }

    #[test]
    fn test_clean_paths_dont_change() {
        assert_cleans(&[(".", "."), ("..", ".."), ("/", "/")]);
    }

    #[test]
    fn test_replace_multiple_slashes() {
        assert_cleans(&[
            ("/", "/"),
            ("//", "/"),
            ("///", "/"),
            (".//", "."),
            ("//..", "/"),
            ("..//", ".."),
            ("/..//", "/"),
            ("/.//./", "/"),
            ("././/./", "."),
            ("path//to///thing", "path/to/thing"),
            ("/path//to///thing", "/path/to/thing"),
        ]);
    }

    #[test]
    fn test_eliminate_current_dir() {
        assert_cleans(&[
            ("./", "."),
            ("/./", "/"),
            ("./test", "test"),
            ("./test/./path", "test/path"),
            ("/test/./path/", "/test/path"),
            ("test/path/.", "test/path"),
        ]);
    }

    #[test]
    fn test_eliminate_parent_dir() {
        assert_cleans(&[
            ("/..", "/"),
            ("/../test", "/test"),
            ("test/..", "."),
            ("test/path/..", "test"),
            ("test/../path", "path"),
            ("/test/../path", "/path"),
            ("test/path/../../", "."),
            ("test/path/../../..", ".."),
            ("/test/path/../../..", "/"),
            ("/test/path/../../../..", "/"),
            ("test/path/../../../..", "../.."),
            ("test/path/../../another/path", "another/path"),
            ("test/path/../../another/path/..", "another"),
            ("../test", "../test"),
            ("../test/", "../test"),
            ("../test/path", "../test/path"),
            ("../test/..", ".."),
        ]);
    }

    #[test]
    fn test_pathbuf_trait() {
        let cleaned = PathBuf::from("/test/../path/").clean();
        assert_eq!(cleaned, PathBuf::from("/path"));
    }
}
509
#[cfg(test)]
mod test_helpers {
    /// Rewrites a `/`-separated path so that every forward slash becomes a backslash; all
    /// other characters are copied through unchanged.
    pub fn to_windows(p: &str) -> String {
        p.chars()
            .map(|ch| if ch == '/' { '\\' } else { ch })
            .collect()
    }
}
515}