box_format/path/
mod.rs

1use std::{
2    fmt,
3    path::{Path, PathBuf},
4};
5
6mod error;
7
8pub use self::error::IntoBoxPathError;
9
#[cfg(not(windows))]
/// The platform-specific separator as a string, used for splitting
/// platform-supplied paths and printing `BoxPath`s in the platform-preferred
/// manner.
///
/// This definition is compiled on every non-Windows target and is a forward
/// slash.
pub const PATH_PLATFORM_SEP: &str = "/";

#[cfg(windows)]
/// The platform-specific separator as a string, used for splitting
/// platform-supplied paths and printing `BoxPath`s in the platform-preferred
/// manner.
///
/// This definition is compiled on Windows targets only and is a single
/// backslash.
pub const PATH_PLATFORM_SEP: &str = "\\";
21
/// The separator used in `BoxPath` type paths, used primarily in
/// `FileRecord` and `DirectoryRecord` fields.
///
/// The value is the single character U+001F, the ASCII "unit separator"
/// control code. It is never a platform path separator, so a stored path can
/// be split on it unambiguously.
pub const PATH_BOX_SEP: &str = "\x1f";
25
/// A path stored as a single `String` whose components are joined by
/// [`PATH_BOX_SEP`] rather than by the platform separator.
///
/// The inner string is only visible inside the crate; outside code obtains a
/// value through `BoxPath::new`, which sanitises a platform path first.
/// Ordering, equality and hashing are derived from the inner string.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct BoxPath(pub(crate) String);
29
30pub fn sanitize<P: AsRef<Path>>(path: P) -> Option<Vec<String>> {
31    use std::path::Component;
32    use unic_normal::StrNormalForm;
33    use unic_ucd::GeneralCategory;
34
35    let mut out = vec![];
36
37    for component in path.as_ref().components() {
38        match component {
39            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
40            Component::ParentDir => {
41                out.pop();
42            }
43            Component::Normal(os_str) => out.push(
44                os_str
45                    .to_str()
46                    .map(|x| x.trim())
47                    .filter(|x| !x.is_empty())
48                    .filter(|x| {
49                        !x.chars().any(|c| {
50                            let cat = GeneralCategory::of(c);
51                            c == '\\'
52                                || cat == GeneralCategory::Control
53                                || (cat.is_separator() && c != ' ')
54                        })
55                    })
56                    .map(|x| x.nfc().collect::<String>())?,
57            ),
58        }
59    }
60
61    Some(out)
62}
63
64impl AsRef<[u8]> for BoxPath {
65    #[inline(always)]
66    fn as_ref(&self) -> &[u8] {
67        self.0.as_bytes()
68    }
69}
70
71impl BoxPath {
72    pub fn new<P: AsRef<Path>>(path: P) -> std::result::Result<BoxPath, IntoBoxPathError> {
73        let out = sanitize(&path).ok_or(IntoBoxPathError::UnrepresentableStr)?;
74
75        if out.is_empty() {
76            return Err(IntoBoxPathError::EmptyPath);
77        }
78
79        Ok(BoxPath(out.join(PATH_BOX_SEP)))
80    }
81
82    pub fn to_path_buf(&self) -> PathBuf {
83        PathBuf::from(self.to_string())
84    }
85
86    pub fn parent(&self) -> Option<BoxPath> {
87        let mut parts: Vec<_> = self.iter().collect();
88        if parts.len() == 1 {
89            return None;
90        }
91        parts.pop();
92        Some(BoxPath(parts.join(PATH_BOX_SEP)))
93    }
94
95    pub fn filename(&self) -> String {
96        self.iter().collect::<Vec<_>>().pop().unwrap().to_string()
97    }
98
99    pub fn depth(&self) -> usize {
100        self.0.chars().filter(|c| c == &'\x1f').count()
101    }
102
103    pub fn starts_with(&self, other: &BoxPath) -> bool {
104        !self
105            .0
106            .split(PATH_BOX_SEP)
107            .zip(other.0.split(PATH_BOX_SEP))
108            .any(|(a, b)| a != b)
109    }
110
111    pub fn join<P: AsRef<Path>>(&self, tail: P) -> std::result::Result<BoxPath, IntoBoxPathError> {
112        Self::new(self.to_path_buf().join(tail))
113    }
114
115    pub(crate) fn join_unchecked(&self, tail: &str) -> BoxPath {
116        BoxPath(format!("{}{}{}", self.0, PATH_BOX_SEP, tail))
117    }
118
119    pub fn iter(&self) -> std::str::Split<'_, &str> {
120        self.0.split(PATH_BOX_SEP)
121    }
122}
123
124impl fmt::Display for BoxPath {
125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126        let mut iter = self.0.split(PATH_BOX_SEP);
127        if let Some(v) = iter.next() {
128            f.write_str(v)?;
129        }
130        for v in iter {
131            f.write_str(PATH_PLATFORM_SEP)?;
132            f.write_str(v)?;
133        }
134        Ok(())
135    }
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn sanitisation() {
        // Root, `.` and `..` components are resolved away.
        let box_path = BoxPath::new("/something/../somethingelse/./foo.txt").unwrap();
        assert_eq!(box_path.0, "somethingelse\x1ffoo.txt");
        let box_path = BoxPath::new("../something/../somethingelse/./foo.txt/.").unwrap();
        assert_eq!(box_path.0, "somethingelse\x1ffoo.txt");

        // This one will do different things on Windows and Unix, because Unix loves a good backslash
        let box_path = BoxPath::new(r"..\something\..\somethingelse\.\foo.txt\.");

        #[cfg(not(windows))]
        assert!(box_path.is_err());
        #[cfg(windows)]
        assert_eq!(box_path.unwrap().0, "somethingelse\x1ffoo.txt");

        let box_path = BoxPath::new(r"..\something/..\somethingelse\./foodir\");
        #[cfg(not(windows))]
        assert!(box_path.is_err());
        #[cfg(windows)]
        assert_eq!(box_path.unwrap().0, "somethingelse\x1ffoodir");
    }

    #[test]
    fn sanitisation2() {
        // Null is a sassy fellow
        let box_path = BoxPath::new("\0");
        assert!(box_path.is_err());
    }

    #[test]
    fn sanitisation3() {
        // Blank string is a sassy fellow if you can find him
        let box_path = BoxPath::new("");
        assert!(box_path.is_err());
    }

    #[test]
    fn sanitisation4() {
        // A doubled separator must not produce an empty component.
        let box_path = BoxPath::new("/cant/hate//the/path");
        assert_eq!(box_path.unwrap().0, "cant\x1fhate\x1fthe\x1fpath");
    }

    #[test]
    fn sanitisation_bidi() {
        // Mixed left-to-right and right-to-left text with combining marks is kept.
        let box_path = BoxPath::new("this is now العَرَبِيَّة.txt");
        assert_eq!(box_path.unwrap().0, "this is now العَرَبِيَّة.txt");
    }

    #[test]
    fn sanitisation_basmala() {
        // The single-codepoint ligature U+FDFD is kept as-is.
        let box_path = BoxPath::new("this is now ﷽.txt");
        assert_eq!(box_path.unwrap().0, "this is now ﷽.txt");
    }

    #[test]
    fn sanitisation_icecube_emoji() {
        let box_path = BoxPath::new("///🧊/🧊");
        assert_eq!(box_path.unwrap().0, "🧊\x1f🧊");
    }

    #[test]
    fn sanitisation_simple_self() {
        let box_path = BoxPath::new("./self");
        assert_eq!(box_path.unwrap().0, "self");
    }

    #[test]
    fn sanitisation_slash() {
        // A bare root has no components left after sanitising.
        let box_path = BoxPath::new("/");
        assert!(box_path.is_err());
    }

    #[test]
    fn parent_filename_and_depth() {
        let nested = BoxPath::new("a/b/c.txt").unwrap();
        assert_eq!(nested.depth(), 2);
        assert_eq!(nested.filename(), "c.txt");
        assert_eq!(nested.parent(), Some(BoxPath("a\x1fb".to_string())));

        let single = BoxPath::new("a").unwrap();
        assert_eq!(single.depth(), 0);
        assert_eq!(single.filename(), "a");
        assert_eq!(single.parent(), None);
    }

    #[test]
    fn join_resolves_parent_components() {
        let base = BoxPath::new("a").unwrap();
        assert_eq!(base.join("b/../c").unwrap().0, "a\x1fc");
    }

    #[test]
    fn starts_with_compares_whole_components() {
        let long = BoxPath::new("a/b").unwrap();
        assert!(long.starts_with(&BoxPath::new("a").unwrap()));
        assert!(long.starts_with(&long));
        assert!(!long.starts_with(&BoxPath::new("a/c").unwrap()));
        assert!(!BoxPath::new("ab").unwrap().starts_with(&BoxPath::new("a").unwrap()));
    }

    #[test]
    fn display_uses_platform_separator() {
        let path = BoxPath::new("a/b").unwrap();
        assert_eq!(path.to_string(), format!("a{}b", PATH_PLATFORM_SEP));
    }
}