typed_path/windows/
non_utf8.rs

1mod components;
2
3use core::fmt;
4use core::hash::{Hash, Hasher};
5
6pub use components::*;
7
8use super::constants::*;
9use crate::common::CheckedPathError;
10use crate::no_std_compat::*;
11use crate::typed::{TypedPath, TypedPathBuf};
12use crate::{private, Component, Components, Encoding, Path, PathBuf};
13
/// Represents a Windows-specific [`Path`], i.e. a [`Path`] parameterized with
/// [`WindowsEncoding`].
pub type WindowsPath = Path<WindowsEncoding>;
16
/// Represents a Windows-specific [`PathBuf`], i.e. a [`PathBuf`] parameterized with
/// [`WindowsEncoding`].
pub type WindowsPathBuf = PathBuf<WindowsEncoding>;
19
/// Represents a Windows-specific [`Encoding`].
///
/// This is a field-less marker type: it carries no data and is only used as the type
/// parameter selecting Windows behavior for [`Path`] and [`PathBuf`].
#[derive(Copy, Clone)]
pub struct WindowsEncoding;
23
// Marks `WindowsEncoding` with the crate-private `Sealed` trait.
// NOTE(review): presumably `Encoding` requires `Sealed` so that it cannot be implemented
// outside this crate — confirm against `crate::private`.
impl private::Sealed for WindowsEncoding {}
25
26impl Encoding for WindowsEncoding {
27    type Components<'a> = WindowsComponents<'a>;
28
29    fn label() -> &'static str {
30        "windows"
31    }
32
33    fn components(path: &[u8]) -> Self::Components<'_> {
34        WindowsComponents::new(path)
35    }
36
37    fn hash<H: Hasher>(path: &[u8], h: &mut H) {
38        let (prefix_len, verbatim) = match Self::components(path).prefix() {
39            Some(prefix) => {
40                prefix.hash(h);
41                (prefix.len(), prefix.kind().is_verbatim())
42            }
43            None => (0, false),
44        };
45        let bytes = &path[prefix_len..];
46
47        let mut component_start = 0;
48        let mut bytes_hashed = 0;
49
50        for i in 0..bytes.len() {
51            let is_sep = if verbatim {
52                path[i] == SEPARATOR as u8
53            } else {
54                path[i] == SEPARATOR as u8 || path[i] == ALT_SEPARATOR as u8
55            };
56            if is_sep {
57                if i > component_start {
58                    let to_hash = &bytes[component_start..i];
59                    h.write(to_hash);
60                    bytes_hashed += to_hash.len();
61                }
62
63                // skip over separator and optionally a following CurDir item
64                // since components() would normalize these away.
65                component_start = i + 1;
66
67                let tail = &bytes[component_start..];
68
69                if !verbatim {
70                    component_start += match tail {
71                        [b'.'] => 1,
72                        [b'.', sep, ..]
73                            if *sep == SEPARATOR as u8 || *sep == ALT_SEPARATOR as u8 =>
74                        {
75                            1
76                        }
77                        _ => 0,
78                    };
79                }
80            }
81        }
82
83        if component_start < bytes.len() {
84            let to_hash = &bytes[component_start..];
85            h.write(to_hash);
86            bytes_hashed += to_hash.len();
87        }
88
89        h.write_usize(bytes_hashed);
90    }
91
92    // COMPLEX RULES OF WINDOWS PATH APPENDING
93    //
94    // 1. If the incoming path being pushed is absolute or has a prefix:
95    //    * replace the current path with the incoming path
96    //
97    // 2. If the current path have a verbatim, verbatim disk, or verbatim UNC prefix
98    //    and the incoming path being pushed is not empty:
99    //    * we know that incoming path has NO prefix (checked @ #1)
100    //    * build up the components representing our path (buffer)
101    //        * start with all of the components from the current path (assign to buffer)
102    //        * iterate through components of incoming path
103    //        * if the incoming path has a root dir, remove everything after
104    //          prefix in the current path (from buffer)
105    //        * skip appending (to buffer) any current dir component from incoming path
106    //        * if parent dir, check if the last component (in buffer) is normal, and if
107    //          so pop it off (of buffer)
108    //        * otherwise, push component (onto buffer)
109    //    * iterate through buffer of components to rebuild Vec<u8> via loop:
110    //        * assign flag (`need_sep`) to track if we need to add a separator
111    //        * at beginning of loop, check if `need_sep` and component not root dir:
112    //            * if so, push separator into Vec<u8>
113    //        * push component into Vec<u8>
114    //        * re-assign `need_sep` flag:
115    //            * if component was root dir, flag is false
116    //            * if component was prefix, flag is true IF not drive (Prefix::Disk)
117    //            * else, flag is true
118    //    * update inner pathbuf value to new Vec<u8>
119    //
120    // 3. If the incoming path being pushed has root dir ('\') and no prefix (checked @ #1):
121    //    * we shorten current path to just the prefix, which can be 0 if there is no prefix
122    //    * append incoming path to current path
123    //
124    // 4. Otherwise:
125    //    * If we need a separator (none at the end and current is not empty) and the current
126    //      bytes are not just a drive letter (e.g. C:), then append a separator to the end of
127    //      current path
128    //    * append incoming path to current path
129    fn push(current_path: &mut Vec<u8>, path: &[u8]) {
130        if path.is_empty() {
131            return;
132        }
133
134        let comps = Self::components(path);
135        let cur_comps = Self::components(current_path);
136
137        if comps.is_absolute() || comps.has_prefix() {
138            current_path.clear();
139            current_path.extend_from_slice(path);
140        } else if cur_comps.has_any_verbatim_prefix() && !path.is_empty() {
141            let mut buffer: Vec<_> = Self::components(current_path).collect();
142            for c in Self::components(path) {
143                match c {
144                    WindowsComponent::RootDir => {
145                        buffer.truncate(1);
146                        buffer.push(c);
147                    }
148                    WindowsComponent::CurDir => (),
149                    WindowsComponent::ParentDir => {
150                        if let Some(WindowsComponent::Normal(_)) = buffer.last() {
151                            buffer.pop();
152                        }
153                    }
154                    _ => buffer.push(c),
155                }
156            }
157
158            let mut new_path = Vec::new();
159            let mut need_sep = false;
160
161            for c in buffer {
162                if need_sep && c != WindowsComponent::RootDir {
163                    new_path.push(SEPARATOR as u8);
164                }
165
166                new_path.extend_from_slice(c.as_bytes());
167
168                need_sep = match c {
169                    WindowsComponent::RootDir => false,
170                    WindowsComponent::Prefix(prefix) => {
171                        !matches!(prefix.kind(), WindowsPrefix::Disk(_))
172                    }
173                    _ => true,
174                };
175            }
176
177            *current_path = new_path;
178        } else if comps.has_root() {
179            let len = Self::components(current_path).prefix_len();
180            current_path.truncate(len);
181            current_path.extend_from_slice(path);
182        } else {
183            // NOTE: From std lib, there's a check that the prefix len == path len, which
184            //       would imply having no other
185            let needs_sep = (!current_path.is_empty()
186                && !current_path.ends_with(&[SEPARATOR as u8]))
187                && !Self::components(current_path).is_only_disk();
188
189            if needs_sep {
190                current_path.push(SEPARATOR as u8);
191            }
192
193            current_path.extend_from_slice(path);
194        }
195    }
196
197    fn push_checked(current_path: &mut Vec<u8>, path: &[u8]) -> Result<(), CheckedPathError> {
198        // As we scan through path components, we maintain a count of normal components that
199        // have not been popped off as a result of a parent component. If we ever reach a
200        // parent component without any preceding normal components remaining, this violates
201        // pushing onto our path and represents a path traversal attack.
202        let mut normal_cnt = 0;
203        for component in WindowsPath::new(path).components() {
204            match component {
205                WindowsComponent::Prefix(_) => return Err(CheckedPathError::UnexpectedPrefix),
206                WindowsComponent::RootDir => return Err(CheckedPathError::UnexpectedRoot),
207                WindowsComponent::ParentDir if normal_cnt == 0 => {
208                    return Err(CheckedPathError::PathTraversalAttack)
209                }
210                WindowsComponent::ParentDir => normal_cnt -= 1,
211                WindowsComponent::Normal(bytes) => {
212                    for b in bytes {
213                        if DISALLOWED_FILENAME_BYTES.contains(b) {
214                            return Err(CheckedPathError::InvalidFilename);
215                        }
216                    }
217                    normal_cnt += 1;
218                }
219                _ => continue,
220            }
221        }
222
223        Self::push(current_path, path);
224        Ok(())
225    }
226}
227
228impl fmt::Debug for WindowsEncoding {
229    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230        f.debug_struct("WindowsEncoding").finish()
231    }
232}
233
234impl fmt::Display for WindowsEncoding {
235    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
236        write!(f, "WindowsEncoding")
237    }
238}
239
240impl<T> Path<T>
241where
242    T: Encoding,
243{
244    /// Returns true if the encoding for the path is for Windows.
245    ///
246    /// # Examples
247    ///
248    /// ```
249    /// use typed_path::{UnixPath, WindowsPath};
250    ///
251    /// assert!(!UnixPath::new("/some/path").has_windows_encoding());
252    /// assert!(WindowsPath::new(r"\some\path").has_windows_encoding());
253    /// ```
254    pub fn has_windows_encoding(&self) -> bool {
255        T::label() == WindowsEncoding::label()
256    }
257
258    /// Creates an owned [`PathBuf`] like `self` but using [`WindowsEncoding`].
259    ///
260    /// See [`Path::with_encoding`] for more information.
261    pub fn with_windows_encoding(&self) -> PathBuf<WindowsEncoding> {
262        self.with_encoding()
263    }
264
265    /// Creates an owned [`PathBuf`] like `self` but using [`WindowsEncoding`], ensuring it is a
266    /// valid Windows path.
267    ///
268    /// See [`Path::with_encoding_checked`] for more information.
269    pub fn with_windows_encoding_checked(
270        &self,
271    ) -> Result<PathBuf<WindowsEncoding>, CheckedPathError> {
272        self.with_encoding_checked()
273    }
274}
275
impl WindowsPath {
    /// Wraps this path in a [`TypedPath`] by passing `self` to [`TypedPath::windows`].
    ///
    /// The returned value's lifetime is tied to the borrow of `self`.
    pub fn to_typed_path(&self) -> TypedPath<'_> {
        TypedPath::windows(self)
    }

    /// Builds a [`TypedPathBuf`] from this path via [`TypedPathBuf::from_windows`].
    pub fn to_typed_path_buf(&self) -> TypedPathBuf {
        TypedPathBuf::from_windows(self)
    }
}
285
#[cfg(test)]
mod tests {
    use super::*;

    // Starting paths exercised by every scenario below.
    const EMPTY: &[u8] = b"";
    const RELATIVE: &[u8] = br"some\path";
    const ABSOLUTE: &[u8] = br"\some\path\";

    /// Runs `push_checked` of `pushed` onto a copy of `start`, then asserts both the
    /// returned result and the resulting contents of the current path.
    fn assert_push_checked(
        start: &[u8],
        pushed: &[u8],
        expected_result: Result<(), CheckedPathError>,
        expected_path: &[u8],
    ) {
        let mut current_path = start.to_vec();
        assert_eq!(
            WindowsEncoding::push_checked(&mut current_path, pushed),
            expected_result
        );
        assert_eq!(current_path, expected_path);
    }

    #[test]
    fn push_checked_should_fail_if_providing_an_absolute_path() {
        let pushed = br"\abc";

        // Empty current path will fail when pushing an absolute path
        assert_push_checked(EMPTY, pushed, Err(CheckedPathError::UnexpectedRoot), EMPTY);

        // Non-empty relative current path will fail when pushing an absolute path
        assert_push_checked(
            RELATIVE,
            pushed,
            Err(CheckedPathError::UnexpectedRoot),
            RELATIVE,
        );

        // Non-empty absolute current path will fail when pushing an absolute path
        assert_push_checked(
            ABSOLUTE,
            pushed,
            Err(CheckedPathError::UnexpectedRoot),
            ABSOLUTE,
        );
    }

    #[test]
    fn push_checked_should_fail_if_providing_a_path_with_an_embedded_prefix() {
        let pushed = br"C:abc";

        // Empty current path will fail when pushing a path with a prefix
        assert_push_checked(
            EMPTY,
            pushed,
            Err(CheckedPathError::UnexpectedPrefix),
            EMPTY,
        );

        // Non-empty relative current path will fail when pushing a path with a prefix
        assert_push_checked(
            RELATIVE,
            pushed,
            Err(CheckedPathError::UnexpectedPrefix),
            RELATIVE,
        );

        // Non-empty absolute current path will fail when pushing a path with a prefix
        assert_push_checked(
            ABSOLUTE,
            pushed,
            Err(CheckedPathError::UnexpectedPrefix),
            ABSOLUTE,
        );
    }

    #[test]
    fn push_checked_should_fail_if_providing_a_path_with_disallowed_filename_bytes() {
        let pushed = br"some\inva|lid\path";

        // Empty current path will fail when pushing a path containing disallowed filename bytes
        assert_push_checked(
            EMPTY,
            pushed,
            Err(CheckedPathError::InvalidFilename),
            EMPTY,
        );

        // Non-empty relative current path will fail when pushing a path containing disallowed
        // filename bytes
        assert_push_checked(
            RELATIVE,
            pushed,
            Err(CheckedPathError::InvalidFilename),
            RELATIVE,
        );

        // Non-empty absolute current path will fail when pushing a path containing disallowed
        // filename bytes
        assert_push_checked(
            ABSOLUTE,
            pushed,
            Err(CheckedPathError::InvalidFilename),
            ABSOLUTE,
        );
    }

    #[test]
    fn push_checked_should_fail_if_providing_a_path_that_would_escape_the_current_path() {
        let pushed = b"..";

        // Empty current path will fail when pushing a path that would escape
        assert_push_checked(
            EMPTY,
            pushed,
            Err(CheckedPathError::PathTraversalAttack),
            EMPTY,
        );

        // Non-empty relative current path will fail when pushing a path that would escape
        assert_push_checked(
            RELATIVE,
            pushed,
            Err(CheckedPathError::PathTraversalAttack),
            RELATIVE,
        );

        // Non-empty absolute current path will fail when pushing a path that would escape
        assert_push_checked(
            ABSOLUTE,
            pushed,
            Err(CheckedPathError::PathTraversalAttack),
            ABSOLUTE,
        );
    }

    #[test]
    fn push_checked_should_append_path_to_current_path_with_a_separator_if_does_not_violate_rules()
    {
        // Pushing a path that contains parent dirs, but does not escape the current path,
        // should succeed
        let pushed = br"abc\..\def\.";

        assert_push_checked(EMPTY, pushed, Ok(()), br"abc\..\def\.");
        assert_push_checked(RELATIVE, pushed, Ok(()), br"some\path\abc\..\def\.");
        assert_push_checked(ABSOLUTE, pushed, Ok(()), br"\some\path\abc\..\def\.");
    }
}