gix_validate/
path.rs

1use bstr::{BStr, ByteSlice};
2
3///
4pub mod component {
5    /// The error returned by [`component()`](super::component()).
6    #[derive(Debug, thiserror::Error)]
7    #[allow(missing_docs)]
8    pub enum Error {
9        #[error("A path component must not be empty")]
10        Empty,
11        #[error(r"Path separators like / or \ are not allowed")]
12        PathSeparator,
13        #[error("Windows path prefixes are not allowed")]
14        WindowsPathPrefix,
15        #[error("Windows device-names may have side-effects and are not allowed")]
16        WindowsReservedName,
17        #[error(r#"Trailing spaces or dots, and the following characters anywhere, are forbidden in Windows paths, along with non-printable ones: <>:"|?*"#)]
18        WindowsIllegalCharacter,
19        #[error("The .git name may never be used")]
20        DotGitDir,
21        #[error("The .gitmodules file must not be a symlink")]
22        SymlinkedGitModules,
23        #[error("Relative components '.' and '..' are disallowed")]
24        Relative,
25    }
26
27    /// Further specify what to check for in [`component()`](super::component())
28    ///
29    /// Note that the `Default` implementation maximizes safety by enabling all protections.
30    #[derive(Debug, Copy, Clone)]
31    pub struct Options {
32        /// This flag should be turned on when on Windows, but can be turned on when on other platforms
33        /// as well to prevent path components that can cause trouble on Windows.
34        pub protect_windows: bool,
35        /// If `true`, protections for the MacOS HFS+ filesystem will be active, checking for
36        /// special directories that we should never write while ignoring codepoints just like HFS+ would.
37        ///
38        /// This field is equivalent to `core.protectHFS`.
39        pub protect_hfs: bool,
40        /// If `true`, protections for Windows NTFS specific features will be active. This adds special handling
41        /// for `8.3` filenames and alternate data streams, both of which could be used to mask the true name of
42        /// what would be created on disk.
43        ///
44        /// This field is equivalent to `core.protectNTFS`.
45        pub protect_ntfs: bool,
46    }
47
48    impl Default for Options {
49        fn default() -> Self {
50            Options {
51                protect_windows: true,
52                protect_hfs: true,
53                protect_ntfs: true,
54            }
55        }
56    }
57
58    /// The mode of the component, if it's the leaf of a path.
59    #[derive(Debug, Copy, Clone, PartialEq, Eq)]
60    pub enum Mode {
61        /// The item is a symbolic link.
62        Symlink,
63    }
64}
65
66/// Assure the given `input` resembles a valid name for a tree or blob, and in that sense, a path component.
67/// `mode` indicates the kind of `input` and it should be `Some` if `input` is the last component in the underlying
68/// path.
69///
70/// `input` must not make it possible to exit the repository, or to specify absolute paths.
71pub fn component(
72    input: &BStr,
73    mode: Option<component::Mode>,
74    component::Options {
75        protect_windows,
76        protect_hfs,
77        protect_ntfs,
78    }: component::Options,
79) -> Result<&BStr, component::Error> {
80    if input.is_empty() {
81        return Err(component::Error::Empty);
82    }
83    if input == ".." || input == "." {
84        return Err(component::Error::Relative);
85    }
86    if protect_windows {
87        if input.find_byteset(br"/\").is_some() {
88            return Err(component::Error::PathSeparator);
89        }
90        if input.chars().nth(1) == Some(':') {
91            return Err(component::Error::WindowsPathPrefix);
92        }
93    } else if input.find_byte(b'/').is_some() {
94        return Err(component::Error::PathSeparator);
95    }
96    if protect_hfs {
97        if is_dot_hfs(input, "git") {
98            return Err(component::Error::DotGitDir);
99        }
100        if is_symlink(mode) && is_dot_hfs(input, "gitmodules") {
101            return Err(component::Error::SymlinkedGitModules);
102        }
103    }
104
105    if protect_ntfs {
106        if is_dot_git_ntfs(input) {
107            return Err(component::Error::DotGitDir);
108        }
109        if is_symlink(mode) && is_dot_ntfs(input, "gitmodules", "gi7eba") {
110            return Err(component::Error::SymlinkedGitModules);
111        }
112
113        if protect_windows {
114            if let Some(err) = check_win_devices_and_illegal_characters(input) {
115                return Err(err);
116            }
117        }
118    }
119
120    if !(protect_hfs | protect_ntfs) {
121        if input.eq_ignore_ascii_case(b".git") {
122            return Err(component::Error::DotGitDir);
123        }
124        if is_symlink(mode) && input.eq_ignore_ascii_case(b".gitmodules") {
125            return Err(component::Error::SymlinkedGitModules);
126        }
127    }
128    Ok(input)
129}
130
131/// Return `true` if the path component at `input` looks like a Windows device, like `CON`
132/// or `LPT1` (case-insensitively).
133///
134/// This is relevant only on Windows, where one may be tricked into reading or writing to such devices.
135/// When reading from `CON`, a console-program may block until the user provided input.
136pub fn component_is_windows_device(input: &BStr) -> bool {
137    is_win_device(input)
138}
139
140fn is_win_device(input: &BStr) -> bool {
141    let Some(in3) = input.get(..3) else { return false };
142    if in3.eq_ignore_ascii_case(b"AUX") && is_done_windows(input.get(3..)) {
143        return true;
144    }
145    if in3.eq_ignore_ascii_case(b"NUL") && is_done_windows(input.get(3..)) {
146        return true;
147    }
148    if in3.eq_ignore_ascii_case(b"PRN") && is_done_windows(input.get(3..)) {
149        return true;
150    }
151    // Note that the following allows `COM0`, even though `LPT0` is not allowed.
152    // Even though tests seem to indicate that neither `LPT0` nor `COM0` are valid
153    // device names, it's unclear this truly is the case in all possible versions and editions
154    // of Windows.
155    // Hence, justification for this asymmetry is merely to do exactly the same as Git does,
156    // and to have exactly the same behaviour during validation (for worktree-writes).
157    if in3.eq_ignore_ascii_case(b"COM")
158        && input.get(3).is_some_and(|n| *n >= b'1' && *n <= b'9')
159        && is_done_windows(input.get(4..))
160    {
161        return true;
162    }
163    if in3.eq_ignore_ascii_case(b"LPT")
164        && input.get(3).is_some_and(u8::is_ascii_digit)
165        && is_done_windows(input.get(4..))
166    {
167        return true;
168    }
169    if in3.eq_ignore_ascii_case(b"CON")
170        && (is_done_windows(input.get(3..))
171            || (input.get(3..6).is_some_and(|n| n.eq_ignore_ascii_case(b"IN$")) && is_done_windows(input.get(6..)))
172            || (input.get(3..7).is_some_and(|n| n.eq_ignore_ascii_case(b"OUT$")) && is_done_windows(input.get(7..))))
173    {
174        return true;
175    }
176    false
177}
178
179fn check_win_devices_and_illegal_characters(input: &BStr) -> Option<component::Error> {
180    if is_win_device(input) {
181        return Some(component::Error::WindowsReservedName);
182    }
183    if input.iter().any(|b| *b < 0x20 || b":<>\"|?*".contains(b)) {
184        return Some(component::Error::WindowsIllegalCharacter);
185    }
186    if input.ends_with(b".") || input.ends_with(b" ") {
187        return Some(component::Error::WindowsIllegalCharacter);
188    }
189    None
190}
191
192fn is_symlink(mode: Option<component::Mode>) -> bool {
193    mode == Some(component::Mode::Symlink)
194}
195
196fn is_dot_hfs(input: &BStr, search_case_insensitive: &str) -> bool {
197    let mut input = input.chars().filter(|c| match *c as u32 {
198        // Case-insensitive HFS+ skips these code points as "ignorable" when comparing filenames. See:
199        // https://github.com/git/git/commit/6162a1d323d24fd8cbbb1a6145a91fb849b2568f
200        // https://developer.apple.com/library/archive/technotes/tn/tn1150.html#StringComparisonAlgorithm
201        // https://github.com/apple-oss-distributions/hfs/blob/main/core/UCStringCompareData.h
202            0x200c | // ZERO WIDTH NON-JOINER
203            0x200d | // ZERO WIDTH JOINER
204            0x200e | // LEFT-TO-RIGHT MARK
205            0x200f | // RIGHT-TO-LEFT MARK
206            0x202a | // LEFT-TO-RIGHT EMBEDDING
207            0x202b | // RIGHT-TO-LEFT EMBEDDING
208            0x202c | // POP DIRECTIONAL FORMATTING
209            0x202d | // LEFT-TO-RIGHT OVERRIDE
210            0x202e | // RIGHT-TO-LEFT OVERRIDE
211            0x206a | // INHIBIT SYMMETRIC SWAPPING
212            0x206b | // ACTIVATE SYMMETRIC SWAPPING
213            0x206c | // INHIBIT ARABIC FORM SHAPING
214            0x206d | // ACTIVATE ARABIC FORM SHAPING
215            0x206e | // NATIONAL DIGIT SHAPES
216            0x206f | // NOMINAL DIGIT SHAPES
217            0xfeff => false, // ZERO WIDTH NO-BREAK SPACE
218            _ => true
219        });
220    if input.next() != Some('.') {
221        return false;
222    }
223
224    let mut comp = search_case_insensitive.chars();
225    loop {
226        match (comp.next(), input.next()) {
227            (Some(a), Some(b)) => {
228                if !a.eq_ignore_ascii_case(&b) {
229                    return false;
230                }
231            }
232            (None, None) => return true,
233            _ => return false,
234        }
235    }
236}
237
238fn is_dot_git_ntfs(input: &BStr) -> bool {
239    if input.get(..4).is_some_and(|input| input.eq_ignore_ascii_case(b".git")) {
240        return is_done_ntfs(input.get(4..));
241    }
242    if input.get(..5).is_some_and(|input| input.eq_ignore_ascii_case(b"git~1")) {
243        return is_done_ntfs(input.get(5..));
244    }
245    false
246}
247
248/// The `search_case_insensitive` name is the actual name to look for (in a case-insensitive way).
249/// Opposed to that there is the special `ntfs_shortname_prefix` which is derived from `search_case_insensitive`
250/// but looks more like a hash, one that NTFS uses to disambiguate things, for when there is a lot of files
251/// with the same prefix.
252fn is_dot_ntfs(input: &BStr, search_case_insensitive: &str, ntfs_shortname_prefix: &str) -> bool {
253    if input.first() == Some(&b'.') {
254        let end_pos = 1 + search_case_insensitive.len();
255        if input
256            .get(1..end_pos)
257            .is_some_and(|input| input.eq_ignore_ascii_case(search_case_insensitive.as_bytes()))
258        {
259            is_done_ntfs(input.get(end_pos..))
260        } else {
261            false
262        }
263    } else {
264        let search_case_insensitive: &[u8] = search_case_insensitive.as_bytes();
265        if search_case_insensitive
266            .get(..6)
267            .zip(input.get(..6))
268            .is_some_and(|(ntfs_prefix, first_6_of_input)| {
269                first_6_of_input.eq_ignore_ascii_case(ntfs_prefix)
270                    && input.get(6) == Some(&b'~')
271                    // It's notable that only `~1` to `~4` are possible before the disambiguation algorithm
272                    // switches to using the `ntfs_shortname_prefix`, which is checked hereafter.
273                    && input.get(7).is_some_and(|num| (b'1'..=b'4').contains(num))
274            })
275        {
276            return is_done_ntfs(input.get(8..));
277        }
278
279        let ntfs_shortname_prefix: &[u8] = ntfs_shortname_prefix.as_bytes();
280        let mut saw_tilde = false;
281        let mut pos = 0;
282        while pos < 8 {
283            let Some(b) = input.get(pos).copied() else {
284                return false;
285            };
286            if saw_tilde {
287                if !b.is_ascii_digit() {
288                    return false;
289                }
290            } else if b == b'~' {
291                saw_tilde = true;
292                pos += 1;
293                let Some(b) = input.get(pos).copied() else {
294                    return false;
295                };
296                if !(b'1'..=b'9').contains(&b) {
297                    return false;
298                }
299            } else if pos >= 6
300                || b & 0x80 == 0x80
301                || ntfs_shortname_prefix
302                    .get(pos)
303                    .map_or(true, |ob| !b.eq_ignore_ascii_case(ob))
304            {
305                return false;
306            }
307            pos += 1;
308        }
309        is_done_ntfs(input.get(pos..))
310    }
311}
312
/// Decide whether the bytes trailing a special name like `.git` keep it a match on NTFS.
///
/// That is the case if there is no `input`, or if it consists of nothing but spaces and dots
/// up to its end or up to the first colon.
fn is_done_ntfs(input: Option<&[u8]>) -> bool {
    let Some(trailing) = input else {
        return true;
    };
    // Only the first byte that is neither space nor dot decides.
    let first_significant = trailing.iter().copied().find(|&byte| byte != b' ' && byte != b'.');
    matches!(first_significant, None | Some(b':'))
}
327
/// Decide whether the bytes trailing a reserved Windows device name keep it a match.
///
/// That is the case if there is no `input`, if it consists of spaces only, or if the first byte
/// after any leading spaces is a dot or a colon.
fn is_done_windows(input: Option<&[u8]>) -> bool {
    let Some(trailing) = input else {
        return true;
    };
    let after_spaces = trailing.iter().copied().find(|&byte| byte != b' ');
    matches!(after_spaces, None | Some(b'.' | b':'))
}