gix_validate/
path.rs

1use bstr::{BStr, ByteSlice};
2
/// Types used by [`component()`](super::component()): its error, its options and the leaf mode.
pub mod component {
    use std::fmt;

    /// The error returned by [`component()`](super::component()).
    ///
    /// Each variant names one reason for rejecting a path component; the `Display`
    /// implementation provides the human-readable explanation.
    #[derive(Debug)]
    #[allow(missing_docs)]
    #[non_exhaustive]
    pub enum Error {
        Empty,
        PathSeparator,
        WindowsPathPrefix,
        WindowsReservedName,
        WindowsIllegalCharacter,
        DotGitDir,
        SymlinkedGitModules,
        Relative,
    }

    impl fmt::Display for Error {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            // All messages are static, so pick the text first and write it once.
            let message = match self {
                Self::Empty => "A path component must not be empty",
                Self::PathSeparator => r"Path separators like / or \ are not allowed",
                Self::WindowsPathPrefix => "Windows path prefixes are not allowed",
                Self::WindowsReservedName => "Windows device-names may have side-effects and are not allowed",
                Self::WindowsIllegalCharacter => {
                    r#"Trailing spaces or dots, and the following characters anywhere, are forbidden in Windows paths, along with non-printable ones: <>:"|?*"#
                }
                Self::DotGitDir => "The .git name may never be used",
                Self::SymlinkedGitModules => "The .gitmodules file must not be a symlink",
                Self::Relative => "Relative components '.' and '..' are disallowed",
            };
            f.write_str(message)
        }
    }

    impl std::error::Error for Error {}

    /// Select which protections [`component()`](super::component()) applies.
    ///
    /// The `Default` implementation turns every protection on, which is the safest choice.
    #[derive(Debug, Copy, Clone)]
    pub struct Options {
        /// Reject components that can cause trouble on Windows. Meant to be enabled on Windows,
        /// but it may be enabled on any other platform as well.
        pub protect_windows: bool,
        /// If `true`, protections for the MacOS HFS+ filesystem are active: special directories
        /// that must never be written are detected while ignoring the same codepoints HFS+ ignores.
        ///
        /// This field is equivalent to `core.protectHFS`.
        pub protect_hfs: bool,
        /// If `true`, protections for Windows NTFS specific features are active. This adds handling
        /// for `8.3` filenames and alternate data streams, both of which could be used to mask the
        /// true name of what would be created on disk.
        ///
        /// This field is equivalent to `core.protectNTFS`.
        pub protect_ntfs: bool,
    }

    impl Default for Options {
        fn default() -> Self {
            Self {
                protect_windows: true,
                protect_hfs: true,
                protect_ntfs: true,
            }
        }
    }

    /// The mode of the component, if it is the leaf of a path.
    #[derive(Debug, Copy, Clone, PartialEq, Eq)]
    pub enum Mode {
        /// The item is a symbolic link.
        Symlink,
    }
}
80
81/// Assure the given `input` resembles a valid name for a tree or blob, and in that sense, a path component.
82/// `mode` indicates the kind of `input` and it should be `Some` if `input` is the last component in the underlying
83/// path.
84///
85/// `input` must not make it possible to exit the repository, or to specify absolute paths.
86pub fn component(
87    input: &BStr,
88    mode: Option<component::Mode>,
89    component::Options {
90        protect_windows,
91        protect_hfs,
92        protect_ntfs,
93    }: component::Options,
94) -> Result<&BStr, component::Error> {
95    if input.is_empty() {
96        return Err(component::Error::Empty);
97    }
98    if input == ".." || input == "." {
99        return Err(component::Error::Relative);
100    }
101    if protect_windows {
102        if input.find_byteset(br"/\").is_some() {
103            return Err(component::Error::PathSeparator);
104        }
105        if input.chars().nth(1) == Some(':') {
106            return Err(component::Error::WindowsPathPrefix);
107        }
108    } else if input.find_byte(b'/').is_some() {
109        return Err(component::Error::PathSeparator);
110    }
111    if protect_hfs {
112        if is_dot_hfs(input, "git") {
113            return Err(component::Error::DotGitDir);
114        }
115        if is_symlink(mode) && is_dot_hfs(input, "gitmodules") {
116            return Err(component::Error::SymlinkedGitModules);
117        }
118    }
119
120    if protect_ntfs {
121        if is_dot_git_ntfs(input) {
122            return Err(component::Error::DotGitDir);
123        }
124        if is_symlink(mode) && is_dot_ntfs(input, "gitmodules", "gi7eba") {
125            return Err(component::Error::SymlinkedGitModules);
126        }
127
128        if protect_windows {
129            if let Some(err) = check_win_devices_and_illegal_characters(input) {
130                return Err(err);
131            }
132        }
133    }
134
135    if !(protect_hfs | protect_ntfs) {
136        if input.eq_ignore_ascii_case(b".git") {
137            return Err(component::Error::DotGitDir);
138        }
139        if is_symlink(mode) && input.eq_ignore_ascii_case(b".gitmodules") {
140            return Err(component::Error::SymlinkedGitModules);
141        }
142    }
143    Ok(input)
144}
145
146/// Return `true` if the path component at `input` looks like a Windows device, like `CON`
147/// or `LPT1` (case-insensitively).
148///
149/// This is relevant only on Windows, where one may be tricked into reading or writing to such devices.
150/// When reading from `CON`, a console-program may block until the user provided input.
151pub fn component_is_windows_device(input: &BStr) -> bool {
152    is_win_device(input)
153}
154
155fn is_win_device(input: &BStr) -> bool {
156    let Some(in3) = input.get(..3) else { return false };
157    if in3.eq_ignore_ascii_case(b"AUX") && is_done_windows(input.get(3..)) {
158        return true;
159    }
160    if in3.eq_ignore_ascii_case(b"NUL") && is_done_windows(input.get(3..)) {
161        return true;
162    }
163    if in3.eq_ignore_ascii_case(b"PRN") && is_done_windows(input.get(3..)) {
164        return true;
165    }
166    // Note that the following allows `COM0`, even though `LPT0` is not allowed.
167    // Even though tests seem to indicate that neither `LPT0` nor `COM0` are valid
168    // device names, it's unclear this truly is the case in all possible versions and editions
169    // of Windows.
170    // Hence, justification for this asymmetry is merely to do exactly the same as Git does,
171    // and to have exactly the same behaviour during validation (for worktree-writes).
172    if in3.eq_ignore_ascii_case(b"COM")
173        && input.get(3).is_some_and(|n| *n >= b'1' && *n <= b'9')
174        && is_done_windows(input.get(4..))
175    {
176        return true;
177    }
178    if in3.eq_ignore_ascii_case(b"LPT")
179        && input.get(3).is_some_and(u8::is_ascii_digit)
180        && is_done_windows(input.get(4..))
181    {
182        return true;
183    }
184    if in3.eq_ignore_ascii_case(b"CON")
185        && (is_done_windows(input.get(3..))
186            || (input.get(3..6).is_some_and(|n| n.eq_ignore_ascii_case(b"IN$")) && is_done_windows(input.get(6..)))
187            || (input.get(3..7).is_some_and(|n| n.eq_ignore_ascii_case(b"OUT$")) && is_done_windows(input.get(7..))))
188    {
189        return true;
190    }
191    false
192}
193
194fn check_win_devices_and_illegal_characters(input: &BStr) -> Option<component::Error> {
195    if is_win_device(input) {
196        return Some(component::Error::WindowsReservedName);
197    }
198    if input.iter().any(|b| *b < 0x20 || b":<>\"|?*".contains(b)) {
199        return Some(component::Error::WindowsIllegalCharacter);
200    }
201    if input.ends_with(b".") || input.ends_with(b" ") {
202        return Some(component::Error::WindowsIllegalCharacter);
203    }
204    None
205}
206
207fn is_symlink(mode: Option<component::Mode>) -> bool {
208    mode == Some(component::Mode::Symlink)
209}
210
211fn is_dot_hfs(input: &BStr, search_case_insensitive: &str) -> bool {
212    let mut input = input.chars().filter(|c| match *c as u32 {
213        // Case-insensitive HFS+ skips these code points as "ignorable" when comparing filenames. See:
214        // https://github.com/git/git/commit/6162a1d323d24fd8cbbb1a6145a91fb849b2568f
215        // https://developer.apple.com/library/archive/technotes/tn/tn1150.html#StringComparisonAlgorithm
216        // https://github.com/apple-oss-distributions/hfs/blob/main/core/UCStringCompareData.h
217            0x200c | // ZERO WIDTH NON-JOINER
218            0x200d | // ZERO WIDTH JOINER
219            0x200e | // LEFT-TO-RIGHT MARK
220            0x200f | // RIGHT-TO-LEFT MARK
221            0x202a | // LEFT-TO-RIGHT EMBEDDING
222            0x202b | // RIGHT-TO-LEFT EMBEDDING
223            0x202c | // POP DIRECTIONAL FORMATTING
224            0x202d | // LEFT-TO-RIGHT OVERRIDE
225            0x202e | // RIGHT-TO-LEFT OVERRIDE
226            0x206a | // INHIBIT SYMMETRIC SWAPPING
227            0x206b | // ACTIVATE SYMMETRIC SWAPPING
228            0x206c | // INHIBIT ARABIC FORM SHAPING
229            0x206d | // ACTIVATE ARABIC FORM SHAPING
230            0x206e | // NATIONAL DIGIT SHAPES
231            0x206f | // NOMINAL DIGIT SHAPES
232            0xfeff => false, // ZERO WIDTH NO-BREAK SPACE
233            _ => true
234        });
235    if input.next() != Some('.') {
236        return false;
237    }
238
239    let mut comp = search_case_insensitive.chars();
240    loop {
241        match (comp.next(), input.next()) {
242            (Some(a), Some(b)) => {
243                if !a.eq_ignore_ascii_case(&b) {
244                    return false;
245                }
246            }
247            (None, None) => return true,
248            _ => return false,
249        }
250    }
251}
252
253fn is_dot_git_ntfs(input: &BStr) -> bool {
254    if input.get(..4).is_some_and(|input| input.eq_ignore_ascii_case(b".git")) {
255        return is_done_ntfs(input.get(4..));
256    }
257    if input.get(..5).is_some_and(|input| input.eq_ignore_ascii_case(b"git~1")) {
258        return is_done_ntfs(input.get(5..));
259    }
260    false
261}
262
263/// The `search_case_insensitive` name is the actual name to look for (in a case-insensitive way).
264/// Opposed to that there is the special `ntfs_shortname_prefix` which is derived from `search_case_insensitive`
265/// but looks more like a hash, one that NTFS uses to disambiguate things, for when there is a lot of files
266/// with the same prefix.
267fn is_dot_ntfs(input: &BStr, search_case_insensitive: &str, ntfs_shortname_prefix: &str) -> bool {
268    if input.first() == Some(&b'.') {
269        let end_pos = 1 + search_case_insensitive.len();
270        if input
271            .get(1..end_pos)
272            .is_some_and(|input| input.eq_ignore_ascii_case(search_case_insensitive.as_bytes()))
273        {
274            is_done_ntfs(input.get(end_pos..))
275        } else {
276            false
277        }
278    } else {
279        let search_case_insensitive: &[u8] = search_case_insensitive.as_bytes();
280        if search_case_insensitive
281            .get(..6)
282            .zip(input.get(..6))
283            .is_some_and(|(ntfs_prefix, first_6_of_input)| {
284                first_6_of_input.eq_ignore_ascii_case(ntfs_prefix)
285                    && input.get(6) == Some(&b'~')
286                    // It's notable that only `~1` to `~4` are possible before the disambiguation algorithm
287                    // switches to using the `ntfs_shortname_prefix`, which is checked hereafter.
288                    && input.get(7).is_some_and(|num| (b'1'..=b'4').contains(num))
289            })
290        {
291            return is_done_ntfs(input.get(8..));
292        }
293
294        let ntfs_shortname_prefix: &[u8] = ntfs_shortname_prefix.as_bytes();
295        let mut saw_tilde = false;
296        let mut pos = 0;
297        while pos < 8 {
298            let Some(b) = input.get(pos).copied() else {
299                return false;
300            };
301            if saw_tilde {
302                if !b.is_ascii_digit() {
303                    return false;
304                }
305            } else if b == b'~' {
306                saw_tilde = true;
307                pos += 1;
308                let Some(b) = input.get(pos).copied() else {
309                    return false;
310                };
311                if !(b'1'..=b'9').contains(&b) {
312                    return false;
313                }
314            } else if pos >= 6
315                || b & 0x80 == 0x80
316                || ntfs_shortname_prefix
317                    .get(pos)
318                    .is_none_or(|ob| !b.eq_ignore_ascii_case(ob))
319            {
320                return false;
321            }
322            pos += 1;
323        }
324        is_done_ntfs(input.get(pos..))
325    }
326}
327
/// Check if trailing filename bytes leave a match to special files like `.git` unchanged in NTFS.
///
/// Spaces and dots are skipped; the result is `true` if that reaches the end of `input` or a colon,
/// and `false` if any other byte comes first. A missing `input` counts as the end.
fn is_done_ntfs(input: Option<&[u8]>) -> bool {
    let Some(rest) = input else { return true };
    // Only the first byte that is neither space nor dot matters.
    match rest.iter().find(|&&b| b != b' ' && b != b'.') {
        Some(&b) => b == b':',
        None => true,
    }
}
342
/// Check if trailing filename bytes leave a match to Windows reserved device names unchanged.
///
/// Leading spaces are skipped; the result is `true` if that reaches the end of `input`, a dot or a colon,
/// and `false` for any other byte. A missing `input` counts as the end.
fn is_done_windows(input: Option<&[u8]>) -> bool {
    let Some(rest) = input else { return true };
    let first_after_spaces = rest.iter().copied().skip_while(|b| *b == b' ').next();
    match first_after_spaces {
        Some(next) => matches!(next, b'.' | b':'),
        None => true,
    }
}