git_glob/
parse.rs

1use bstr::{BString, ByteSlice};
2
3use crate::{pattern, pattern::Mode};
4
5#[inline]
6/// A sloppy parser that performs only the most basic checks, providing additional information
7/// using `pattern::Mode` flags.
8///
9/// Returns `(pattern, mode, no_wildcard_len)`
10pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option<usize>)> {
11    let mut mode = Mode::empty();
12    if pat.is_empty() {
13        return None;
14    };
15    if pat.first() == Some(&b'!') {
16        mode |= Mode::NEGATIVE;
17        pat = &pat[1..];
18    } else if pat.first() == Some(&b'\\') {
19        let second = pat.get(1);
20        if second == Some(&b'!') || second == Some(&b'#') {
21            pat = &pat[1..];
22        }
23    }
24    if pat.iter().all(|b| b.is_ascii_whitespace()) {
25        return None;
26    }
27    if pat.first() == Some(&b'/') {
28        mode |= Mode::ABSOLUTE;
29        pat = &pat[1..];
30    }
31    let mut pat = truncate_non_escaped_trailing_spaces(pat);
32    if pat.last() == Some(&b'/') {
33        mode |= Mode::MUST_BE_DIR;
34        pat.pop();
35    }
36
37    if !pat.contains(&b'/') {
38        mode |= Mode::NO_SUB_DIR;
39    }
40    if pat.first() == Some(&b'*') && first_wildcard_pos(&pat[1..]).is_none() {
41        mode |= Mode::ENDS_WITH;
42    }
43
44    let pos_of_first_wildcard = first_wildcard_pos(&pat);
45    Some((pat, mode, pos_of_first_wildcard))
46}
47
48fn first_wildcard_pos(pat: &[u8]) -> Option<usize> {
49    pat.find_byteset(GLOB_CHARACTERS)
50}
51
/// The set of bytes that `first_wildcard_pos()` searches for: `*`, `?`, `[` and the backslash.
// NOTE(review): the backslash is presumably part of the set because it starts an escape - confirm.
pub(crate) const GLOB_CHARACTERS: &[u8] = br"*?[\";
53
54/// We always copy just because that's ultimately needed anyway, not because we always have to.
55fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString {
56    match buf.rfind_not_byteset(br"\ ") {
57        Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace
58        None => buf.into(),
59        Some(start_of_non_space) => {
60            // This seems a bit strange but attempts to recreate the git implementation while
61            // actually removing the escape characters before spaces. We leave other backslashes
62            // for escapes to be handled by `glob/globset`.
63            let mut res: BString = buf[..start_of_non_space + 1].into();
64
65            let mut trailing_bytes = buf[start_of_non_space + 1..].iter();
66            let mut bare_spaces = 0;
67            while let Some(b) = trailing_bytes.next() {
68                match b {
69                    b' ' => {
70                        bare_spaces += 1;
71                    }
72                    b'\\' => {
73                        res.extend(std::iter::repeat(b' ').take(bare_spaces));
74                        bare_spaces = 0;
75                        // Skip what follows, like git does, but keep spaces if possible.
76                        if trailing_bytes.next() == Some(&b' ') {
77                            res.push(b' ');
78                        }
79                    }
80                    _ => unreachable!("BUG: this must be either backslash or space"),
81                }
82            }
83            res
84        }
85    }
86}