git_glob/
wildmatch.rs

1use bitflags::bitflags;
2bitflags! {
3    /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()].
4    #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
5    pub struct Mode: u8 {
6        /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths.
7        const NO_MATCH_SLASH_LITERAL = 1 << 0;
8        /// Match case insensitively for ascii characters only.
9        const IGNORE_CASE = 1 << 1;
10    }
11}
12
13pub(crate) mod function {
14    use bstr::{BStr, ByteSlice};
15
16    use crate::wildmatch::Mode;
17
18    #[derive(Eq, PartialEq)]
19    enum Result {
20        Match,
21        NoMatch,
22        AbortAll,
23        AbortToStarStar,
24    }
25
26    const STAR: u8 = b'*';
27    const BACKSLASH: u8 = b'\\';
28    const SLASH: u8 = b'/';
29    const BRACKET_OPEN: u8 = b'[';
30    const BRACKET_CLOSE: u8 = b']';
31    const COLON: u8 = b':';
32
33    const NEGATE_CLASS: u8 = b'!';
34
35    fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode) -> Result {
36        use self::Result::*;
37        let possibly_lowercase = |c: &u8| {
38            if mode.contains(Mode::IGNORE_CASE) {
39                c.to_ascii_lowercase()
40            } else {
41                *c
42            }
43        };
44        let mut p = pattern.iter().map(possibly_lowercase).enumerate().peekable();
45        let mut t = text.iter().map(possibly_lowercase).enumerate();
46
47        while let Some((mut p_idx, mut p_ch)) = p.next() {
48            let (mut t_idx, mut t_ch) = match t.next() {
49                Some(c) => c,
50                None if p_ch != STAR => return AbortAll,
51                None => (text.len(), 0),
52            };
53
54            if p_ch == BACKSLASH {
55                match p.next() {
56                    Some((_p_idx, p_ch)) => {
57                        if p_ch != t_ch {
58                            return NoMatch;
59                        } else {
60                            continue;
61                        }
62                    }
63                    None => return NoMatch,
64                };
65            }
66            match p_ch {
67                b'?' => {
68                    if mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
69                        return NoMatch;
70                    } else {
71                        continue;
72                    }
73                }
74                STAR => {
75                    let mut match_slash = !mode.contains(Mode::NO_MATCH_SLASH_LITERAL);
76                    match p.next() {
77                        Some((next_p_idx, next_p_ch)) => {
78                            let next;
79                            if next_p_ch == STAR {
80                                let leading_slash_idx = p_idx.checked_sub(1);
81                                while p.next_if(|(_, c)| *c == STAR).is_some() {}
82                                next = p.next();
83                                if !mode.contains(Mode::NO_MATCH_SLASH_LITERAL) {
84                                    match_slash = true;
85                                } else if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH)
86                                    && next.map_or(true, |(_, c)| {
87                                        c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH))
88                                    })
89                                {
90                                    if next.map_or(NoMatch, |(idx, _)| {
91                                        match_recursive(pattern[idx + 1..].as_bstr(), text[t_idx..].as_bstr(), mode)
92                                    }) == Match
93                                    {
94                                        return Match;
95                                    }
96                                    match_slash = true;
97                                } else {
98                                    match_slash = false;
99                                }
100                            } else {
101                                next = Some((next_p_idx, next_p_ch));
102                            }
103
104                            match next {
105                                None => {
106                                    return if !match_slash && text[t_idx..].contains(&SLASH) {
107                                        NoMatch
108                                    } else {
109                                        Match
110                                    };
111                                }
112                                Some((next_p_idx, next_p_ch)) => {
113                                    p_idx = next_p_idx;
114                                    p_ch = next_p_ch;
115                                    if !match_slash && p_ch == SLASH {
116                                        match text[t_idx..].find_byte(SLASH) {
117                                            Some(distance_to_slash) => {
118                                                for _ in t.by_ref().take(distance_to_slash) {}
119                                                continue;
120                                            }
121                                            None => return NoMatch,
122                                        }
123                                    }
124                                }
125                            }
126                        }
127                        None => {
128                            return if !match_slash && text[t_idx..].contains(&SLASH) {
129                                NoMatch
130                            } else {
131                                Match
132                            }
133                        }
134                    }
135
136                    return loop {
137                        if !crate::parse::GLOB_CHARACTERS.contains(&p_ch) {
138                            loop {
139                                if (!match_slash && t_ch == SLASH) || t_ch == p_ch {
140                                    break;
141                                }
142                                match t.next() {
143                                    Some(t) => {
144                                        t_idx = t.0;
145                                        t_ch = t.1;
146                                    }
147                                    None => break,
148                                };
149                            }
150                            if t_ch != p_ch {
151                                return NoMatch;
152                            }
153                        }
154                        let res = match_recursive(pattern[p_idx..].as_bstr(), text[t_idx..].as_bstr(), mode);
155                        if res != NoMatch {
156                            if !match_slash || res != AbortToStarStar {
157                                return res;
158                            }
159                        } else if !match_slash && t_ch == SLASH {
160                            return AbortToStarStar;
161                        }
162                        match t.next() {
163                            Some(t) => {
164                                t_idx = t.0;
165                                t_ch = t.1;
166                            }
167                            None => break AbortAll,
168                        };
169                    };
170                }
171                BRACKET_OPEN => {
172                    match p.next() {
173                        Some(t) => {
174                            p_idx = t.0;
175                            p_ch = t.1;
176                        }
177                        None => return AbortAll,
178                    };
179
180                    if p_ch == b'^' {
181                        p_ch = NEGATE_CLASS;
182                    }
183                    let negated = p_ch == NEGATE_CLASS;
184                    let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) };
185                    let mut prev_p_ch = 0;
186                    let mut matched = false;
187                    loop {
188                        match next {
189                            None => return AbortAll,
190                            Some((p_idx, mut p_ch)) => match p_ch {
191                                BACKSLASH => match p.next() {
192                                    Some((_, p_ch)) => {
193                                        if p_ch == t_ch {
194                                            matched = true
195                                        } else {
196                                            prev_p_ch = p_ch;
197                                        }
198                                    }
199                                    None => return AbortAll,
200                                },
201                                b'-' if prev_p_ch != 0
202                                    && p.peek().is_some()
203                                    && p.peek().map(|t| t.1) != Some(BRACKET_CLOSE) =>
204                                {
205                                    p_ch = p.next().expect("peeked").1;
206                                    if p_ch == BACKSLASH {
207                                        p_ch = match p.next() {
208                                            Some(t) => t.1,
209                                            None => return AbortAll,
210                                        };
211                                    }
212                                    if t_ch <= p_ch && t_ch >= prev_p_ch {
213                                        matched = true;
214                                    } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() {
215                                        let t_ch_upper = t_ch.to_ascii_uppercase();
216                                        if (t_ch_upper <= p_ch.to_ascii_uppercase()
217                                            && t_ch_upper >= prev_p_ch.to_ascii_uppercase())
218                                            || (t_ch_upper <= prev_p_ch.to_ascii_uppercase()
219                                                && t_ch_upper >= p_ch.to_ascii_uppercase())
220                                        {
221                                            matched = true;
222                                        }
223                                    }
224                                    prev_p_ch = 0;
225                                }
226                                BRACKET_OPEN if matches!(p.peek(), Some((_, COLON))) => {
227                                    p.next();
228                                    while p.peek().map_or(false, |t| t.1 != BRACKET_CLOSE) {
229                                        p.next();
230                                    }
231                                    let closing_bracket_idx = match p.next() {
232                                        Some((idx, _)) => idx,
233                                        None => return AbortAll,
234                                    };
235                                    const BRACKET__COLON__BRACKET: usize = 3;
236                                    if closing_bracket_idx - p_idx < BRACKET__COLON__BRACKET
237                                        || pattern[closing_bracket_idx - 1] != COLON
238                                    {
239                                        if t_ch == BRACKET_OPEN {
240                                            matched = true
241                                        }
242                                        p = pattern[p_idx + 1..]
243                                            .iter()
244                                            .map(possibly_lowercase)
245                                            .enumerate()
246                                            .peekable();
247                                    } else {
248                                        let class = &pattern.as_ref()[p_idx + 2..closing_bracket_idx - 1];
249                                        match class {
250                                            b"alnum" => {
251                                                if t_ch.is_ascii_alphanumeric() {
252                                                    matched = true;
253                                                }
254                                            }
255                                            b"alpha" => {
256                                                if t_ch.is_ascii_alphabetic() {
257                                                    matched = true;
258                                                }
259                                            }
260                                            b"blank" => {
261                                                if t_ch.is_ascii_whitespace() {
262                                                    matched = true;
263                                                }
264                                            }
265                                            b"cntrl" => {
266                                                if t_ch.is_ascii_control() {
267                                                    matched = true;
268                                                }
269                                            }
270                                            b"digit" => {
271                                                if t_ch.is_ascii_digit() {
272                                                    matched = true;
273                                                }
274                                            }
275
276                                            b"graph" => {
277                                                if t_ch.is_ascii_graphic() {
278                                                    matched = true;
279                                                }
280                                            }
281                                            b"lower" => {
282                                                if t_ch.is_ascii_lowercase() {
283                                                    matched = true;
284                                                }
285                                            }
286                                            b"print" => {
287                                                if (0x20u8..=0x7e).contains(&t_ch) {
288                                                    matched = true;
289                                                }
290                                            }
291                                            b"punct" => {
292                                                if t_ch.is_ascii_punctuation() {
293                                                    matched = true;
294                                                }
295                                            }
296                                            b"space" => {
297                                                if t_ch == b' ' {
298                                                    matched = true;
299                                                }
300                                            }
301                                            b"upper" => {
302                                                if t_ch.is_ascii_uppercase()
303                                                    || mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase()
304                                                {
305                                                    matched = true;
306                                                }
307                                            }
308                                            b"xdigit" => {
309                                                if t_ch.is_ascii_hexdigit() {
310                                                    matched = true;
311                                                }
312                                            }
313                                            _ => return AbortAll,
314                                        };
315                                        prev_p_ch = 0;
316                                    }
317                                }
318                                _ => {
319                                    prev_p_ch = p_ch;
320                                    if p_ch == t_ch {
321                                        matched = true;
322                                    }
323                                }
324                            },
325                        };
326                        next = p.next();
327                        if let Some((_, BRACKET_CLOSE)) = next {
328                            break;
329                        }
330                    }
331                    if matched == negated || mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
332                        return NoMatch;
333                    }
334                    continue;
335                }
336                non_glob_ch => {
337                    if non_glob_ch != t_ch {
338                        return NoMatch;
339                    } else {
340                        continue;
341                    }
342                }
343            }
344        }
345        t.next().map(|_| NoMatch).unwrap_or(Match)
346    }
347
348    /// Employ pattern matching to see if `value` matches `pattern`.
349    ///
350    /// `mode` can be used to adjust the way the matching is performed.
351    pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool {
352        match_recursive(pattern, value, mode) == Result::Match
353    }
354}