git_glob/wildmatch.rs
1use bitflags::bitflags;
2bitflags! {
3 /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()].
4 #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
5 pub struct Mode: u8 {
6 /// Let globs like `*` and `?` not match the slash `/` literal, which is useful when matching paths.
7 const NO_MATCH_SLASH_LITERAL = 1 << 0;
8 /// Match case insensitively for ascii characters only.
9 const IGNORE_CASE = 1 << 1;
10 }
11}
12
13pub(crate) mod function {
14 use bstr::{BStr, ByteSlice};
15
16 use crate::wildmatch::Mode;
17
18 #[derive(Eq, PartialEq)]
19 enum Result {
20 Match,
21 NoMatch,
22 AbortAll,
23 AbortToStarStar,
24 }
25
26 const STAR: u8 = b'*';
27 const BACKSLASH: u8 = b'\\';
28 const SLASH: u8 = b'/';
29 const BRACKET_OPEN: u8 = b'[';
30 const BRACKET_CLOSE: u8 = b']';
31 const COLON: u8 = b':';
32
33 const NEGATE_CLASS: u8 = b'!';
34
35 fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode) -> Result {
36 use self::Result::*;
37 let possibly_lowercase = |c: &u8| {
38 if mode.contains(Mode::IGNORE_CASE) {
39 c.to_ascii_lowercase()
40 } else {
41 *c
42 }
43 };
44 let mut p = pattern.iter().map(possibly_lowercase).enumerate().peekable();
45 let mut t = text.iter().map(possibly_lowercase).enumerate();
46
47 while let Some((mut p_idx, mut p_ch)) = p.next() {
48 let (mut t_idx, mut t_ch) = match t.next() {
49 Some(c) => c,
50 None if p_ch != STAR => return AbortAll,
51 None => (text.len(), 0),
52 };
53
54 if p_ch == BACKSLASH {
55 match p.next() {
56 Some((_p_idx, p_ch)) => {
57 if p_ch != t_ch {
58 return NoMatch;
59 } else {
60 continue;
61 }
62 }
63 None => return NoMatch,
64 };
65 }
66 match p_ch {
67 b'?' => {
68 if mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
69 return NoMatch;
70 } else {
71 continue;
72 }
73 }
74 STAR => {
75 let mut match_slash = !mode.contains(Mode::NO_MATCH_SLASH_LITERAL);
76 match p.next() {
77 Some((next_p_idx, next_p_ch)) => {
78 let next;
79 if next_p_ch == STAR {
80 let leading_slash_idx = p_idx.checked_sub(1);
81 while p.next_if(|(_, c)| *c == STAR).is_some() {}
82 next = p.next();
83 if !mode.contains(Mode::NO_MATCH_SLASH_LITERAL) {
84 match_slash = true;
85 } else if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH)
86 && next.map_or(true, |(_, c)| {
87 c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH))
88 })
89 {
90 if next.map_or(NoMatch, |(idx, _)| {
91 match_recursive(pattern[idx + 1..].as_bstr(), text[t_idx..].as_bstr(), mode)
92 }) == Match
93 {
94 return Match;
95 }
96 match_slash = true;
97 } else {
98 match_slash = false;
99 }
100 } else {
101 next = Some((next_p_idx, next_p_ch));
102 }
103
104 match next {
105 None => {
106 return if !match_slash && text[t_idx..].contains(&SLASH) {
107 NoMatch
108 } else {
109 Match
110 };
111 }
112 Some((next_p_idx, next_p_ch)) => {
113 p_idx = next_p_idx;
114 p_ch = next_p_ch;
115 if !match_slash && p_ch == SLASH {
116 match text[t_idx..].find_byte(SLASH) {
117 Some(distance_to_slash) => {
118 for _ in t.by_ref().take(distance_to_slash) {}
119 continue;
120 }
121 None => return NoMatch,
122 }
123 }
124 }
125 }
126 }
127 None => {
128 return if !match_slash && text[t_idx..].contains(&SLASH) {
129 NoMatch
130 } else {
131 Match
132 }
133 }
134 }
135
136 return loop {
137 if !crate::parse::GLOB_CHARACTERS.contains(&p_ch) {
138 loop {
139 if (!match_slash && t_ch == SLASH) || t_ch == p_ch {
140 break;
141 }
142 match t.next() {
143 Some(t) => {
144 t_idx = t.0;
145 t_ch = t.1;
146 }
147 None => break,
148 };
149 }
150 if t_ch != p_ch {
151 return NoMatch;
152 }
153 }
154 let res = match_recursive(pattern[p_idx..].as_bstr(), text[t_idx..].as_bstr(), mode);
155 if res != NoMatch {
156 if !match_slash || res != AbortToStarStar {
157 return res;
158 }
159 } else if !match_slash && t_ch == SLASH {
160 return AbortToStarStar;
161 }
162 match t.next() {
163 Some(t) => {
164 t_idx = t.0;
165 t_ch = t.1;
166 }
167 None => break AbortAll,
168 };
169 };
170 }
171 BRACKET_OPEN => {
172 match p.next() {
173 Some(t) => {
174 p_idx = t.0;
175 p_ch = t.1;
176 }
177 None => return AbortAll,
178 };
179
180 if p_ch == b'^' {
181 p_ch = NEGATE_CLASS;
182 }
183 let negated = p_ch == NEGATE_CLASS;
184 let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) };
185 let mut prev_p_ch = 0;
186 let mut matched = false;
187 loop {
188 match next {
189 None => return AbortAll,
190 Some((p_idx, mut p_ch)) => match p_ch {
191 BACKSLASH => match p.next() {
192 Some((_, p_ch)) => {
193 if p_ch == t_ch {
194 matched = true
195 } else {
196 prev_p_ch = p_ch;
197 }
198 }
199 None => return AbortAll,
200 },
201 b'-' if prev_p_ch != 0
202 && p.peek().is_some()
203 && p.peek().map(|t| t.1) != Some(BRACKET_CLOSE) =>
204 {
205 p_ch = p.next().expect("peeked").1;
206 if p_ch == BACKSLASH {
207 p_ch = match p.next() {
208 Some(t) => t.1,
209 None => return AbortAll,
210 };
211 }
212 if t_ch <= p_ch && t_ch >= prev_p_ch {
213 matched = true;
214 } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() {
215 let t_ch_upper = t_ch.to_ascii_uppercase();
216 if (t_ch_upper <= p_ch.to_ascii_uppercase()
217 && t_ch_upper >= prev_p_ch.to_ascii_uppercase())
218 || (t_ch_upper <= prev_p_ch.to_ascii_uppercase()
219 && t_ch_upper >= p_ch.to_ascii_uppercase())
220 {
221 matched = true;
222 }
223 }
224 prev_p_ch = 0;
225 }
226 BRACKET_OPEN if matches!(p.peek(), Some((_, COLON))) => {
227 p.next();
228 while p.peek().map_or(false, |t| t.1 != BRACKET_CLOSE) {
229 p.next();
230 }
231 let closing_bracket_idx = match p.next() {
232 Some((idx, _)) => idx,
233 None => return AbortAll,
234 };
235 const BRACKET__COLON__BRACKET: usize = 3;
236 if closing_bracket_idx - p_idx < BRACKET__COLON__BRACKET
237 || pattern[closing_bracket_idx - 1] != COLON
238 {
239 if t_ch == BRACKET_OPEN {
240 matched = true
241 }
242 p = pattern[p_idx + 1..]
243 .iter()
244 .map(possibly_lowercase)
245 .enumerate()
246 .peekable();
247 } else {
248 let class = &pattern.as_ref()[p_idx + 2..closing_bracket_idx - 1];
249 match class {
250 b"alnum" => {
251 if t_ch.is_ascii_alphanumeric() {
252 matched = true;
253 }
254 }
255 b"alpha" => {
256 if t_ch.is_ascii_alphabetic() {
257 matched = true;
258 }
259 }
260 b"blank" => {
261 if t_ch.is_ascii_whitespace() {
262 matched = true;
263 }
264 }
265 b"cntrl" => {
266 if t_ch.is_ascii_control() {
267 matched = true;
268 }
269 }
270 b"digit" => {
271 if t_ch.is_ascii_digit() {
272 matched = true;
273 }
274 }
275
276 b"graph" => {
277 if t_ch.is_ascii_graphic() {
278 matched = true;
279 }
280 }
281 b"lower" => {
282 if t_ch.is_ascii_lowercase() {
283 matched = true;
284 }
285 }
286 b"print" => {
287 if (0x20u8..=0x7e).contains(&t_ch) {
288 matched = true;
289 }
290 }
291 b"punct" => {
292 if t_ch.is_ascii_punctuation() {
293 matched = true;
294 }
295 }
296 b"space" => {
297 if t_ch == b' ' {
298 matched = true;
299 }
300 }
301 b"upper" => {
302 if t_ch.is_ascii_uppercase()
303 || mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase()
304 {
305 matched = true;
306 }
307 }
308 b"xdigit" => {
309 if t_ch.is_ascii_hexdigit() {
310 matched = true;
311 }
312 }
313 _ => return AbortAll,
314 };
315 prev_p_ch = 0;
316 }
317 }
318 _ => {
319 prev_p_ch = p_ch;
320 if p_ch == t_ch {
321 matched = true;
322 }
323 }
324 },
325 };
326 next = p.next();
327 if let Some((_, BRACKET_CLOSE)) = next {
328 break;
329 }
330 }
331 if matched == negated || mode.contains(Mode::NO_MATCH_SLASH_LITERAL) && t_ch == SLASH {
332 return NoMatch;
333 }
334 continue;
335 }
336 non_glob_ch => {
337 if non_glob_ch != t_ch {
338 return NoMatch;
339 } else {
340 continue;
341 }
342 }
343 }
344 }
345 t.next().map(|_| NoMatch).unwrap_or(Match)
346 }
347
348 /// Employ pattern matching to see if `value` matches `pattern`.
349 ///
350 /// `mode` can be used to adjust the way the matching is performed.
351 pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool {
352 match_recursive(pattern, value, mode) == Result::Match
353 }
354}