fast_glob/
lib.rs

1//! `fast-glob` is a high-performance glob matching crate for Rust, originally forked from [`devongovett/glob-match`](https://github.com/devongovett/glob-match).
2//! This crate provides efficient glob pattern matching with support for multi-pattern matching and brace expansion.
3//!
4//! ## Key Features
5//!
6//! - Up to 60% performance improvement.
7//! - Support for more complex and efficient brace expansion.
8//! - Fixed matching issues with wildcard and globstar [`glob-match/issues#9`](https://github.com/devongovett/glob-match/issues/9).
9//!
10//! ## Examples
11//!
12//! ```rust
13//! use fast_glob::glob_match;
14//!
15//! let glob = "some/**/n*d[k-m]e?txt";
16//! let path = "some/a/bigger/path/to/the/crazy/needle.txt";
17//!
18//! assert!(glob_match(glob, path));
19//! ```
20//!
21//! ## Syntax
22//!
23//! `fast-glob` supports the following glob pattern syntax:
24//!
25//! | Syntax  | Meaning                                                                                                                                                                                             |
26//! | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
27//! | `?`     | Matches any single character.                                                                                                                                                                       |
28//! | `*`     | Matches zero or more characters, except for path separators (e.g., `/`).                                                                                                                             |
29//! | `**`    | Matches zero or more characters, including path separators. Must match a complete path segment (i.e., followed by a `/` or the end of the pattern).                                                  |
30//! | `[ab]`  | Matches one of the characters contained in the brackets. Character ranges, e.g., `[a-z]`, are also supported. Use `[!ab]` or `[^ab]` to match any character _except_ those contained in the brackets. |
31//! | `{a,b}` | Matches one of the patterns contained in the braces. Any of the wildcard characters can be used in the sub-patterns. Braces may be nested up to 10 levels deep.                                     |
32//! | `!`     | When at the start of the glob, this negates the result. Multiple `!` characters negate the glob multiple times.                                                                                     |
33//! | `\`     | A backslash character may be used to escape any of the above special characters.                                                                                                                    |
34//!
35//! ---
36//!
37//! For detailed usage and API reference, refer to the specific function and struct documentation.
38//!
39//! For any issues or contributions, please visit the [GitHub repository](https://github.com/shulaoda/fast-glob).
40
41/**
42 * The following code is modified based on
43 * https://github.com/devongovett/glob-match/blob/d5a6c67/src/lib.rs
44 *
45 * MIT Licensed
46 * Copyright (c) 2023 Devon Govett
47 * https://github.com/devongovett/glob-match/tree/main/LICENSE
48 */
49use std::path::is_separator;
50
51use arrayvec::ArrayVec;
52
53#[derive(Clone, Debug, Default)]
54struct State {
55    path_index: usize,
56    glob_index: usize,
57    brace_depth: usize,
58
59    wildcard: Wildcard,
60    globstar: Wildcard,
61}
62
63#[derive(Clone, Copy, Debug, Default)]
64struct Wildcard {
65    glob_index: u32,
66    path_index: u32,
67    brace_depth: u32,
68}
69
70type BraceStack = ArrayVec<(u32, u32), 10>;
71
72pub fn glob_match(glob: impl AsRef<[u8]>, path: impl AsRef<[u8]>) -> bool {
73    let glob = glob.as_ref();
74    let path = path.as_ref();
75    glob_match_impl(glob, path)
76}
77
78fn glob_match_impl(glob: &[u8], path: &[u8]) -> bool {
79    let mut state = State::default();
80
81    let mut negated = false;
82    while state.glob_index < glob.len() && glob[state.glob_index] == b'!' {
83        negated = !negated;
84        state.glob_index += 1;
85    }
86
87    let mut brace_stack = ArrayVec::<_, 10>::new();
88    let matched = state.glob_match_from(glob, path, 0, &mut brace_stack);
89
90    negated ^ matched
91}
92
93#[inline(always)]
94fn unescape(c: &mut u8, glob: &[u8], state: &mut State) -> bool {
95    if *c == b'\\' {
96        state.glob_index += 1;
97        if state.glob_index >= glob.len() {
98            return false;
99        }
100        *c = match glob[state.glob_index] {
101            b'a' => b'\x61',
102            b'b' => b'\x08',
103            b'n' => b'\n',
104            b'r' => b'\r',
105            b't' => b'\t',
106            c => c,
107        }
108    }
109    true
110}
111
112impl State {
113    #[inline(always)]
114    fn backtrack(&mut self) {
115        self.glob_index = self.wildcard.glob_index as usize;
116        self.path_index = self.wildcard.path_index as usize;
117        self.brace_depth = self.wildcard.brace_depth as usize;
118    }
119
120    #[inline(always)]
121    fn skip_globstars(&mut self, glob: &[u8]) {
122        let mut glob_index = self.glob_index + 2;
123
124        while glob_index + 4 <= glob.len() && &glob[glob_index..glob_index + 4] == b"/**/" {
125            glob_index += 3;
126        }
127
128        if &glob[glob_index..] == b"/**" {
129            glob_index += 3;
130        }
131
132        self.glob_index = glob_index - 2;
133    }
134
135    #[inline(always)]
136    fn skip_to_separator(&mut self, path: &[u8], is_end_invalid: bool) {
137        if self.path_index == path.len() {
138            self.wildcard.path_index += 1;
139            return;
140        }
141
142        let mut path_index = self.path_index;
143        while path_index < path.len() && !is_separator(path[path_index] as char) {
144            path_index += 1;
145        }
146
147        if is_end_invalid || path_index != path.len() {
148            path_index += 1;
149        }
150
151        self.wildcard.path_index = path_index as u32;
152        self.globstar = self.wildcard;
153    }
154
155    #[inline(always)]
156    fn skip_branch(&mut self, glob: &[u8]) {
157        let mut in_brackets = false;
158        let end_brace_depth = self.brace_depth - 1;
159        while self.glob_index < glob.len() {
160            match glob[self.glob_index] {
161                b'{' if !in_brackets => self.brace_depth += 1,
162                b'}' if !in_brackets => {
163                    self.brace_depth -= 1;
164                    if self.brace_depth == end_brace_depth {
165                        self.glob_index += 1;
166                        return;
167                    }
168                }
169                b'[' if !in_brackets => in_brackets = true,
170                b']' => in_brackets = false,
171                b'\\' => self.glob_index += 1,
172                _ => (),
173            }
174            self.glob_index += 1;
175        }
176    }
177
178    fn match_brace_branch(
179        &self,
180        glob: &[u8],
181        path: &[u8],
182        open_brace_index: usize,
183        branch_index: usize,
184        brace_stack: &mut BraceStack,
185    ) -> bool {
186        brace_stack.push((open_brace_index as u32, branch_index as u32));
187
188        let mut branch_state = self.clone();
189        branch_state.glob_index = branch_index;
190        branch_state.brace_depth = brace_stack.len();
191
192        let matched = branch_state.glob_match_from(glob, path, branch_index, brace_stack);
193
194        brace_stack.pop();
195
196        matched
197    }
198
199    fn match_brace(&mut self, glob: &[u8], path: &[u8], brace_stack: &mut BraceStack) -> bool {
200        let mut brace_depth = 0;
201        let mut in_brackets = false;
202
203        let open_brace_index = self.glob_index;
204
205        let mut branch_index = 0;
206
207        while self.glob_index < glob.len() {
208            match glob[self.glob_index] {
209                b'{' if !in_brackets => {
210                    brace_depth += 1;
211                    if brace_depth == 1 {
212                        branch_index = self.glob_index + 1;
213                    }
214                }
215                b'}' if !in_brackets => {
216                    brace_depth -= 1;
217                    if brace_depth == 0 {
218                        if self.match_brace_branch(
219                            glob,
220                            path,
221                            open_brace_index,
222                            branch_index,
223                            brace_stack,
224                        ) {
225                            return true;
226                        }
227                        break;
228                    }
229                }
230                b',' if brace_depth == 1 => {
231                    if self.match_brace_branch(
232                        glob,
233                        path,
234                        open_brace_index,
235                        branch_index,
236                        brace_stack,
237                    ) {
238                        return true;
239                    }
240                    branch_index = self.glob_index + 1;
241                }
242                b'[' if !in_brackets => in_brackets = true,
243                b']' => in_brackets = false,
244                b'\\' => self.glob_index += 1,
245                _ => (),
246            }
247            self.glob_index += 1;
248        }
249
250        false
251    }
252
253    #[inline(always)]
254    fn glob_match_from(
255        &mut self,
256        glob: &[u8],
257        path: &[u8],
258        match_start: usize,
259        brace_stack: &mut BraceStack,
260    ) -> bool {
261        while self.glob_index < glob.len() || self.path_index < path.len() {
262            if self.glob_index < glob.len() {
263                match glob[self.glob_index] {
264                    b'*' => {
265                        let is_globstar =
266                            self.glob_index + 1 < glob.len() && glob[self.glob_index + 1] == b'*';
267                        if is_globstar {
268                            self.skip_globstars(glob);
269                        }
270
271                        self.wildcard.glob_index = self.glob_index as u32;
272                        self.wildcard.path_index = self.path_index as u32 + 1;
273                        self.wildcard.brace_depth = self.brace_depth as u32;
274
275                        let mut in_globstar = false;
276                        if is_globstar {
277                            self.glob_index += 2;
278
279                            let is_end_invalid = self.glob_index != glob.len();
280
281                            if (self.glob_index.saturating_sub(match_start) < 3
282                                || glob[self.glob_index - 3] == b'/')
283                                && (!is_end_invalid || glob[self.glob_index] == b'/')
284                            {
285                                if is_end_invalid {
286                                    self.glob_index += 1;
287                                }
288
289                                self.skip_to_separator(path, is_end_invalid);
290                                in_globstar = true;
291                            }
292                        } else {
293                            self.glob_index += 1;
294                        }
295
296                        if !in_globstar
297                            && self.path_index < path.len()
298                            && is_separator(path[self.path_index] as char)
299                        {
300                            self.wildcard = self.globstar;
301                        }
302
303                        continue;
304                    }
305                    b'?' if self.path_index < path.len() => {
306                        if !is_separator(path[self.path_index] as char) {
307                            self.glob_index += 1;
308                            self.path_index += 1;
309                            continue;
310                        }
311                    }
312                    b'[' if self.path_index < path.len() => {
313                        self.glob_index += 1;
314
315                        let mut negated = false;
316                        if self.glob_index < glob.len()
317                            && matches!(glob[self.glob_index], b'^' | b'!')
318                        {
319                            negated = true;
320                            self.glob_index += 1;
321                        }
322
323                        let mut first = true;
324                        let mut is_match = false;
325                        let c = path[self.path_index];
326                        while self.glob_index < glob.len()
327                            && (first || glob[self.glob_index] != b']')
328                        {
329                            let mut low = glob[self.glob_index];
330                            if !unescape(&mut low, glob, self) {
331                                return false;
332                            }
333
334                            self.glob_index += 1;
335
336                            let high = if self.glob_index + 1 < glob.len()
337                                && glob[self.glob_index] == b'-'
338                                && glob[self.glob_index + 1] != b']'
339                            {
340                                self.glob_index += 1;
341
342                                let mut high = glob[self.glob_index];
343                                if !unescape(&mut high, glob, self) {
344                                    return false;
345                                }
346
347                                self.glob_index += 1;
348                                high
349                            } else {
350                                low
351                            };
352
353                            if low <= c && c <= high {
354                                is_match = true;
355                            }
356
357                            first = false;
358                        }
359
360                        if self.glob_index >= glob.len() {
361                            return false;
362                        }
363
364                        self.glob_index += 1;
365                        if is_match != negated {
366                            self.path_index += 1;
367                            continue;
368                        }
369                    }
370                    b'{' => {
371                        if let Some((_, branch_index)) =
372                            brace_stack.iter().find(|(open_brace_index, _)| {
373                                *open_brace_index == self.glob_index as u32
374                            })
375                        {
376                            self.glob_index = *branch_index as usize;
377                            self.brace_depth += 1;
378                            continue;
379                        }
380                        return self.match_brace(glob, path, brace_stack);
381                    }
382                    b',' | b'}' if self.brace_depth > 0 => {
383                        self.skip_branch(glob);
384                        continue;
385                    }
386                    mut c if self.path_index < path.len() => {
387                        if !unescape(&mut c, glob, self) {
388                            return false;
389                        }
390
391                        let is_match = if c == b'/' {
392                            is_separator(path[self.path_index] as char)
393                        } else {
394                            path[self.path_index] == c
395                        };
396
397                        if is_match {
398                            self.glob_index += 1;
399                            self.path_index += 1;
400
401                            if c == b'/' {
402                                self.wildcard = self.globstar;
403                            }
404
405                            continue;
406                        }
407                    }
408                    _ => {}
409                }
410            }
411
412            if self.wildcard.path_index > 0 && self.wildcard.path_index <= path.len() as u32 {
413                self.backtrack();
414                continue;
415            }
416
417            return false;
418        }
419
420        true
421    }
422}