Skip to main content

fast_glob/
lib.rs

1//! `fast-glob` is a high-performance glob matching crate for Rust, originally forked from [`devongovett/glob-match`](https://github.com/devongovett/glob-match).
2//! This crate provides efficient glob pattern matching with support for multi-pattern matching and brace expansion.
3//!
4//! ## Key Features
5//!
6//! - Up to 60% performance improvement.
7//! - Support for more complex and efficient brace expansion.
8//! - Fixed matching issues with wildcard and globstar [`glob-match/issues#9`](https://github.com/devongovett/glob-match/issues/9).
9//!
10//! ## Examples
11//!
12//! ```rust
13//! use fast_glob::glob_match;
14//!
15//! let glob = "some/**/n*d[k-m]e?txt";
16//! let path = "some/a/bigger/path/to/the/crazy/needle.txt";
17//!
18//! assert!(glob_match(glob, path));
19//! ```
20//!
21//! ## Syntax
22//!
23//! `fast-glob` supports the following glob pattern syntax:
24//!
25//! | Syntax  | Meaning                                                                                                                                                                                             |
26//! | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
27//! | `?`     | Matches any single character.                                                                                                                                                                       |
28//! | `*`     | Matches zero or more characters, except for path separators (e.g., `/`).                                                                                                                             |
29//! | `**`    | Matches zero or more characters, including path separators. Must match a complete path segment (i.e., followed by a `/` or the end of the pattern).                                                  |
30//! | `[ab]`  | Matches one of the characters contained in the brackets. Character ranges, e.g., `[a-z]`, are also supported. Use `[!ab]` or `[^ab]` to match any character _except_ those contained in the brackets. |
31//! | `{a,b}` | Matches one of the patterns contained in the braces. Any of the wildcard characters can be used in the sub-patterns. Braces may be nested up to 10 levels deep.                                     |
32//! | `!`     | When at the start of the glob, this negates the result. Multiple `!` characters negate the glob multiple times.                                                                                     |
33//! | `\`     | A backslash character may be used to escape any of the above special characters.                                                                                                                    |
34//!
35//! ---
36//!
37//! For detailed usage and API reference, refer to the specific function and struct documentation.
38//!
39//! For any issues or contributions, please visit the [GitHub repository](https://github.com/oxc-project/fast-glob).
40
41/**
42 * The following code is modified based on
43 * https://github.com/devongovett/glob-match/blob/d5a6c67/src/lib.rs
44 *
45 * MIT Licensed
46 * Copyright (c) 2023 Devon Govett
47 * https://github.com/devongovett/glob-match/tree/main/LICENSE
48 */
49use std::path::is_separator;
50
51use arrayvec::ArrayVec;
52
53#[derive(Clone, Debug, Default)]
54struct State {
55    path_index: usize,
56    glob_index: usize,
57    brace_depth: usize,
58
59    wildcard: Wildcard,
60    globstar: Wildcard,
61}
62
63#[derive(Clone, Copy, Debug, Default)]
64struct Wildcard {
65    glob_index: u32,
66    path_index: u32,
67    brace_depth: u32,
68}
69
70type BraceStack = ArrayVec<(u32, u32), 10>;
71
72pub fn glob_match(glob: impl AsRef<[u8]>, path: impl AsRef<[u8]>) -> bool {
73    let glob = glob.as_ref();
74    let path = path.as_ref();
75    glob_match_impl(glob, path)
76}
77
78fn glob_match_impl(glob: &[u8], path: &[u8]) -> bool {
79    let mut state = State::default();
80
81    let mut negated = false;
82    while state.glob_index < glob.len() && glob[state.glob_index] == b'!' {
83        negated = !negated;
84        state.glob_index += 1;
85    }
86
87    let mut brace_stack = ArrayVec::<_, 10>::new();
88    let mut invalid_pattern = false;
89    let matched = state.glob_match_from(glob, path, 0, &mut brace_stack, &mut invalid_pattern);
90    if invalid_pattern {
91        return false;
92    }
93
94    negated ^ matched
95}
96
97#[inline(always)]
98fn unescape(c: &mut u8, glob: &[u8], state: &mut State) -> bool {
99    if *c == b'\\' {
100        state.glob_index += 1;
101        if state.glob_index >= glob.len() {
102            return false;
103        }
104        *c = match glob[state.glob_index] {
105            b'a' => b'\x61',
106            b'b' => b'\x08',
107            b'n' => b'\n',
108            b'r' => b'\r',
109            b't' => b'\t',
110            c => c,
111        }
112    }
113    true
114}
115
116impl State {
117    #[inline(always)]
118    fn backtrack(&mut self) {
119        self.glob_index = self.wildcard.glob_index as usize;
120        self.path_index = self.wildcard.path_index as usize;
121        self.brace_depth = self.wildcard.brace_depth as usize;
122    }
123
124    #[inline(always)]
125    fn skip_globstars(&mut self, glob: &[u8]) {
126        let mut glob_index = self.glob_index + 2;
127
128        while glob_index + 4 <= glob.len() && &glob[glob_index..glob_index + 4] == b"/**/" {
129            glob_index += 3;
130        }
131
132        if &glob[glob_index..] == b"/**" {
133            glob_index += 3;
134        }
135
136        self.glob_index = glob_index - 2;
137    }
138
139    #[inline(always)]
140    fn skip_to_separator(&mut self, path: &[u8], is_end_invalid: bool) {
141        if self.path_index == path.len() {
142            self.wildcard.path_index += 1;
143            return;
144        }
145
146        let mut path_index = self.path_index;
147        while path_index < path.len() && !is_separator(path[path_index] as char) {
148            path_index += 1;
149        }
150
151        if is_end_invalid || path_index != path.len() {
152            path_index += 1;
153        }
154
155        self.wildcard.path_index = path_index as u32;
156        self.globstar = self.wildcard;
157    }
158
159    #[inline(always)]
160    fn skip_branch(&mut self, glob: &[u8]) {
161        let mut in_brackets = false;
162        let end_brace_depth = self.brace_depth - 1;
163        while self.glob_index < glob.len() {
164            match glob[self.glob_index] {
165                b'{' if !in_brackets => self.brace_depth += 1,
166                b'}' if !in_brackets => {
167                    self.brace_depth -= 1;
168                    if self.brace_depth == end_brace_depth {
169                        self.glob_index += 1;
170                        return;
171                    }
172                }
173                b'[' if !in_brackets => in_brackets = true,
174                b']' => in_brackets = false,
175                b'\\' => self.glob_index += 1,
176                _ => (),
177            }
178            self.glob_index += 1;
179        }
180    }
181
182    fn match_brace_branch(
183        &self,
184        glob: &[u8],
185        path: &[u8],
186        open_brace_index: usize,
187        branch_index: usize,
188        brace_stack: &mut BraceStack,
189        invalid_pattern: &mut bool,
190    ) -> bool {
191        // Gracefully reject brace expansions deeper than BraceStack capacity.
192        if brace_stack.try_push((open_brace_index as u32, branch_index as u32)).is_err() {
193            *invalid_pattern = true;
194            return false;
195        }
196
197        let mut branch_state = self.clone();
198        branch_state.glob_index = branch_index;
199        branch_state.brace_depth = brace_stack.len();
200
201        let matched =
202            branch_state.glob_match_from(glob, path, branch_index, brace_stack, invalid_pattern);
203
204        brace_stack.pop();
205
206        matched
207    }
208
209    fn match_brace(
210        &mut self,
211        glob: &[u8],
212        path: &[u8],
213        brace_stack: &mut BraceStack,
214        invalid_pattern: &mut bool,
215    ) -> bool {
216        let mut brace_depth = 0;
217        let mut in_brackets = false;
218        let mut has_closing_brace = false;
219        let mut matched = false;
220
221        let open_brace_index = self.glob_index;
222
223        let mut branch_index = 0;
224
225        while self.glob_index < glob.len() {
226            match glob[self.glob_index] {
227                b'{' if !in_brackets => {
228                    brace_depth += 1;
229                    if brace_depth == 1 {
230                        branch_index = self.glob_index + 1;
231                    }
232                }
233                b'}' if !in_brackets => {
234                    brace_depth -= 1;
235                    if brace_depth == 0 {
236                        has_closing_brace = true;
237                        if self.match_brace_branch(
238                            glob,
239                            path,
240                            open_brace_index,
241                            branch_index,
242                            brace_stack,
243                            invalid_pattern,
244                        ) {
245                            matched = true;
246                        }
247                        break;
248                    }
249                }
250                b',' if brace_depth == 1 => {
251                    if self.match_brace_branch(
252                        glob,
253                        path,
254                        open_brace_index,
255                        branch_index,
256                        brace_stack,
257                        invalid_pattern,
258                    ) {
259                        matched = true;
260                    }
261                    branch_index = self.glob_index + 1;
262                }
263                b'[' if !in_brackets => in_brackets = true,
264                b']' => in_brackets = false,
265                b'\\' => self.glob_index += 1,
266                _ => (),
267            }
268            self.glob_index += 1;
269        }
270
271        if !has_closing_brace {
272            *invalid_pattern = true;
273            return false;
274        }
275
276        matched
277    }
278
279    #[inline(always)]
280    fn glob_match_from(
281        &mut self,
282        glob: &[u8],
283        path: &[u8],
284        match_start: usize,
285        brace_stack: &mut BraceStack,
286        invalid_pattern: &mut bool,
287    ) -> bool {
288        while self.glob_index < glob.len() || self.path_index < path.len() {
289            if self.glob_index < glob.len() {
290                match glob[self.glob_index] {
291                    b'*' => {
292                        let is_globstar =
293                            self.glob_index + 1 < glob.len() && glob[self.glob_index + 1] == b'*';
294                        if is_globstar {
295                            self.skip_globstars(glob);
296                        }
297
298                        self.wildcard.glob_index = self.glob_index as u32;
299                        self.wildcard.path_index = self.path_index as u32 + 1;
300                        self.wildcard.brace_depth = self.brace_depth as u32;
301
302                        let mut in_globstar = false;
303                        if is_globstar {
304                            self.glob_index += 2;
305
306                            let is_end_invalid = self.glob_index != glob.len();
307
308                            if (self.glob_index.saturating_sub(match_start) < 3
309                                || glob[self.glob_index - 3] == b'/')
310                                && (!is_end_invalid || glob[self.glob_index] == b'/')
311                            {
312                                if is_end_invalid {
313                                    self.glob_index += 1;
314                                }
315
316                                self.skip_to_separator(path, is_end_invalid);
317                                in_globstar = true;
318                            }
319                        } else {
320                            self.glob_index += 1;
321                        }
322
323                        if !in_globstar
324                            && self.path_index < path.len()
325                            && is_separator(path[self.path_index] as char)
326                        {
327                            self.wildcard = self.globstar;
328                        }
329
330                        continue;
331                    }
332                    b'?' if self.path_index < path.len() => {
333                        if !is_separator(path[self.path_index] as char) {
334                            self.glob_index += 1;
335                            self.path_index += 1;
336                            continue;
337                        }
338                    }
339                    b'[' if self.path_index < path.len() => {
340                        self.glob_index += 1;
341
342                        let mut negated = false;
343                        if self.glob_index < glob.len()
344                            && matches!(glob[self.glob_index], b'^' | b'!')
345                        {
346                            negated = true;
347                            self.glob_index += 1;
348                        }
349
350                        let mut first = true;
351                        let mut is_match = false;
352                        let c = path[self.path_index];
353                        while self.glob_index < glob.len()
354                            && (first || glob[self.glob_index] != b']')
355                        {
356                            let mut low = glob[self.glob_index];
357                            if !unescape(&mut low, glob, self) {
358                                return false;
359                            }
360
361                            self.glob_index += 1;
362
363                            let high = if self.glob_index + 1 < glob.len()
364                                && glob[self.glob_index] == b'-'
365                                && glob[self.glob_index + 1] != b']'
366                            {
367                                self.glob_index += 1;
368
369                                let mut high = glob[self.glob_index];
370                                if !unescape(&mut high, glob, self) {
371                                    return false;
372                                }
373
374                                self.glob_index += 1;
375                                high
376                            } else {
377                                low
378                            };
379
380                            if low <= c && c <= high {
381                                is_match = true;
382                            }
383
384                            first = false;
385                        }
386
387                        if self.glob_index >= glob.len() {
388                            return false;
389                        }
390
391                        self.glob_index += 1;
392                        if is_match != negated {
393                            self.path_index += 1;
394                            continue;
395                        }
396                    }
397                    b'{' => {
398                        if let Some((_, branch_index)) =
399                            brace_stack.iter().find(|(open_brace_index, _)| {
400                                *open_brace_index == self.glob_index as u32
401                            })
402                        {
403                            self.glob_index = *branch_index as usize;
404                            self.brace_depth += 1;
405                            continue;
406                        }
407                        return self.match_brace(glob, path, brace_stack, invalid_pattern);
408                    }
409                    b',' | b'}' if self.brace_depth > 0 => {
410                        self.skip_branch(glob);
411                        continue;
412                    }
413                    mut c if self.path_index < path.len() => {
414                        if !unescape(&mut c, glob, self) {
415                            return false;
416                        }
417
418                        let is_match = if c == b'/' {
419                            is_separator(path[self.path_index] as char)
420                        } else {
421                            path[self.path_index] == c
422                        };
423
424                        if is_match {
425                            self.glob_index += 1;
426                            self.path_index += 1;
427
428                            if c == b'/' {
429                                self.wildcard = self.globstar;
430                            }
431
432                            continue;
433                        }
434                    }
435                    _ => {}
436                }
437            }
438
439            if self.wildcard.path_index > 0 && self.wildcard.path_index <= path.len() as u32 {
440                self.backtrack();
441                continue;
442            }
443
444            return false;
445        }
446
447        true
448    }
449}