Skip to main content

sley_pathspec/
lib.rs

1//! Shared pathspec primitive for sley.
2//!
3//! This crate owns the byte-faithful port of git's wildmatch engine
4//! (`wildmatch.c::dowild`) and the single-item pathspec matcher
5//! (`match_pathspec_item`), plus a [`Pathspec`] type that parses git's
6//! pathspec *magic* prefixes (`:(exclude)`, `:(icase)`, `:(literal)`,
7//! `:(glob)`, `:(top)`, `:(attr:...)`, and the shorthand `:!`/`:^`/`:/`).
8//!
9//! Four clusters consume this primitive: the rev-walk (`sley-rev`), diff,
10//! the worktree walker (`sley-worktree`, which re-exports the engine for its
11//! `ls-files` path), and the CLI. Keeping the wildmatch port and the magic
12//! parser in one low-level crate (depending only on `sley-core`) means there
13//! is exactly one implementation of git's matching semantics to keep in sync
14//! with the 2.54 oracle.
15//!
16//! STAGE-A scope: parsing + per-path `matches`. The TREESAME / history
17//! simplification that *consumes* a `Pathspec` to prune the rev-walk is
18//! STAGE-B; this crate only provides the matching primitive that stage will
19//! drive.
20
/// A parsed pathspec element: a single pattern plus its magic flags.
///
/// Mirrors git's `struct pathspec_item` for the subset sley needs today.
/// Construct with [`PathspecElement::parse`]; query a single element with
/// [`PathspecElement::matches_path`], or combine elements through
/// [`Pathspec::matches`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathspecElement {
    /// The match pattern with any magic prefix stripped (git's `item.match`).
    pattern: Vec<u8>,
    /// `:(exclude)` / `:!` / `:^` — this element subtracts from the set.
    exclude: bool,
    /// `:(icase)` — case-insensitive matching.
    icase: bool,
    /// `:(literal)` — wildcards are matched literally (no globbing).
    /// `parse` never produces an element with both `literal` and `glob` set.
    literal: bool,
    /// `:(glob)` — pathname-aware globbing (`**` required to cross `/`).
    glob: bool,
    /// `:(top)` / `:/` — match from the repository root (sley already matches
    /// repo-relative paths from the root, so this is parsed and surfaced but
    /// does not change single-path matching; it affects prefix handling that
    /// the consuming cluster applies).
    top: bool,
    /// `:(attr:...)` attribute requirements, stored verbatim (the text after
    /// `attr:`), one entry per magic word. Attribute-based selection is not
    /// yet evaluated (STAGE-B+); the labels are retained so a pathspec
    /// carrying them round-trips and the consumer can reject/honor them
    /// explicitly rather than silently dropping them.
    attrs: Vec<Vec<u8>>,
}
49
50impl PathspecElement {
51    /// Parse one pathspec argument, honoring git's magic prefixes.
52    ///
53    /// Recognizes both the long form `:(magic1,magic2,...)pattern` and the
54    /// shorthand sigils `:!`/`:^` (exclude), `:/` (top). Defaults
55    /// (`literal`/`glob`/`icase` from the global `--*-pathspecs` flags) are
56    /// supplied via `defaults` and overridden per-element by any explicit
57    /// magic. Unknown long-form magic words are an error, matching git.
58    pub fn parse(arg: &[u8], defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError> {
59        let mut exclude = false;
60        let mut icase = defaults.icase;
61        let mut literal = defaults.literal;
62        let mut glob = defaults.glob;
63        let mut top = false;
64        let mut attrs: Vec<Vec<u8>> = Vec::new();
65
66        let rest = if let Some(after) = arg.strip_prefix(b":(") {
67            // Long form: :(magic[,magic...])pattern
68            let close = after
69                .iter()
70                .position(|&c| c == b')')
71                .ok_or(PathspecParseError::UnterminatedMagic)?;
72            let magic = &after[..close];
73            for word in split_magic(magic) {
74                match word.as_slice() {
75                    b"exclude" => exclude = true,
76                    b"icase" => icase = true,
77                    b"literal" => literal = true,
78                    b"glob" => glob = true,
79                    b"top" => top = true,
80                    other => {
81                        if let Some(attr) = other.strip_prefix(b"attr:") {
82                            attrs.push(attr.to_vec());
83                        } else if other.is_empty() {
84                            // Empty magic word (e.g. trailing comma) — ignore,
85                            // matching git's lenient split.
86                        } else {
87                            return Err(PathspecParseError::UnknownMagic(other.to_vec()));
88                        }
89                    }
90                }
91            }
92            &after[close + 1..]
93        } else if let Some(after) = arg.strip_prefix(b":") {
94            // Shorthand sigils. git consumes a run of leading sigils.
95            let mut idx = 0;
96            while idx < after.len() {
97                match after[idx] {
98                    b'!' | b'^' => exclude = true,
99                    b'/' => top = true,
100                    _ => break,
101                }
102                idx += 1;
103            }
104            &after[idx..]
105        } else {
106            arg
107        };
108
109        // `:(glob)` and `:(literal)` are mutually exclusive in git.
110        if glob && literal {
111            return Err(PathspecParseError::GlobLiteralConflict);
112        }
113
114        Ok(PathspecElement {
115            pattern: rest.to_vec(),
116            exclude,
117            icase,
118            literal,
119            glob,
120            top,
121            attrs,
122        })
123    }
124
125    /// Whether this element is an `:(exclude)` element.
126    pub fn is_exclude(&self) -> bool {
127        self.exclude
128    }
129
130    /// Whether this element carries `:(top)` / `:/` magic.
131    pub fn is_top(&self) -> bool {
132        self.top
133    }
134
135    /// The attribute requirements carried by `:(attr:...)`, if any.
136    pub fn attrs(&self) -> &[Vec<u8>] {
137        &self.attrs
138    }
139
140    /// Whether this element carries case-insensitive matching.
141    pub fn is_icase(&self) -> bool {
142        self.icase
143    }
144
145    /// Whether this element carries glob magic.
146    pub fn is_glob(&self) -> bool {
147        self.glob
148    }
149
150    /// The bare match pattern (magic prefix stripped).
151    pub fn pattern(&self) -> &[u8] {
152        &self.pattern
153    }
154
155    /// The [`PathspecMatchMagic`] this element matches under.
156    fn magic(&self) -> PathspecMatchMagic {
157        PathspecMatchMagic {
158            literal: self.literal,
159            glob: self.glob,
160            icase: self.icase,
161        }
162    }
163
164    /// Whether `name` (a repo-relative path, no leading slash) is selected by
165    /// this single element, ignoring its exclude polarity. Use
166    /// [`Pathspec::matches`] for the combined include/exclude semantics.
167    pub fn matches_path(&self, name: &[u8]) -> bool {
168        pathspec_item_matches(&self.pattern, name, self.magic())
169    }
170}
171
/// A full pathspec: an ordered list of [`PathspecElement`]s combining positive
/// (include) and `:(exclude)` patterns.
///
/// Semantics (git `match_pathspec`): a path matches when at least one
/// non-exclude element selects it AND no exclude element selects it. An
/// all-exclude (or empty) pathspec matches everything not excluded — matching
/// git, where `git log -- ':(exclude)foo'` keeps every path but `foo`.
///
/// The `Default` value is the empty pathspec.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Pathspec {
    /// Parsed elements, in the order the arguments were supplied to `parse`.
    elements: Vec<PathspecElement>,
}
183
184impl Pathspec {
185    /// Parse a list of raw pathspec arguments under the given global magic
186    /// defaults (from `--{glob,noglob,literal,icase}-pathspecs`).
187    pub fn parse<I, S>(args: I, defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError>
188    where
189        I: IntoIterator<Item = S>,
190        S: AsRef<[u8]>,
191    {
192        let mut elements = Vec::new();
193        for arg in args {
194            elements.push(PathspecElement::parse(arg.as_ref(), defaults)?);
195        }
196        Ok(Pathspec { elements })
197    }
198
199    /// An empty pathspec matches every path.
200    pub fn is_empty(&self) -> bool {
201        self.elements.is_empty()
202    }
203
204    /// The parsed elements, in order.
205    pub fn elements(&self) -> &[PathspecElement] {
206        &self.elements
207    }
208
209    /// Whether `path` (repo-relative, no leading slash) is selected.
210    ///
211    /// An empty pathspec, or one with only excludes, matches any path the
212    /// excludes don't subtract — exactly git's `match_pathspec` behavior.
213    pub fn matches(&self, path: &[u8]) -> bool {
214        if self.elements.is_empty() {
215            return true;
216        }
217        let mut have_include = false;
218        let mut included = false;
219        for element in &self.elements {
220            if element.exclude {
221                if element.matches_path(path) {
222                    return false;
223                }
224            } else {
225                have_include = true;
226                if element.matches_path(path) {
227                    included = true;
228                }
229            }
230        }
231        // With at least one include, the path must hit one of them. With only
232        // excludes, anything not excluded is kept.
233        if have_include { included } else { true }
234    }
235}
236
/// Break the body of a `:(...)` magic prefix into its comma-separated words
/// (comma is git's `parse_long_magic` separator). Empty words are preserved,
/// so an empty body yields a single empty word.
fn split_magic(body: &[u8]) -> Vec<Vec<u8>> {
    let mut words = Vec::new();
    for word in body.split(|byte| *byte == b',') {
        words.push(word.to_owned());
    }
    words
}
241
/// Reasons a pathspec argument's magic prefix can fail to parse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathspecParseError {
    /// A `:(` was not closed by a `)`.
    UnterminatedMagic,
    /// A long-form magic word git does not recognize (the offending word).
    UnknownMagic(Vec<u8>),
    /// `:(glob)` and `:(literal)` were both requested.
    GlobLiteralConflict,
}

impl core::fmt::Display for PathspecParseError {
    /// Render the user-facing message; a non-UTF-8 magic word is shown lossily.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::UnterminatedMagic => f.write_str("Missing ')' at end of pathspec magic"),
            Self::UnknownMagic(word) => {
                let shown = String::from_utf8_lossy(word);
                write!(f, "Invalid pathspec magic '{}'", shown)
            }
            Self::GlobLiteralConflict => f.write_str("'literal' and 'glob' are incompatible"),
        }
    }
}

impl std::error::Error for PathspecParseError {}
274
/// Pathspec match magic, mirroring git's `PATHSPEC_LITERAL`/`PATHSPEC_GLOB`/
/// `PATHSPEC_ICASE`. Constructed from the global `--{glob,noglob,icase,literal}-pathspecs`
/// options. Drives [`pathspec_item_matches`].
///
/// The `Default` value has every flag cleared.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PathspecMatchMagic {
    /// Treat the whole pattern as literal text; no wildmatch pass is run.
    pub literal: bool,
    /// Run the wildmatch pass with `WM_PATHNAME` (ignored when `literal` is set).
    pub glob: bool,
    /// Fold ASCII case in both the literal comparison and the wildmatch pass.
    pub icase: bool,
}
284
/// git `is_glob_special`: true for the bytes that turn a pathspec into a
/// wildcard pattern — `*`, `?`, `[` and backslash.
fn is_glob_special(c: u8) -> bool {
    [b'*', b'?', b'[', b'\\'].contains(&c)
}
289
290/// git `simple_length`: length of the literal prefix before the first glob-special
291/// character (or end of string).
292fn simple_length(s: &[u8]) -> usize {
293    for (i, &c) in s.iter().enumerate() {
294        if is_glob_special(c) {
295            return i;
296        }
297    }
298    s.len()
299}
300
/// Case-aware comparison of at most `n` leading bytes, honoring `icase`
/// (git `ps_strncmp`).
///
/// Returns `true` when the first `n` bytes are EQUAL (mirrors `!strncmp`).
/// The slices stand in for NUL-terminated C strings: an operand shorter than
/// `n` ends early, so it is equal only to an operand that ends at the same
/// length. A shorter operand is never equal to a longer one merely because it
/// is a prefix of it.
///
/// With `icase` set, ASCII letters compare case-insensitively.
fn ps_strncmp(icase: bool, a: &[u8], b: &[u8], n: usize) -> bool {
    let a = &a[..a.len().min(n)];
    let b = &b[..b.len().min(n)];
    // After clamping to `n`, differing lengths mean one string terminated
    // before the other within the compared window: strncmp reports unequal.
    if a.len() != b.len() {
        return false;
    }
    a.iter().zip(b).all(|(&x, &y)| {
        if icase {
            x.to_ascii_lowercase() == y.to_ascii_lowercase()
        } else {
            x == y
        }
    })
}
322
/// True if `path` contains at least one of `*`, `?` or `[`.
///
/// A backslash on its own does not make a path a glob here.
pub fn pathspec_is_glob(path: &[u8]) -> bool {
    for &byte in path {
        if byte == b'*' || byte == b'?' || byte == b'[' {
            return true;
        }
    }
    false
}
327
328/// Port of git's `match_pathspec_item` for the single-pathspec / single-name case
329/// (no prefix, no attr magic). `match_` is the pathspec, `name` is the candidate
330/// path. Returns whether the pathspec selects `name` (exactly, as a directory
331/// prefix, or via wildmatch). Byte-for-byte faithful to git 2.54 for the
332/// `ls-files -- <pathspec>` path that t3070 exercises.
333pub fn pathspec_item_matches(match_: &[u8], name: &[u8], magic: PathspecMatchMagic) -> bool {
334    let icase = magic.icase;
335    let matchlen = match_.len();
336    let namelen = name.len();
337
338    // nowildcard_len: with LITERAL magic the whole pattern is literal.
339    let nowildcard_len = if magic.literal {
340        matchlen
341    } else {
342        simple_length(match_)
343    };
344
345    // Empty pathspec matches everything (git: `if (!*match) return MATCHED_RECURSIVELY`).
346    if matchlen == 0 {
347        return true;
348    }
349
350    // Literal-prefix comparison.
351    if matchlen <= namelen && ps_strncmp(icase, match_, name, matchlen) {
352        if matchlen == namelen {
353            return true; // MATCHED_EXACTLY
354        }
355        if match_[matchlen - 1] == b'/' || name[matchlen] == b'/' {
356            return true; // MATCHED_RECURSIVELY
357        }
358    } else if match_[matchlen - 1] == b'/'
359        && namelen == matchlen - 1
360        && ps_strncmp(icase, match_, name, namelen)
361    {
362        // DO_MATCH_DIRECTORY case: pathspec `foo/` vs name `foo`.
363        return true;
364    }
365
366    // Wildcard match — git `git_fnmatch(item, match, name, nowildcard_len)`.
367    if nowildcard_len < matchlen {
368        // git strips the literal prefix off BOTH pattern and name before running
369        // wildmatch (so `foo**` vs `foo/bba/arr` becomes `**` vs `/bba/arr`).
370        if nowildcard_len > 0 && !ps_strncmp(icase, match_, name, nowildcard_len) {
371            return false;
372        }
373        let pat = &match_[nowildcard_len..];
374        if name.len() < nowildcard_len {
375            return false;
376        }
377        let str_ = &name[nowildcard_len..];
378
379        let flags = if magic.glob && !magic.literal {
380            WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 }
381        } else {
382            // Default pathspec (no glob magic): pathmatch semantics.
383            if icase { WM_CASEFOLD } else { 0 }
384        };
385        if wildmatch(pat, str_, flags) {
386            return true;
387        }
388    }
389
390    false
391}
392
/// Case-insensitive match flag (git `WM_CASEFOLD`).
pub const WM_CASEFOLD: u32 = 1;
/// Pathname-aware match flag (git `WM_PATHNAME`): `*`/`?` do not cross `/`,
/// `**` is required to span directory separators.
pub const WM_PATHNAME: u32 = 2;

// Internal result codes of `dowild`. Only `WM_MATCH` is reported as a match by
// `wildmatch`; the other three all mean "no match" to the public caller and
// differ only in how enclosing `*` loops react to them.
/// The pattern matched the text.
const WM_MATCH: i32 = 0;
/// This attempt failed; an enclosing `*` may retry at the next text position.
const WM_NOMATCH: i32 = 1;
/// Matching cannot succeed; enclosing `*` loops propagate this immediately.
const WM_ABORT_ALL: i32 = -1;
/// Propagated up through `*` loops that may not cross `/`; a loop that may
/// cross `/` absorbs it and keeps trying later text positions.
const WM_ABORT_TO_STARSTAR: i32 = -2;
403
/// True for bytes in the 7-bit ASCII range.
#[inline]
fn wm_isascii(c: u8) -> bool {
    c.is_ascii()
}
/// True for ASCII `A`..=`Z`.
#[inline]
fn wm_isupper(c: u8) -> bool {
    matches!(c, b'A'..=b'Z')
}
/// True for ASCII `a`..=`z`.
#[inline]
fn wm_islower(c: u8) -> bool {
    matches!(c, b'a'..=b'z')
}
/// Lowercase an ASCII uppercase letter; every other byte is returned as-is.
#[inline]
fn wm_tolower(c: u8) -> u8 {
    if wm_isupper(c) {
        c + (b'a' - b'A')
    } else {
        c
    }
}
/// Uppercase an ASCII lowercase letter; every other byte is returned as-is.
#[inline]
fn wm_toupper(c: u8) -> u8 {
    if wm_islower(c) {
        c - (b'a' - b'A')
    } else {
        c
    }
}
/// True for the wildmatch metacharacters `*`, `?`, `[` and backslash.
#[inline]
fn wm_is_glob_special(c: u8) -> bool {
    b"*?[\\".contains(&c)
}
428
/// Whether the class name found in the pattern is byte-for-byte `lit`.
fn wm_cc_eq(class: &[u8], lit: &[u8]) -> bool {
    class.len() == lit.len() && class.iter().zip(lit).all(|(found, want)| found == want)
}
432
433fn wm_class_matches(class: &[u8], t_ch: u8, flags: u32) -> Option<bool> {
434    // Returns Some(matched) for a recognized class, or None for a malformed
435    // class name (caller maps to WM_ABORT_ALL).
436    let m = if wm_cc_eq(class, b"alnum") {
437        wm_isascii(t_ch) && t_ch.is_ascii_alphanumeric()
438    } else if wm_cc_eq(class, b"alpha") {
439        wm_isascii(t_ch) && t_ch.is_ascii_alphabetic()
440    } else if wm_cc_eq(class, b"blank") {
441        wm_isascii(t_ch) && (t_ch == b' ' || t_ch == b'\t')
442    } else if wm_cc_eq(class, b"cntrl") {
443        wm_isascii(t_ch) && t_ch.is_ascii_control()
444    } else if wm_cc_eq(class, b"digit") {
445        wm_isascii(t_ch) && t_ch.is_ascii_digit()
446    } else if wm_cc_eq(class, b"graph") {
447        wm_isascii(t_ch) && t_ch.is_ascii_graphic()
448    } else if wm_cc_eq(class, b"lower") {
449        wm_islower(t_ch)
450    } else if wm_cc_eq(class, b"print") {
451        // ISPRINT: printable including space (0x20..=0x7e).
452        wm_isascii(t_ch) && (0x20..=0x7e).contains(&t_ch)
453    } else if wm_cc_eq(class, b"punct") {
454        wm_isascii(t_ch) && t_ch.is_ascii_punctuation()
455    } else if wm_cc_eq(class, b"space") {
456        wm_isascii(t_ch) && t_ch.is_ascii_whitespace()
457    } else if wm_cc_eq(class, b"upper") {
458        wm_isupper(t_ch) || ((flags & WM_CASEFOLD) != 0 && wm_islower(t_ch))
459    } else if wm_cc_eq(class, b"xdigit") {
460        wm_isascii(t_ch) && t_ch.is_ascii_hexdigit()
461    } else {
462        return None;
463    };
464    Some(m)
465}
466
/// Faithful port of git's `wildmatch.c::dowild`. Returns one of the internal
/// `WM_*` codes (`WM_MATCH`, `WM_NOMATCH`, `WM_ABORT_ALL`, `WM_ABORT_TO_STARSTAR`).
///
/// `pattern` and `text` are walked in lockstep with the cursors `pi` / `ti`.
/// The C original relies on NUL terminators; here running off the end of a
/// slice is modelled as reading a `0` byte, and a literal `0` byte inside
/// `text` is likewise treated as end of text. `flags` is a bitwise-OR of
/// `WM_CASEFOLD` and `WM_PATHNAME`.
///
/// Recursion happens only for `*` / `**`, always on sub-slices, so indices
/// such as `prev_p` are relative to the slice this invocation received.
fn dowild(pattern: &[u8], text: &[u8], flags: u32) -> i32 {
    let p = pattern;
    let mut pi = 0usize;
    let mut ti = 0usize;

    while pi < p.len() {
        let mut p_ch = p[pi];
        // Past-the-end text reads as 0, standing in for C's NUL terminator.
        let t_ch_raw = if ti < text.len() { text[ti] } else { 0 };
        let mut t_ch = t_ch_raw;

        // Text exhausted while the pattern still needs a byte: only `*` can
        // match empty text, so everything else gives up for good.
        if t_ch == 0 && p_ch != b'*' {
            return WM_ABORT_ALL;
        }
        // Under WM_CASEFOLD both sides are compared in lowercase.
        if (flags & WM_CASEFOLD) != 0 && wm_isupper(t_ch) {
            t_ch = wm_tolower(t_ch);
        }
        if (flags & WM_CASEFOLD) != 0 && wm_isupper(p_ch) {
            p_ch = wm_tolower(p_ch);
        }

        match p_ch {
            b'?' => {
                // Matches any single byte, except `/` under WM_PATHNAME.
                if (flags & WM_PATHNAME) != 0 && t_ch == b'/' {
                    return WM_NOMATCH;
                }
                // fallthrough: advance both
                pi += 1;
                ti += 1;
                continue;
            }
            b'*' => {
                pi += 1;
                // Whether this star run is allowed to consume `/`.
                let match_slash: bool;
                if pi < p.len() && p[pi] == b'*' {
                    let prev_p = pi; // index of the second '*'
                    // Collapse any longer run of stars.
                    while pi < p.len() && p[pi] == b'*' {
                        pi += 1;
                    }
                    if (flags & WM_PATHNAME) == 0 {
                        // Without WM_PATHNAME, `*` and `**` are the same.
                        match_slash = true;
                    } else if (prev_p < 2 || p[prev_p - 2] == b'/')
                        && (pi == p.len()
                            || p[pi] == b'/'
                            || (p[pi] == b'\\' && pi + 1 < p.len() && p[pi + 1] == b'/'))
                    {
                        // `**` standing alone as a path component. For `**/`
                        // first try letting it match nothing at all, so that
                        // `foo/**/bar` also matches `foo/bar`.
                        if pi < p.len()
                            && p[pi] == b'/'
                            && dowild(&p[pi + 1..], &text[ti..], flags) == WM_MATCH
                        {
                            return WM_MATCH;
                        }
                        match_slash = true;
                    } else {
                        // `**` glued to other characters behaves like `*`.
                        match_slash = false;
                    }
                } else {
                    match_slash = (flags & WM_PATHNAME) == 0;
                }

                if pi == p.len() {
                    // Trailing "**" matches everything; trailing "*" matches only
                    // if there are no more slashes.
                    if !match_slash && text[ti..].contains(&b'/') {
                        return WM_ABORT_TO_STARSTAR;
                    }
                    return WM_MATCH;
                } else if !match_slash && p[pi] == b'/' {
                    // _one_ asterisk followed by a slash with WM_PATHNAME matches
                    // the next directory.
                    match text[ti..].iter().position(|&c| c == b'/') {
                        None => return WM_ABORT_ALL,
                        Some(off) => {
                            ti += off; // point at the slash; consumed by loop end
                        }
                    }
                    // emulate `break` then the for-loop's `text++; p++` increment:
                    pi += 1;
                    ti += 1;
                    continue;
                }

                // The matching loop: try the rest of the pattern at each
                // successive text position `cur_t`.
                let mut cur_t = ti;
                loop {
                    let mut tc = if cur_t < text.len() { text[cur_t] } else { 0 };
                    if tc == 0 {
                        break;
                    }
                    // Fast path: when the star is followed by a literal byte,
                    // skip straight to the next occurrence of that byte. With
                    // `match_slash` unset the scan stops at the first `/`.
                    if !wm_is_glob_special(p[pi]) {
                        let mut pc = p[pi];
                        if (flags & WM_CASEFOLD) != 0 && wm_isupper(pc) {
                            pc = wm_tolower(pc);
                        }
                        loop {
                            tc = if cur_t < text.len() { text[cur_t] } else { 0 };
                            if tc == 0 {
                                break;
                            }
                            if !(match_slash || tc != b'/') {
                                break;
                            }
                            let mut tcf = tc;
                            if (flags & WM_CASEFOLD) != 0 && wm_isupper(tcf) {
                                tcf = wm_tolower(tcf);
                            }
                            if tcf == pc {
                                break;
                            }
                            cur_t += 1;
                        }
                        // Recompute the casefolded tc for the comparison below.
                        let tc_cmp = {
                            let raw = if cur_t < text.len() { text[cur_t] } else { 0 };
                            if (flags & WM_CASEFOLD) != 0 && wm_isupper(raw) {
                                wm_tolower(raw)
                            } else {
                                raw
                            }
                        };
                        // The literal never showed up in the reachable text.
                        if tc_cmp != pc {
                            if match_slash {
                                return WM_ABORT_ALL;
                            } else {
                                return WM_ABORT_TO_STARSTAR;
                            }
                        }
                    }
                    let matched = dowild(&p[pi..], &text[cur_t..], flags);
                    if matched != WM_NOMATCH {
                        // A slash-crossing star absorbs WM_ABORT_TO_STARSTAR
                        // and keeps trying; every other result is final.
                        if !match_slash || matched != WM_ABORT_TO_STARSTAR {
                            return matched;
                        }
                    } else {
                        let cur_raw = if cur_t < text.len() { text[cur_t] } else { 0 };
                        // A single-component star may not step over a slash.
                        if !match_slash && cur_raw == b'/' {
                            return WM_ABORT_TO_STARSTAR;
                        }
                    }
                    cur_t += 1;
                }
                return WM_ABORT_ALL;
            }
            b'[' => {
                // Bracket expression. `p_ch2` is the current set member, with 0
                // meaning the pattern ended before the closing `]`.
                pi += 1;
                let mut p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                // `^` is accepted as an alias for the `!` negation marker.
                if p_ch2 == b'^' {
                    p_ch2 = b'!';
                }
                let negated = p_ch2 == b'!';
                if negated {
                    pi += 1;
                    p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                }
                // Previous set member, used as the low end of an `a-z` range;
                // 0 means "no usable previous member".
                let mut prev_ch: u8 = 0;
                let mut matched = false;
                loop {
                    if p_ch2 == 0 {
                        return WM_ABORT_ALL;
                    }
                    let mut next_prev: u8 = p_ch2;
                    let mut skip_class = false;
                    if p_ch2 == b'\\' {
                        // Escaped member: take the next byte literally.
                        pi += 1;
                        p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                        if p_ch2 == 0 {
                            return WM_ABORT_ALL;
                        }
                        if t_ch == p_ch2 {
                            matched = true;
                        }
                        next_prev = p_ch2;
                    } else if p_ch2 == b'-' && prev_ch != 0 && pi + 1 < p.len() && p[pi + 1] != b']'
                    {
                        // Range `prev_ch-p_ch2` (the upper bound may be escaped).
                        pi += 1;
                        p_ch2 = p[pi];
                        if p_ch2 == b'\\' {
                            pi += 1;
                            p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                            if p_ch2 == 0 {
                                return WM_ABORT_ALL;
                            }
                        }
                        if t_ch <= p_ch2 && t_ch >= prev_ch {
                            matched = true;
                        } else if (flags & WM_CASEFOLD) != 0 && wm_islower(t_ch) {
                            // The text byte was lowercased above; also test its
                            // uppercase form against the range.
                            let t_up = wm_toupper(t_ch);
                            if t_up <= p_ch2 && t_up >= prev_ch {
                                matched = true;
                            }
                        }
                        // A range cannot itself start another range.
                        next_prev = 0;
                    } else if p_ch2 == b'[' && pi + 1 < p.len() && p[pi + 1] == b':' {
                        // [:class:]
                        let s = pi + 2;
                        // Scan forward to the next `]`.
                        let mut scan = s;
                        loop {
                            if scan >= p.len() {
                                break;
                            }
                            if p[scan] == b']' {
                                break;
                            }
                            scan += 1;
                        }
                        pi = scan;
                        p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                        if p_ch2 == 0 {
                            return WM_ABORT_ALL;
                        }
                        // i = p - s - 1 (length of class name); require trailing ':'
                        let class_end = pi; // index of ']'
                        if class_end < s + 1 || p[class_end - 1] != b':' {
                            // Not a real [:class:]; treat '[' as a literal set member.
                            // Rewind to the `[` so the following bytes are
                            // re-read as ordinary set members.
                            pi = s.wrapping_sub(2);
                            p_ch2 = b'[';
                            if t_ch == p_ch2 {
                                matched = true;
                            }
                            skip_class = true;
                            next_prev = p_ch2;
                        } else {
                            let class = &p[s..class_end - 1];
                            match wm_class_matches(class, t_ch, flags) {
                                Some(true) => matched = true,
                                Some(false) => {}
                                None => return WM_ABORT_ALL,
                            }
                            next_prev = 0;
                        }
                    } else if t_ch == p_ch2 {
                        // Plain set member.
                        matched = true;
                    }

                    // `skip_class` is written above but has no effect on control
                    // flow; this read only keeps it from being flagged unused.
                    let _ = skip_class;
                    // next: advance to the next class char
                    prev_ch = next_prev;
                    pi += 1;
                    p_ch2 = if pi < p.len() { p[pi] } else { 0 };
                    if p_ch2 == b']' {
                        break;
                    }
                }
                // Under WM_PATHNAME a bracket expression never matches `/`.
                if matched == negated || ((flags & WM_PATHNAME) != 0 && t_ch == b'/') {
                    return WM_NOMATCH;
                }
                pi += 1;
                ti += 1;
                continue;
            }
            b'\\' => {
                // Literal match with the following character. A lone trailing
                // backslash yields `lit == 0`, which cannot equal `t_ch` (known
                // to be nonzero here), so it ends as WM_NOMATCH.
                // NOTE(review): the escaped byte is casefolded here; upstream
                // git appears to compare it unfolded — confirm against
                // wildmatch.c before relying on byte-exact parity.
                pi += 1;
                let lit = if pi < p.len() { p[pi] } else { 0 };
                let lit = if (flags & WM_CASEFOLD) != 0 && wm_isupper(lit) {
                    wm_tolower(lit)
                } else {
                    lit
                };
                if t_ch != lit {
                    return WM_NOMATCH;
                }
                pi += 1;
                ti += 1;
                continue;
            }
            _ => {
                // Ordinary byte: must match exactly (after any casefolding).
                if t_ch != p_ch {
                    return WM_NOMATCH;
                }
                pi += 1;
                ti += 1;
                continue;
            }
        }
    }

    // Pattern consumed: it is a match only if the text is consumed too.
    if ti < text.len() && text[ti] != 0 {
        WM_NOMATCH
    } else {
        WM_MATCH
    }
}
752
753/// Match `pattern` against `text` with git's `wildmatch` semantics.
754/// `flags` is a bitwise-OR of [`WM_CASEFOLD`] and [`WM_PATHNAME`].
755pub fn wildmatch(pattern: &[u8], text: &[u8], flags: u32) -> bool {
756    dowild(pattern, text, flags) == WM_MATCH
757}
758
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `args` with all-default global magic, panicking on a parse error.
    fn ps(args: &[&str]) -> Pathspec {
        Pathspec::parse(
            args.iter().map(|s| s.as_bytes()),
            PathspecMatchMagic::default(),
        )
        .expect("valid pathspec")
    }

    #[test]
    fn empty_pathspec_matches_everything() {
        let p = Pathspec::default();
        assert!(p.is_empty());
        assert!(p.matches(b"any/path"));
    }

    #[test]
    fn literal_prefix_matches_directory_recursively() {
        let p = ps(&["src"]);
        assert!(p.matches(b"src"));
        assert!(p.matches(b"src/lib.rs"));
        // A shared textual prefix is not enough: the boundary must be a `/`.
        assert!(!p.matches(b"srcs/lib.rs"));
        assert!(!p.matches(b"other"));
    }

    #[test]
    fn exclude_subtracts_from_includes() {
        let p = ps(&["src", ":(exclude)src/gen"]);
        assert!(p.matches(b"src/lib.rs"));
        assert!(!p.matches(b"src/gen/x.rs"));
    }

    #[test]
    fn exclude_shorthand_sigils() {
        for spec in [":!foo", ":^foo"] {
            let p = ps(&[spec]);
            assert!(p.elements()[0].is_exclude());
            // exclude-only pathspec keeps everything but the excluded path.
            assert!(p.matches(b"bar"));
            assert!(!p.matches(b"foo"));
        }
    }

    #[test]
    fn icase_magic_folds_case() {
        let p = ps(&[":(icase)readme"]);
        assert!(p.matches(b"README"));
        assert!(p.matches(b"readme"));
        // Without the magic, matching stays case-sensitive.
        let plain = ps(&["readme"]);
        assert!(!plain.matches(b"README"));
    }

    #[test]
    fn glob_magic_is_pathname_aware() {
        // :(glob)*.rs uses WM_PATHNAME so `*` does not cross `/`.
        let p = ps(&[":(glob)*.rs"]);
        assert!(p.matches(b"lib.rs"));
        assert!(!p.matches(b"src/lib.rs"));
        // ** spans directories under glob magic.
        let pp = ps(&[":(glob)**/*.rs"]);
        assert!(pp.matches(b"src/lib.rs"));
    }

    #[test]
    fn literal_magic_disables_wildcards() {
        let p = ps(&[":(literal)a*b"]);
        // The `*` only matches itself.
        assert!(p.matches(b"a*b"));
        assert!(!p.matches(b"axxb"));
    }

    #[test]
    fn top_magic_is_parsed() {
        // Both the long form and the `:/` shorthand set the flag.
        let p = ps(&[":(top)src", ":/other"]);
        assert!(p.elements()[0].is_top());
        assert!(p.elements()[1].is_top());
    }

    #[test]
    fn attr_magic_is_retained() {
        let p = ps(&[":(attr:binary)data"]);
        assert_eq!(p.elements()[0].attrs(), &[b"binary".to_vec()]);
        assert_eq!(p.elements()[0].pattern(), b"data");
    }

    #[test]
    fn combined_magic_words() {
        let p = ps(&[":(exclude,icase)Cargo.lock"]);
        let el = &p.elements()[0];
        assert!(el.is_exclude());
        // exclude is case-insensitive: CARGO.LOCK is subtracted too.
        assert!(!p.matches(b"CARGO.LOCK"));
    }

    /// Parse a single argument that is expected to fail and return the error.
    fn parse_err(arg: &[u8]) -> PathspecParseError {
        match Pathspec::parse([arg], PathspecMatchMagic::default()) {
            Ok(_) => panic!(
                "expected parse error for {:?}",
                String::from_utf8_lossy(arg)
            ),
            Err(e) => e,
        }
    }

    #[test]
    fn glob_literal_conflict_is_error() {
        assert_eq!(
            parse_err(b":(glob,literal)x"),
            PathspecParseError::GlobLiteralConflict
        );
    }

    #[test]
    fn unknown_magic_is_error() {
        assert!(matches!(
            parse_err(b":(bogus)x"),
            PathspecParseError::UnknownMagic(_)
        ));
    }

    #[test]
    fn unterminated_magic_is_error() {
        assert_eq!(
            parse_err(b":(exclude"),
            PathspecParseError::UnterminatedMagic
        );
    }

    #[test]
    fn exclude_only_keeps_unmatched() {
        let p = ps(&[":(exclude)target"]);
        assert!(p.matches(b"src/lib.rs"));
        assert!(!p.matches(b"target/debug"));
    }
}