Skip to main content

sley_pathspec/
lib.rs

1//! Shared pathspec primitive for sley.
2//!
3//! This crate owns the byte-faithful port of git's wildmatch engine
4//! (`wildmatch.c::dowild`) and the single-item pathspec matcher
5//! (`match_pathspec_item`), plus a [`Pathspec`] type that parses git's
6//! pathspec *magic* prefixes (`:(exclude)`, `:(icase)`, `:(literal)`,
7//! `:(glob)`, `:(top)`, `:(attr:...)`, and the shorthand `:!`/`:^`/`:/`).
8//!
9//! Four clusters consume this primitive: the rev-walk (`sley-rev`), diff,
10//! the worktree walker (`sley-worktree`, which re-exports the engine for its
11//! `ls-files` path), and the CLI. Keeping the wildmatch port and the magic
12//! parser in one low-level crate (depending only on `sley-core`) means there
13//! is exactly one implementation of git's matching semantics to keep in sync
14//! with the 2.54 oracle.
15//!
16//! STAGE-A scope: parsing + per-path `matches`. The TREESAME / history
17//! simplification that *consumes* a `Pathspec` to prune the rev-walk is
18//! STAGE-B; this crate only provides the matching primitive that stage will
19//! drive.
20
21use sley_core::GitError;
22use std::cell::Cell;
23use std::fs;
24use std::path::Path;
25
/// A parsed pathspec element: a single pattern plus its magic flags.
///
/// Mirrors git's `struct pathspec_item` for the subset sley needs today.
/// Construct with [`PathspecElement::parse`]; test a single path with
/// [`PathspecElement::matches_path`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathspecElement {
    /// The match pattern with any magic prefix stripped (git's `item.match`).
    pattern: Vec<u8>,
    /// `:(exclude)` / `:!` / `:^` — this element subtracts from the set.
    exclude: bool,
    /// `:(icase)` — case-insensitive matching.
    icase: bool,
    /// `:(literal)` — wildcards are matched literally (no globbing).
    literal: bool,
    /// `:(glob)` — pathname-aware globbing (`**` required to cross `/`).
    glob: bool,
    /// `:(top)` / `:/` — match from the repository root (sley already matches
    /// repo-relative paths from the root, so this is parsed and surfaced but
    /// does not change single-path matching; it affects prefix handling that
    /// the consuming cluster applies).
    top: bool,
    /// Raw text following `attr:` in a `:(attr:...)` magic word, stored
    /// verbatim. Holds at most one entry, because parsing rejects a second
    /// `attr:` word. `matches_path` does not consult it; the text is kept so
    /// the consumer can honor or reject it explicitly rather than have it
    /// silently dropped.
    attrs: Vec<Vec<u8>>,
    /// Structured form of `attrs`: one requirement per whitespace-separated
    /// token of the `attr:` body. Empty when no `:(attr:...)` magic was given.
    attr_requirements: Vec<PathspecAttrRequirement>,
}
55
/// One attribute condition taken from a `:(attr:...)` magic word.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathspecAttrRequirement {
    /// `name` — the attribute must be set.
    Set(Vec<u8>),
    /// `-name` — the attribute must be unset.
    Unset(Vec<u8>),
    /// `!name` — the attribute must be unspecified (no state reported).
    Unspecified(Vec<u8>),
    /// `name=value` — the attribute must carry exactly `value` (already
    /// unescaped).
    Value { name: Vec<u8>, value: Vec<u8> },
}
63
64impl PathspecElement {
65    /// Parse one pathspec argument, honoring git's magic prefixes.
66    ///
67    /// Recognizes both the long form `:(magic1,magic2,...)pattern` and the
68    /// shorthand sigils `:!`/`:^` (exclude), `:/` (top). Defaults
69    /// (`literal`/`glob`/`icase` from the global `--*-pathspecs` flags) are
70    /// supplied via `defaults` and overridden per-element by any explicit
71    /// magic. Unknown long-form magic words are an error, matching git.
72    pub fn parse(arg: &[u8], defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError> {
73        let mut exclude = false;
74        let mut icase = defaults.icase;
75        let mut literal = defaults.literal;
76        let mut glob = defaults.glob;
77        let mut top = false;
78        let mut attrs: Vec<Vec<u8>> = Vec::new();
79        let mut attr_requirements: Vec<PathspecAttrRequirement> = Vec::new();
80
81        let rest = if let Some(after) = arg.strip_prefix(b":(") {
82            // Long form: :(magic[,magic...])pattern
83            let close = after
84                .iter()
85                .position(|&c| c == b')')
86                .ok_or(PathspecParseError::UnterminatedMagic)?;
87            let magic = &after[..close];
88            for word in split_magic(magic) {
89                match word.as_slice() {
90                    b"exclude" => exclude = true,
91                    b"icase" => icase = true,
92                    b"literal" => literal = true,
93                    b"glob" => glob = true,
94                    b"top" => top = true,
95                    other => {
96                        if let Some(attr) = other.strip_prefix(b"attr:") {
97                            if !attrs.is_empty() {
98                                return Err(PathspecParseError::MultipleAttrMagic);
99                            }
100                            attrs.push(attr.to_vec());
101                            attr_requirements = parse_attr_requirements(attr)?;
102                        } else if other.is_empty() {
103                            // Empty magic word (e.g. trailing comma) — ignore,
104                            // matching git's lenient split.
105                        } else {
106                            return Err(PathspecParseError::UnknownMagic(other.to_vec()));
107                        }
108                    }
109                }
110            }
111            &after[close + 1..]
112        } else if let Some(after) = arg.strip_prefix(b":") {
113            // Shorthand sigils. git consumes a run of leading sigils.
114            let mut idx = 0;
115            while idx < after.len() {
116                match after[idx] {
117                    b'!' | b'^' => exclude = true,
118                    b'/' => top = true,
119                    _ => break,
120                }
121                idx += 1;
122            }
123            &after[idx..]
124        } else {
125            arg
126        };
127
128        // `:(glob)` and `:(literal)` are mutually exclusive in git.
129        if glob && literal {
130            return Err(PathspecParseError::GlobLiteralConflict);
131        }
132
133        Ok(PathspecElement {
134            pattern: rest.to_vec(),
135            exclude,
136            icase,
137            literal,
138            glob,
139            top,
140            attrs,
141            attr_requirements,
142        })
143    }
144
145    /// Whether this element is an `:(exclude)` element.
146    pub fn is_exclude(&self) -> bool {
147        self.exclude
148    }
149
150    /// Whether this element carries `:(top)` / `:/` magic.
151    pub fn is_top(&self) -> bool {
152        self.top
153    }
154
155    /// The attribute requirements carried by `:(attr:...)`, if any.
156    pub fn attrs(&self) -> &[Vec<u8>] {
157        &self.attrs
158    }
159
160    pub fn attr_requirements(&self) -> &[PathspecAttrRequirement] {
161        &self.attr_requirements
162    }
163
164    /// Whether this element carries case-insensitive matching.
165    pub fn is_icase(&self) -> bool {
166        self.icase
167    }
168
169    /// Whether this element carries glob magic.
170    pub fn is_glob(&self) -> bool {
171        self.glob
172    }
173
174    /// The bare match pattern (magic prefix stripped).
175    pub fn pattern(&self) -> &[u8] {
176        &self.pattern
177    }
178
179    /// The [`PathspecMatchMagic`] this element matches under.
180    pub fn magic(&self) -> PathspecMatchMagic {
181        PathspecMatchMagic {
182            literal: self.literal,
183            glob: self.glob,
184            icase: self.icase,
185        }
186    }
187
188    /// Whether `name` (a repo-relative path, no leading slash) is selected by
189    /// this single element, ignoring its exclude polarity. Use
190    /// [`Pathspec::matches`] for the combined include/exclude semantics.
191    pub fn matches_path(&self, name: &[u8]) -> bool {
192        pathspec_item_matches(&self.pattern, name, self.magic())
193    }
194
195    pub fn with_pattern(mut self, pattern: Vec<u8>) -> Self {
196        self.pattern = pattern;
197        self
198    }
199}
200
/// A full pathspec: an ordered list of [`PathspecElement`]s combining positive
/// (include) and `:(exclude)` patterns.
///
/// Semantics (git `match_pathspec`): a path matches when at least one
/// non-exclude element selects it AND no exclude element selects it. An
/// all-exclude (or empty) pathspec matches everything not excluded — matching
/// git, where `git log -- ':(exclude)foo'` keeps every path but `foo`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Pathspec {
    /// Parsed elements in the order they were supplied.
    elements: Vec<PathspecElement>,
}
212
213impl Pathspec {
214    /// Parse a list of raw pathspec arguments under the given global magic
215    /// defaults (from `--{glob,noglob,literal,icase}-pathspecs`).
216    pub fn parse<I, S>(args: I, defaults: PathspecMatchMagic) -> Result<Self, PathspecParseError>
217    where
218        I: IntoIterator<Item = S>,
219        S: AsRef<[u8]>,
220    {
221        let mut elements = Vec::new();
222        for arg in args {
223            elements.push(PathspecElement::parse(arg.as_ref(), defaults)?);
224        }
225        Ok(Pathspec { elements })
226    }
227
228    pub fn from_elements(elements: Vec<PathspecElement>) -> Self {
229        Self { elements }
230    }
231
232    /// An empty pathspec matches every path.
233    pub fn is_empty(&self) -> bool {
234        self.elements.is_empty()
235    }
236
237    /// The parsed elements, in order.
238    pub fn elements(&self) -> &[PathspecElement] {
239        &self.elements
240    }
241
242    /// Whether `path` (repo-relative, no leading slash) is selected.
243    ///
244    /// An empty pathspec, or one with only excludes, matches any path the
245    /// excludes don't subtract — exactly git's `match_pathspec` behavior.
246    pub fn matches(&self, path: &[u8]) -> bool {
247        if self.elements.is_empty() {
248            return true;
249        }
250        let mut have_include = false;
251        let mut included = false;
252        for element in &self.elements {
253            if element.exclude {
254                if element.matches_path(path) {
255                    return false;
256                }
257            } else {
258                have_include = true;
259                if element.matches_path(path) {
260                    included = true;
261                }
262            }
263        }
264        // With at least one include, the path must hit one of them. With only
265        // excludes, anything not excluded is kept.
266        if have_include { included } else { true }
267    }
268}
269
/// One pathspec filter together with per-filter bookkeeping.
pub struct LsFilesPathFilter {
    // NOTE(review): presumably the pathspec text as the user typed it — confirm
    // against the code that constructs this struct.
    pub original: String,
    // NOTE(review): not read anywhere in this part of the file; meaning is set
    // by the constructing code — confirm.
    pub recursive: bool,
    // NOTE(review): not read anywhere in this part of the file; presumably
    // derived from the pattern containing glob characters — confirm.
    pub is_glob: bool,
    /// Parsed element used for the actual matching and for exclude polarity.
    pub element: PathspecElement,
    /// Set to `true` by `pathspec_filters_match_with` once this filter has
    /// matched a path.
    pub matched: Cell<bool>,
}
277
278impl LsFilesPathFilter {
279    pub fn is_exclude(&self) -> bool {
280        self.element.is_exclude()
281    }
282
283    pub fn matches(&self, path: &[u8]) -> bool {
284        // Byte-exact git `match_pathspec_item` for the tracked-index path. Handles
285        // exact / directory-prefix / wildcard matching under the active magic.
286        let path_no_slash = path.strip_suffix(b"/").unwrap_or(path);
287        self.element.matches_path(path)
288            || (path_no_slash.len() != path.len() && self.element.matches_path(path_no_slash))
289    }
290}
291
292pub fn pathspec_filters_match(filters: &[LsFilesPathFilter], path: &[u8]) -> bool {
293    pathspec_filters_match_with(filters, path, |filter, path| filter.matches(path))
294}
295
296pub fn pathspec_filters_have_include(filters: &[LsFilesPathFilter]) -> bool {
297    filters.iter().any(|filter| !filter.is_exclude())
298}
299
300pub fn pathspec_filters_match_with(
301    filters: &[LsFilesPathFilter],
302    path: &[u8],
303    mut matches: impl FnMut(&LsFilesPathFilter, &[u8]) -> bool,
304) -> bool {
305    let mut have_include = false;
306    let mut included = false;
307    for filter in filters {
308        if filter.is_exclude() {
309            if matches(filter, path) {
310                filter.matched.set(true);
311                return false;
312            }
313        } else {
314            have_include = true;
315            if matches(filter, path) {
316                filter.matched.set(true);
317                included = true;
318            }
319        }
320    }
321    !have_include || included
322}
323
/// The state reported for an attribute on a path, as compared against a
/// [`PathspecAttrRequirement`] by `pathspec_attrs_match_with`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathspecAttributeState {
    /// Satisfies a `Set` requirement.
    Set,
    /// Satisfies an `Unset` requirement.
    Unset,
    /// Satisfies a `Value` requirement when the bytes are equal.
    Value(Vec<u8>),
}
330
/// The outcome of looking up one attribute, returned by the callback passed
/// to `pathspec_attrs_match_with`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathspecAttributeCheck {
    /// Attribute name; compared byte-for-byte with the requirement's name.
    pub attribute: Vec<u8>,
    /// Reported state; `None` is treated as "unspecified".
    pub state: Option<PathspecAttributeState>,
}
336
337pub fn pathspec_attrs_match_with(
338    element: &PathspecElement,
339    checks: impl FnOnce(&[Vec<u8>]) -> Vec<PathspecAttributeCheck>,
340) -> bool {
341    let requirements = element.attr_requirements();
342    if requirements.is_empty() {
343        return true;
344    }
345    let requested = requirements
346        .iter()
347        .map(|requirement| match requirement {
348            PathspecAttrRequirement::Set(name)
349            | PathspecAttrRequirement::Unset(name)
350            | PathspecAttrRequirement::Unspecified(name) => name.clone(),
351            PathspecAttrRequirement::Value { name, .. } => name.clone(),
352        })
353        .collect::<Vec<_>>();
354    let checks = checks(&requested);
355    requirements.iter().all(|requirement| {
356        let (name, expected) = match requirement {
357            PathspecAttrRequirement::Set(name) => (name, AttrRequirementKind::Set),
358            PathspecAttrRequirement::Unset(name) => (name, AttrRequirementKind::Unset),
359            PathspecAttrRequirement::Unspecified(name) => (name, AttrRequirementKind::Unspecified),
360            PathspecAttrRequirement::Value { name, value } => {
361                (name, AttrRequirementKind::Value(value))
362            }
363        };
364        let state = checks
365            .iter()
366            .find(|check| &check.attribute == name)
367            .and_then(|check| check.state.as_ref());
368        match expected {
369            AttrRequirementKind::Set => matches!(state, Some(PathspecAttributeState::Set)),
370            AttrRequirementKind::Unset => matches!(state, Some(PathspecAttributeState::Unset)),
371            AttrRequirementKind::Unspecified => state.is_none(),
372            AttrRequirementKind::Value(value) => {
373                matches!(state, Some(PathspecAttributeState::Value(actual)) if actual == value)
374            }
375        }
376    })
377}
378
// Borrowed view of a `PathspecAttrRequirement` with the attribute name split
// off, used inside `pathspec_attrs_match_with` to pick the comparison.
enum AttrRequirementKind<'a> {
    Set,
    Unset,
    Unspecified,
    // Expected value bytes, borrowed from the requirement.
    Value(&'a [u8]),
}
385
386pub fn parse_normalized_pathspec_element(
387    prefix: &[u8],
388    arg: &str,
389    magic: PathspecMatchMagic,
390) -> sley_core::Result<PathspecElement> {
391    let element = PathspecElement::parse(arg.as_bytes(), magic)
392        .map_err(|err| GitError::Command(format!("bad pathspec: {err}")))?;
393    let base = if element.is_top() {
394        b"".as_slice()
395    } else {
396        prefix
397    };
398    let pattern = normalize_ls_files_pathspec(base, &String::from_utf8_lossy(element.pattern()))?;
399    Ok(element.with_pattern(pattern))
400}
401
402pub fn normalized_revwalk_pathspec(
403    cwd: &Path,
404    worktree_root: Option<&Path>,
405    pathspecs: &[String],
406    magic: PathspecMatchMagic,
407) -> sley_core::Result<Pathspec> {
408    let prefix = if let Some(root) = worktree_root {
409        let root = fs::canonicalize(root)?;
410        let cwd = fs::canonicalize(cwd)?;
411        cwd.strip_prefix(&root)
412            .map(|relative| relative.to_string_lossy().replace('\\', "/").into_bytes())
413            .unwrap_or_default()
414    } else {
415        Vec::new()
416    };
417    let elements = pathspecs
418        .iter()
419        .map(|spec| parse_normalized_pathspec_element(&prefix, spec, magic))
420        .collect::<sley_core::Result<Vec<_>>>()?;
421    Ok(Pathspec::from_elements(elements))
422}
423
424pub fn normalize_ls_files_pathspec(prefix: &[u8], arg: &str) -> sley_core::Result<Vec<u8>> {
425    let mut components = prefix
426        .split(|byte| *byte == b'/')
427        .filter(|component| !component.is_empty())
428        .map(Vec::from)
429        .collect::<Vec<_>>();
430    for component in Path::new(arg).components() {
431        match component {
432            std::path::Component::CurDir => {}
433            std::path::Component::ParentDir => {
434                components.pop().ok_or_else(|| {
435                    GitError::InvalidPath(format!("pathspec {arg} is outside worktree"))
436                })?;
437            }
438            std::path::Component::Normal(name) => {
439                components.push(name.to_string_lossy().as_bytes().to_vec());
440            }
441            std::path::Component::RootDir | std::path::Component::Prefix(_) => {
442                return Err(GitError::Unsupported(
443                    "ls-files pathspecs currently support relative paths".into(),
444                ));
445            }
446        }
447    }
448    Ok(components.join(&b'/'))
449}
450
/// Split a `:(...)` magic body on unescaped commas (git's `parse_long_magic`
/// separator).
///
/// A backslash protects the byte after it from being treated as a separator;
/// both bytes are kept in the word unchanged. The result always holds at
/// least one word, which may be empty.
fn split_magic(body: &[u8]) -> Vec<Vec<u8>> {
    let mut words = Vec::new();
    let mut word_start = 0usize;
    let mut cursor = 0usize;
    while cursor < body.len() {
        match body[cursor] {
            // Step over the escaped byte too (possibly past the end).
            b'\\' => cursor += 2,
            b',' => {
                words.push(body[word_start..cursor].to_vec());
                cursor += 1;
                word_start = cursor;
            }
            _ => cursor += 1,
        }
    }
    words.push(body[word_start..].to_vec());
    words
}
472
473fn parse_attr_requirements(
474    body: &[u8],
475) -> Result<Vec<PathspecAttrRequirement>, PathspecParseError> {
476    if body.is_empty() {
477        return Err(PathspecParseError::EmptyAttrMagic);
478    }
479    let mut requirements = Vec::new();
480    for raw in body.split(|byte| byte.is_ascii_whitespace()) {
481        if raw.is_empty() {
482            continue;
483        }
484        requirements.push(parse_attr_requirement(raw)?);
485    }
486    if requirements.is_empty() {
487        return Err(PathspecParseError::EmptyAttrMagic);
488    }
489    Ok(requirements)
490}
491
492fn parse_attr_requirement(raw: &[u8]) -> Result<PathspecAttrRequirement, PathspecParseError> {
493    if let Some(rest) = raw.strip_prefix(b"-") {
494        if rest.contains(&b'=') {
495            return Err(PathspecParseError::InvalidAttrSpec(raw.to_vec()));
496        }
497        validate_attr_name(rest)?;
498        return Ok(PathspecAttrRequirement::Unset(rest.to_vec()));
499    }
500    if let Some(rest) = raw.strip_prefix(b"!") {
501        if rest.contains(&b'=') {
502            return Err(PathspecParseError::InvalidAttrSpec(raw.to_vec()));
503        }
504        validate_attr_name(rest)?;
505        return Ok(PathspecAttrRequirement::Unspecified(rest.to_vec()));
506    }
507    if let Some(equal) = raw.iter().position(|byte| *byte == b'=') {
508        let name = &raw[..equal];
509        let value = unescape_attr_value(&raw[equal + 1..])?;
510        validate_attr_name(name)?;
511        return Ok(PathspecAttrRequirement::Value {
512            name: name.to_vec(),
513            value,
514        });
515    }
516    validate_attr_name(raw)?;
517    Ok(PathspecAttrRequirement::Set(raw.to_vec()))
518}
519
520fn validate_attr_name(name: &[u8]) -> Result<(), PathspecParseError> {
521    if name.is_empty()
522        || !name
523            .iter()
524            .all(|byte| byte.is_ascii_alphanumeric() || matches!(*byte, b'-' | b'_' | b'.'))
525    {
526        return Err(PathspecParseError::InvalidAttrSpec(name.to_vec()));
527    }
528    Ok(())
529}
530
531fn unescape_attr_value(value: &[u8]) -> Result<Vec<u8>, PathspecParseError> {
532    let mut out = Vec::with_capacity(value.len());
533    let mut idx = 0usize;
534    while idx < value.len() {
535        if value[idx] != b'\\' {
536            out.push(value[idx]);
537            idx += 1;
538            continue;
539        }
540        let Some(&next) = value.get(idx + 1) else {
541            return Err(PathspecParseError::AttrValueTrailingBackslash);
542        };
543        if next != b',' {
544            return Err(PathspecParseError::AttrValueUnsupportedBackslash);
545        }
546        out.push(next);
547        idx += 2;
548    }
549    Ok(out)
550}
551
/// Error parsing a pathspec magic prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathspecParseError {
    /// A `:(` was not closed by a `)`.
    UnterminatedMagic,
    /// A long-form magic word git does not recognize.
    UnknownMagic(Vec<u8>),
    /// `:(glob)` and `:(literal)` were both requested.
    GlobLiteralConflict,
    /// An `attr:` magic word yielded no attribute requirement.
    EmptyAttrMagic,
    /// More than one `attr:` magic word appeared in a single element.
    MultipleAttrMagic,
    /// An attribute token or attribute name was malformed; carries the
    /// offending bytes.
    InvalidAttrSpec(Vec<u8>),
    /// An attribute value ended with a lone backslash.
    AttrValueTrailingBackslash,
    /// An attribute value used a backslash escape other than `\,`.
    AttrValueUnsupportedBackslash,
}
567
568impl core::fmt::Display for PathspecParseError {
569    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
570        match self {
571            PathspecParseError::UnterminatedMagic => {
572                write!(f, "Missing ')' at end of pathspec magic")
573            }
574            PathspecParseError::UnknownMagic(word) => {
575                write!(
576                    f,
577                    "Invalid pathspec magic '{}'",
578                    String::from_utf8_lossy(word)
579                )
580            }
581            PathspecParseError::GlobLiteralConflict => {
582                write!(f, "'literal' and 'glob' are incompatible")
583            }
584            PathspecParseError::EmptyAttrMagic => write!(f, "empty attr magic is not allowed"),
585            PathspecParseError::MultipleAttrMagic => {
586                write!(f, "Only one 'attr:' specification is allowed")
587            }
588            PathspecParseError::InvalidAttrSpec(spec) => write!(
589                f,
590                "invalid attribute specification '{}'",
591                String::from_utf8_lossy(spec)
592            ),
593            PathspecParseError::AttrValueTrailingBackslash => {
594                write!(
595                    f,
596                    "Escape character '\\' not allowed as last character in attr value"
597                )
598            }
599            PathspecParseError::AttrValueUnsupportedBackslash => {
600                write!(f, "Only '\\,' is supported for value matching")
601            }
602        }
603    }
604}
605
606impl std::error::Error for PathspecParseError {}
607
/// Pathspec match magic, mirroring git's `PATHSPEC_LITERAL`/`PATHSPEC_GLOB`/
/// `PATHSPEC_ICASE`. Constructed from the global `--{glob,noglob,icase,literal}-pathspecs`
/// options. Drives [`pathspec_item_matches`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PathspecMatchMagic {
    /// Treat the whole pattern as literal text; no wildcard matching is run.
    pub literal: bool,
    /// Run wildcard matching with `WM_PATHNAME` (ignored when `literal` is set).
    pub glob: bool,
    /// Compare prefixes and wildcards case-insensitively (ASCII).
    pub icase: bool,
}
617
/// git `is_glob_special`: true for the bytes that make a pathspec a wildcard
/// pattern, namely `*`, `?`, `[` and backslash.
fn is_glob_special(c: u8) -> bool {
    b"*?[\\".contains(&c)
}
622
623/// git `simple_length`: length of the literal prefix before the first glob-special
624/// character (or end of string).
625fn simple_length(s: &[u8]) -> usize {
626    for (i, &c) in s.iter().enumerate() {
627        if is_glob_special(c) {
628            return i;
629        }
630    }
631    s.len()
632}
633
/// Case-aware comparison of at most `n` leading bytes (git `ps_strncmp`).
///
/// Returns `true` when the first `n` bytes of `a` and `b` are EQUAL, i.e.
/// this mirrors `!strncmp(a, b, n)`. With `icase`, ASCII letters compare
/// case-insensitively.
///
/// An operand shorter than `n` is treated like a C string ending there: the
/// two are equal only if the other operand ends at the same position. A
/// strict prefix therefore does NOT compare equal (`"fo"` vs `"foo"` with
/// `n = 3` is `false`).
fn ps_strncmp(icase: bool, a: &[u8], b: &[u8], n: usize) -> bool {
    let a = &a[..a.len().min(n)];
    let b = &b[..b.len().min(n)];
    // After clamping to `n`, differing lengths mean one side ended early
    // inside the compared window, which `strncmp` reports as a mismatch.
    if a.len() != b.len() {
        return false;
    }
    a.iter().zip(b).all(|(&left, &right)| {
        if icase {
            left.to_ascii_lowercase() == right.to_ascii_lowercase()
        } else {
            left == right
        }
    })
}
655
/// True if `path` contains `*`, `?` or `[`. Unlike `is_glob_special`, a
/// backslash alone does not count.
pub fn pathspec_is_glob(path: &[u8]) -> bool {
    path.iter().any(|byte| b"*?[".contains(byte))
}
660
661/// Port of git's `match_pathspec_item` for the single-pathspec / single-name case
662/// (no prefix, no attr magic). `match_` is the pathspec, `name` is the candidate
663/// path. Returns whether the pathspec selects `name` (exactly, as a directory
664/// prefix, or via wildmatch). Byte-for-byte faithful to git 2.54 for the
665/// `ls-files -- <pathspec>` path that t3070 exercises.
666pub fn pathspec_item_matches(match_: &[u8], name: &[u8], magic: PathspecMatchMagic) -> bool {
667    let icase = magic.icase;
668    let matchlen = match_.len();
669    let namelen = name.len();
670
671    // nowildcard_len: with LITERAL magic the whole pattern is literal.
672    let nowildcard_len = if magic.literal {
673        matchlen
674    } else {
675        simple_length(match_)
676    };
677
678    // Empty pathspec matches everything (git: `if (!*match) return MATCHED_RECURSIVELY`).
679    if matchlen == 0 {
680        return true;
681    }
682
683    // Literal-prefix comparison.
684    if matchlen <= namelen && ps_strncmp(icase, match_, name, matchlen) {
685        if matchlen == namelen {
686            return true; // MATCHED_EXACTLY
687        }
688        if match_[matchlen - 1] == b'/' || name[matchlen] == b'/' {
689            return true; // MATCHED_RECURSIVELY
690        }
691    } else if match_[matchlen - 1] == b'/'
692        && namelen == matchlen - 1
693        && ps_strncmp(icase, match_, name, namelen)
694    {
695        // DO_MATCH_DIRECTORY case: pathspec `foo/` vs name `foo`.
696        return true;
697    }
698
699    // Wildcard match — git `git_fnmatch(item, match, name, nowildcard_len)`.
700    if nowildcard_len < matchlen {
701        // git strips the literal prefix off BOTH pattern and name before running
702        // wildmatch (so `foo**` vs `foo/bba/arr` becomes `**` vs `/bba/arr`).
703        if nowildcard_len > 0 && !ps_strncmp(icase, match_, name, nowildcard_len) {
704            return false;
705        }
706        let pat = &match_[nowildcard_len..];
707        if name.len() < nowildcard_len {
708            return false;
709        }
710        let str_ = &name[nowildcard_len..];
711
712        let flags = if magic.glob && !magic.literal {
713            WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 }
714        } else {
715            // Default pathspec (no glob magic): pathmatch semantics.
716            if icase { WM_CASEFOLD } else { 0 }
717        };
718        if wildmatch(pat, str_, flags) {
719            return true;
720        }
721    }
722
723    false
724}
725
/// Case-insensitive match flag (git `WM_CASEFOLD`).
pub const WM_CASEFOLD: u32 = 1;
/// Pathname-aware match flag (git `WM_PATHNAME`): `*`/`?` do not cross `/`,
/// `**` is required to span directory separators.
pub const WM_PATHNAME: u32 = 2;

// Internal result codes returned by `dowild`.
// The pattern matched the text.
const WM_MATCH: i32 = 0;
// The pattern did not match the text.
const WM_NOMATCH: i32 = 1;
// No match is possible, e.g. the text ran out while non-`*` pattern bytes
// were still pending.
const WM_ABORT_ALL: i32 = -1;
// A trailing `*` that may not cross `/` was left with text that still
// contains a `/`.
const WM_ABORT_TO_STARSTAR: i32 = -2;
736
/// True for bytes in the 7-bit ASCII range.
#[inline]
fn wm_isascii(c: u8) -> bool {
    c.is_ascii()
}
/// True for ASCII `A`..=`Z`.
#[inline]
fn wm_isupper(c: u8) -> bool {
    matches!(c, b'A'..=b'Z')
}
/// True for ASCII `a`..=`z`.
#[inline]
fn wm_islower(c: u8) -> bool {
    matches!(c, b'a'..=b'z')
}
/// ASCII lowercase of `c`; other bytes are returned unchanged.
#[inline]
fn wm_tolower(c: u8) -> u8 {
    u8::to_ascii_lowercase(&c)
}
/// ASCII uppercase of `c`; other bytes are returned unchanged.
#[inline]
fn wm_toupper(c: u8) -> u8 {
    u8::to_ascii_uppercase(&c)
}
/// True for the wildmatch metacharacters `*`, `?`, `[` and backslash.
#[inline]
fn wm_is_glob_special(c: u8) -> bool {
    b"*?[\\".contains(&c)
}
761
/// True when the character-class name `class` is exactly `lit`.
fn wm_cc_eq(class: &[u8], lit: &[u8]) -> bool {
    class.eq(lit)
}
765
766fn wm_class_matches(class: &[u8], t_ch: u8, flags: u32) -> Option<bool> {
767    // Returns Some(matched) for a recognized class, or None for a malformed
768    // class name (caller maps to WM_ABORT_ALL).
769    let m = if wm_cc_eq(class, b"alnum") {
770        wm_isascii(t_ch) && t_ch.is_ascii_alphanumeric()
771    } else if wm_cc_eq(class, b"alpha") {
772        wm_isascii(t_ch) && t_ch.is_ascii_alphabetic()
773    } else if wm_cc_eq(class, b"blank") {
774        wm_isascii(t_ch) && (t_ch == b' ' || t_ch == b'\t')
775    } else if wm_cc_eq(class, b"cntrl") {
776        wm_isascii(t_ch) && t_ch.is_ascii_control()
777    } else if wm_cc_eq(class, b"digit") {
778        wm_isascii(t_ch) && t_ch.is_ascii_digit()
779    } else if wm_cc_eq(class, b"graph") {
780        wm_isascii(t_ch) && t_ch.is_ascii_graphic()
781    } else if wm_cc_eq(class, b"lower") {
782        wm_islower(t_ch)
783    } else if wm_cc_eq(class, b"print") {
784        // ISPRINT: printable including space (0x20..=0x7e).
785        wm_isascii(t_ch) && (0x20..=0x7e).contains(&t_ch)
786    } else if wm_cc_eq(class, b"punct") {
787        wm_isascii(t_ch) && t_ch.is_ascii_punctuation()
788    } else if wm_cc_eq(class, b"space") {
789        wm_isascii(t_ch) && t_ch.is_ascii_whitespace()
790    } else if wm_cc_eq(class, b"upper") {
791        wm_isupper(t_ch) || ((flags & WM_CASEFOLD) != 0 && wm_islower(t_ch))
792    } else if wm_cc_eq(class, b"xdigit") {
793        wm_isascii(t_ch) && t_ch.is_ascii_hexdigit()
794    } else {
795        return None;
796    };
797    Some(m)
798}
799
800/// Faithful port of git's `wildmatch.c::dowild`. Returns one of the internal
801/// `WM_*` codes (`WM_MATCH`, `WM_NOMATCH`, `WM_ABORT_ALL`, `WM_ABORT_TO_STARSTAR`).
802fn dowild(pattern: &[u8], text: &[u8], flags: u32) -> i32 {
803    let p = pattern;
804    let mut pi = 0usize;
805    let mut ti = 0usize;
806
807    while pi < p.len() {
808        let mut p_ch = p[pi];
809        let t_ch_raw = if ti < text.len() { text[ti] } else { 0 };
810        let mut t_ch = t_ch_raw;
811
812        if t_ch == 0 && p_ch != b'*' {
813            return WM_ABORT_ALL;
814        }
815        if (flags & WM_CASEFOLD) != 0 && wm_isupper(t_ch) {
816            t_ch = wm_tolower(t_ch);
817        }
818        if (flags & WM_CASEFOLD) != 0 && wm_isupper(p_ch) {
819            p_ch = wm_tolower(p_ch);
820        }
821
822        match p_ch {
823            b'?' => {
824                if (flags & WM_PATHNAME) != 0 && t_ch == b'/' {
825                    return WM_NOMATCH;
826                }
827                // fallthrough: advance both
828                pi += 1;
829                ti += 1;
830                continue;
831            }
832            b'*' => {
833                pi += 1;
834                let match_slash: bool;
835                if pi < p.len() && p[pi] == b'*' {
836                    let prev_p = pi; // index of the second '*'
837                    while pi < p.len() && p[pi] == b'*' {
838                        pi += 1;
839                    }
840                    if (flags & WM_PATHNAME) == 0 {
841                        match_slash = true;
842                    } else if (prev_p < 2 || p[prev_p - 2] == b'/')
843                        && (pi == p.len()
844                            || p[pi] == b'/'
845                            || (p[pi] == b'\\' && pi + 1 < p.len() && p[pi + 1] == b'/'))
846                    {
847                        if pi < p.len()
848                            && p[pi] == b'/'
849                            && dowild(&p[pi + 1..], &text[ti..], flags) == WM_MATCH
850                        {
851                            return WM_MATCH;
852                        }
853                        match_slash = true;
854                    } else {
855                        match_slash = false;
856                    }
857                } else {
858                    match_slash = (flags & WM_PATHNAME) == 0;
859                }
860
861                if pi == p.len() {
862                    // Trailing "**" matches everything; trailing "*" matches only
863                    // if there are no more slashes.
864                    if !match_slash && text[ti..].contains(&b'/') {
865                        return WM_ABORT_TO_STARSTAR;
866                    }
867                    return WM_MATCH;
868                } else if !match_slash && p[pi] == b'/' {
869                    // _one_ asterisk followed by a slash with WM_PATHNAME matches
870                    // the next directory.
871                    match text[ti..].iter().position(|&c| c == b'/') {
872                        None => return WM_ABORT_ALL,
873                        Some(off) => {
874                            ti += off; // point at the slash; consumed by loop end
875                        }
876                    }
877                    // emulate `break` then the for-loop's `text++; p++` increment:
878                    pi += 1;
879                    ti += 1;
880                    continue;
881                }
882
883                // The matching loop.
884                let mut cur_t = ti;
885                loop {
886                    let mut tc = if cur_t < text.len() { text[cur_t] } else { 0 };
887                    if tc == 0 {
888                        break;
889                    }
890                    if !wm_is_glob_special(p[pi]) {
891                        let mut pc = p[pi];
892                        if (flags & WM_CASEFOLD) != 0 && wm_isupper(pc) {
893                            pc = wm_tolower(pc);
894                        }
895                        loop {
896                            tc = if cur_t < text.len() { text[cur_t] } else { 0 };
897                            if tc == 0 {
898                                break;
899                            }
900                            if !(match_slash || tc != b'/') {
901                                break;
902                            }
903                            let mut tcf = tc;
904                            if (flags & WM_CASEFOLD) != 0 && wm_isupper(tcf) {
905                                tcf = wm_tolower(tcf);
906                            }
907                            if tcf == pc {
908                                break;
909                            }
910                            cur_t += 1;
911                        }
912                        // Recompute the casefolded tc for the comparison below.
913                        let tc_cmp = {
914                            let raw = if cur_t < text.len() { text[cur_t] } else { 0 };
915                            if (flags & WM_CASEFOLD) != 0 && wm_isupper(raw) {
916                                wm_tolower(raw)
917                            } else {
918                                raw
919                            }
920                        };
921                        if tc_cmp != pc {
922                            if match_slash {
923                                return WM_ABORT_ALL;
924                            } else {
925                                return WM_ABORT_TO_STARSTAR;
926                            }
927                        }
928                    }
929                    let matched = dowild(&p[pi..], &text[cur_t..], flags);
930                    if matched != WM_NOMATCH {
931                        if !match_slash || matched != WM_ABORT_TO_STARSTAR {
932                            return matched;
933                        }
934                    } else {
935                        let cur_raw = if cur_t < text.len() { text[cur_t] } else { 0 };
936                        if !match_slash && cur_raw == b'/' {
937                            return WM_ABORT_TO_STARSTAR;
938                        }
939                    }
940                    cur_t += 1;
941                }
942                return WM_ABORT_ALL;
943            }
944            b'[' => {
945                pi += 1;
946                let mut p_ch2 = if pi < p.len() { p[pi] } else { 0 };
947                if p_ch2 == b'^' {
948                    p_ch2 = b'!';
949                }
950                let negated = p_ch2 == b'!';
951                if negated {
952                    pi += 1;
953                    p_ch2 = if pi < p.len() { p[pi] } else { 0 };
954                }
955                let mut prev_ch: u8 = 0;
956                let mut matched = false;
957                loop {
958                    if p_ch2 == 0 {
959                        return WM_ABORT_ALL;
960                    }
961                    let mut next_prev: u8 = p_ch2;
962                    let mut skip_class = false;
963                    if p_ch2 == b'\\' {
964                        pi += 1;
965                        p_ch2 = if pi < p.len() { p[pi] } else { 0 };
966                        if p_ch2 == 0 {
967                            return WM_ABORT_ALL;
968                        }
969                        if t_ch == p_ch2 {
970                            matched = true;
971                        }
972                        next_prev = p_ch2;
973                    } else if p_ch2 == b'-' && prev_ch != 0 && pi + 1 < p.len() && p[pi + 1] != b']'
974                    {
975                        pi += 1;
976                        p_ch2 = p[pi];
977                        if p_ch2 == b'\\' {
978                            pi += 1;
979                            p_ch2 = if pi < p.len() { p[pi] } else { 0 };
980                            if p_ch2 == 0 {
981                                return WM_ABORT_ALL;
982                            }
983                        }
984                        if t_ch <= p_ch2 && t_ch >= prev_ch {
985                            matched = true;
986                        } else if (flags & WM_CASEFOLD) != 0 && wm_islower(t_ch) {
987                            let t_up = wm_toupper(t_ch);
988                            if t_up <= p_ch2 && t_up >= prev_ch {
989                                matched = true;
990                            }
991                        }
992                        next_prev = 0;
993                    } else if p_ch2 == b'[' && pi + 1 < p.len() && p[pi + 1] == b':' {
994                        // [:class:]
995                        let s = pi + 2;
996                        let mut scan = s;
997                        loop {
998                            if scan >= p.len() {
999                                break;
1000                            }
1001                            if p[scan] == b']' {
1002                                break;
1003                            }
1004                            scan += 1;
1005                        }
1006                        pi = scan;
1007                        p_ch2 = if pi < p.len() { p[pi] } else { 0 };
1008                        if p_ch2 == 0 {
1009                            return WM_ABORT_ALL;
1010                        }
1011                        // i = p - s - 1 (length of class name); require trailing ':'
1012                        let class_end = pi; // index of ']'
1013                        if class_end < s + 1 || p[class_end - 1] != b':' {
1014                            // Not a real [:class:]; treat '[' as a literal set member.
1015                            pi = s.wrapping_sub(2);
1016                            p_ch2 = b'[';
1017                            if t_ch == p_ch2 {
1018                                matched = true;
1019                            }
1020                            skip_class = true;
1021                            next_prev = p_ch2;
1022                        } else {
1023                            let class = &p[s..class_end - 1];
1024                            match wm_class_matches(class, t_ch, flags) {
1025                                Some(true) => matched = true,
1026                                Some(false) => {}
1027                                None => return WM_ABORT_ALL,
1028                            }
1029                            next_prev = 0;
1030                        }
1031                    } else if t_ch == p_ch2 {
1032                        matched = true;
1033                    }
1034
1035                    let _ = skip_class;
1036                    // next: advance to the next class char
1037                    prev_ch = next_prev;
1038                    pi += 1;
1039                    p_ch2 = if pi < p.len() { p[pi] } else { 0 };
1040                    if p_ch2 == b']' {
1041                        break;
1042                    }
1043                }
1044                if matched == negated || ((flags & WM_PATHNAME) != 0 && t_ch == b'/') {
1045                    return WM_NOMATCH;
1046                }
1047                pi += 1;
1048                ti += 1;
1049                continue;
1050            }
1051            b'\\' => {
1052                // Literal match with the following character. p[pi+1]=='\0'
1053                // failure is handled by the default arm below.
1054                pi += 1;
1055                let lit = if pi < p.len() { p[pi] } else { 0 };
1056                let lit = if (flags & WM_CASEFOLD) != 0 && wm_isupper(lit) {
1057                    wm_tolower(lit)
1058                } else {
1059                    lit
1060                };
1061                if t_ch != lit {
1062                    return WM_NOMATCH;
1063                }
1064                pi += 1;
1065                ti += 1;
1066                continue;
1067            }
1068            _ => {
1069                if t_ch != p_ch {
1070                    return WM_NOMATCH;
1071                }
1072                pi += 1;
1073                ti += 1;
1074                continue;
1075            }
1076        }
1077    }
1078
1079    if ti < text.len() && text[ti] != 0 {
1080        WM_NOMATCH
1081    } else {
1082        WM_MATCH
1083    }
1084}
1085
1086/// Match `pattern` against `text` with git's `wildmatch` semantics.
1087/// `flags` is a bitwise-OR of [`WM_CASEFOLD`] and [`WM_PATHNAME`].
1088pub fn wildmatch(pattern: &[u8], text: &[u8], flags: u32) -> bool {
1089    dowild(pattern, text, flags) == WM_MATCH
1090}
1091
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `args` as a pathspec with default match magic, panicking on a
    /// parse error.
    fn ps(args: &[&str]) -> Pathspec {
        Pathspec::parse(
            args.iter().map(|s| s.as_bytes()),
            PathspecMatchMagic::default(),
        )
        .expect("valid pathspec")
    }

    #[test]
    fn empty_pathspec_matches_everything() {
        let p = Pathspec::default();
        assert!(p.is_empty());
        assert!(p.matches(b"any/path"));
    }

    #[test]
    fn literal_prefix_matches_directory_recursively() {
        let p = ps(&["src"]);
        assert!(p.matches(b"src"));
        assert!(p.matches(b"src/lib.rs"));
        assert!(!p.matches(b"srcs/lib.rs"));
        assert!(!p.matches(b"other"));
    }

    #[test]
    fn exclude_subtracts_from_includes() {
        let p = ps(&["src", ":(exclude)src/gen"]);
        assert!(p.matches(b"src/lib.rs"));
        assert!(!p.matches(b"src/gen/x.rs"));
    }

    #[test]
    fn exclude_shorthand_sigils() {
        for spec in [":!foo", ":^foo"] {
            let p = ps(&[spec]);
            assert!(p.elements()[0].is_exclude());
            // exclude-only pathspec keeps everything but the excluded path.
            assert!(p.matches(b"bar"));
            assert!(!p.matches(b"foo"));
        }
    }

    #[test]
    fn icase_magic_folds_case() {
        let p = ps(&[":(icase)readme"]);
        assert!(p.matches(b"README"));
        assert!(p.matches(b"readme"));
        let plain = ps(&["readme"]);
        assert!(!plain.matches(b"README"));
    }

    #[test]
    fn glob_magic_is_pathname_aware() {
        // :(glob)*.rs uses WM_PATHNAME so `*` does not cross `/`.
        let p = ps(&[":(glob)*.rs"]);
        assert!(p.matches(b"lib.rs"));
        assert!(!p.matches(b"src/lib.rs"));
        // ** spans directories under glob magic.
        let pp = ps(&[":(glob)**/*.rs"]);
        assert!(pp.matches(b"src/lib.rs"));
    }

    #[test]
    fn wildmatch_star_respects_pathname_flag() {
        // Direct check of the engine: a lone `*` stays within one path
        // component under WM_PATHNAME, while a leading `**/` crosses them.
        assert!(wildmatch(b"*.rs", b"lib.rs", WM_PATHNAME));
        assert!(!wildmatch(b"*.rs", b"src/lib.rs", WM_PATHNAME));
        assert!(wildmatch(b"**/*.rs", b"src/lib.rs", WM_PATHNAME));
    }

    #[test]
    fn literal_magic_disables_wildcards() {
        let p = ps(&[":(literal)a*b"]);
        assert!(p.matches(b"a*b"));
        assert!(!p.matches(b"axxb"));
    }

    #[test]
    fn top_magic_is_parsed() {
        let p = ps(&[":(top)src", ":/other"]);
        assert!(p.elements()[0].is_top());
        assert!(p.elements()[1].is_top());
    }

    #[test]
    fn attr_magic_is_retained() {
        let p = ps(&[":(attr:binary)data"]);
        assert_eq!(p.elements()[0].attrs(), &[b"binary".to_vec()]);
        assert_eq!(p.elements()[0].pattern(), b"data");
    }

    #[test]
    fn combined_magic_words() {
        let p = ps(&[":(exclude,icase)Cargo.lock"]);
        let el = &p.elements()[0];
        assert!(el.is_exclude());
        // exclude is case-insensitive: CARGO.LOCK is subtracted too.
        assert!(!p.matches(b"CARGO.LOCK"));
    }

    /// Parse a single pathspec argument that is expected to be rejected and
    /// return the resulting error; panics if parsing succeeds.
    fn parse_err(arg: &[u8]) -> PathspecParseError {
        match Pathspec::parse([arg], PathspecMatchMagic::default()) {
            Ok(_) => panic!(
                "expected parse error for {:?}",
                String::from_utf8_lossy(arg)
            ),
            Err(e) => e,
        }
    }

    #[test]
    fn glob_literal_conflict_is_error() {
        assert_eq!(
            parse_err(b":(glob,literal)x"),
            PathspecParseError::GlobLiteralConflict
        );
    }

    #[test]
    fn unknown_magic_is_error() {
        assert!(matches!(
            parse_err(b":(bogus)x"),
            PathspecParseError::UnknownMagic(_)
        ));
    }

    #[test]
    fn unterminated_magic_is_error() {
        assert_eq!(
            parse_err(b":(exclude"),
            PathspecParseError::UnterminatedMagic
        );
    }

    #[test]
    fn exclude_only_keeps_unmatched() {
        let p = ps(&[":(exclude)target"]);
        assert!(p.matches(b"src/lib.rs"));
        assert!(!p.matches(b"target/debug"));
    }
}