Skip to main content

grit_lib/
pathspec.rs

1//! Git-compatible pathspec matching (magic tokens and global flags).
2//!
3//! Global flags are read from the same environment variables as Git:
4//! `GIT_LITERAL_PATHSPECS`, `GIT_GLOB_PATHSPECS`, `GIT_NOGLOB_PATHSPECS`,
5//! `GIT_ICASE_PATHSPECS`. The `grit` binary sets these from CLI flags such as
6//! `--literal-pathspecs` before dispatching subcommands.
7
8use crate::crlf::path_has_gitattribute;
9use crate::crlf::AttrRule;
10use crate::wildmatch::{wildmatch, WM_CASEFOLD, WM_PATHNAME};
11
/// Counts the bytes of `match_str` that precede its first glob metacharacter.
///
/// The metacharacters are `*`, `?`, `[` and `\`, mirroring Git's `simple_length()`.
/// When none is present the whole byte length of `match_str` is returned.
#[must_use]
pub fn simple_length(match_str: &str) -> usize {
    match_str
        .bytes()
        .position(|byte| matches!(byte, b'*' | b'?' | b'[' | b'\\'))
        .unwrap_or(match_str.len())
}
26
/// Magic flags parsed from a single pathspec element, later merged with the global
/// environment flags.
#[derive(Clone, Debug, Default)]
struct PathspecMagic {
    /// `literal` magic: the pattern is compared verbatim, without glob expansion.
    literal: bool,
    /// `glob` magic: wildcard matching is performed with `WM_PATHNAME`.
    glob: bool,
    /// `icase` magic: comparisons ignore ASCII case.
    icase: bool,
    /// `exclude` magic (`:!` / `:^` in the short form).
    exclude: bool,
    /// Text following `prefix:` in the long form, if any.
    prefix: Option<String>,
    /// `:(attr:NAME)` — match paths that have gitattribute `NAME` set.
    attr_name: Option<String>,
}
37
/// Interprets `v` as a boolean word, ignoring surrounding whitespace and ASCII case.
///
/// `true`/`yes`/`on`/`1` yield `Some(true)`, `false`/`no`/`off`/`0` yield `Some(false)`,
/// and anything else yields `None`.
fn parse_maybe_bool(v: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["true", "yes", "on", "1"];
    const FALSY: [&str; 4] = ["false", "no", "off", "0"];

    let word = v.trim();
    let is_one_of = |table: &[&str]| table.iter().any(|known| word.eq_ignore_ascii_case(known));

    if is_one_of(&TRUTHY) {
        Some(true)
    } else if is_one_of(&FALSY) {
        Some(false)
    } else {
        None
    }
}
46
47fn git_env_bool(key: &str, default: bool) -> bool {
48    match std::env::var(key) {
49        Ok(v) => parse_maybe_bool(&v).unwrap_or(default),
50        Err(_) => default,
51    }
52}
53
54fn literal_global() -> bool {
55    git_env_bool("GIT_LITERAL_PATHSPECS", false)
56}
57
58/// Whether `GIT_LITERAL_PATHSPECS` is enabled (shell `*` and `?` are literal, not globs).
59#[must_use]
60pub fn literal_pathspecs_enabled() -> bool {
61    literal_global()
62}
63
64fn glob_global() -> bool {
65    git_env_bool("GIT_GLOB_PATHSPECS", false)
66}
67
68fn noglob_global() -> bool {
69    git_env_bool("GIT_NOGLOB_PATHSPECS", false)
70}
71
72fn icase_global() -> bool {
73    git_env_bool("GIT_ICASE_PATHSPECS", false)
74}
75
76/// Validates global pathspec environment flags the same way Git does.
77///
78/// Returns an error message suitable for `bail!` when flags are incompatible.
79#[must_use]
80pub fn validate_global_pathspec_flags() -> Result<(), String> {
81    let lit = literal_global();
82    let glob = glob_global();
83    let noglob = noglob_global();
84    let icase = icase_global();
85
86    if glob && noglob {
87        return Err("global 'glob' and 'noglob' pathspec settings are incompatible".to_string());
88    }
89    if lit && (glob || noglob || icase) {
90        return Err(
91            "global 'literal' pathspec setting is incompatible with all other global pathspec settings"
92                .to_string(),
93        );
94    }
95    Ok(())
96}
97
98fn parse_long_magic(rest_after_paren: &str) -> Option<(PathspecMagic, &str)> {
99    let close = rest_after_paren.find(')')?;
100    let magic_part = &rest_after_paren[..close];
101    let tail = &rest_after_paren[close + 1..];
102    let mut magic = PathspecMagic::default();
103    for raw in magic_part.split(',') {
104        let token = raw.trim();
105        if token.is_empty() {
106            continue;
107        }
108        if let Some(p) = token.strip_prefix("prefix:") {
109            magic.prefix = Some(p.to_string());
110            continue;
111        }
112        if let Some(name) = token.strip_prefix("attr:") {
113            if !name.is_empty() {
114                magic.attr_name = Some(name.to_string());
115            }
116            continue;
117        }
118        if token.eq_ignore_ascii_case("literal") {
119            magic.literal = true;
120        } else if token.eq_ignore_ascii_case("glob") {
121            magic.glob = true;
122        } else if token.eq_ignore_ascii_case("icase") {
123            magic.icase = true;
124        } else if token.eq_ignore_ascii_case("exclude") {
125            magic.exclude = true;
126        }
127    }
128    Some((magic, tail))
129}
130
131/// `elem` is the full pathspec beginning with `:` (short magic form, not `:(...)`).
132fn parse_short_magic(elem: &str) -> (PathspecMagic, &str) {
133    let bytes = elem.as_bytes();
134    let mut i = 1usize;
135    let mut magic = PathspecMagic::default();
136    while i < bytes.len() && bytes[i] != b':' {
137        let ch = bytes[i];
138        if ch == b'^' {
139            magic.exclude = true;
140            i += 1;
141            continue;
142        }
143        let is_magic = match ch {
144            b'!' => {
145                magic.exclude = true;
146                true
147            }
148            b'/' => true, // :(top) — strip `:/` from pattern later
149            _ => false,
150        };
151        if is_magic {
152            i += 1;
153            continue;
154        }
155        break;
156    }
157    if i < bytes.len() && bytes[i] == b':' {
158        i += 1;
159    }
160    (magic, &elem[i..])
161}
162
163/// Strip `:(magic)` / `:magic` prefix when not in literal-global mode.
164fn parse_element_magic(elem: &str) -> (PathspecMagic, &str) {
165    if !elem.starts_with(':') || literal_global() {
166        return (PathspecMagic::default(), elem);
167    }
168    if elem.starts_with(":(") {
169        return parse_long_magic(&elem[2..]).unwrap_or((PathspecMagic::default(), elem));
170    }
171    parse_short_magic(elem)
172}
173
174fn combine_magic(element: PathspecMagic) -> PathspecMagic {
175    let mut m = element;
176    if literal_global() {
177        m.literal = true;
178    }
179    if glob_global() && !m.literal {
180        m.glob = true;
181    }
182    if icase_global() {
183        m.icase = true;
184    }
185    if noglob_global() && !m.glob {
186        m.literal = true;
187    }
188    m
189}
190
/// Removes a single leading `:/` from `pattern`, if present.
fn strip_top_magic(pattern: &str) -> &str {
    pattern.strip_prefix(":/").unwrap_or(pattern)
}
197
198/// True if `path` is matched by `spec` (Git pathspec syntax, including magic and globals).
199#[must_use]
200pub fn pathspec_matches(spec: &str, path: &str) -> bool {
201    let (elem_magic, raw_pattern) = parse_element_magic(spec);
202    let magic = combine_magic(elem_magic);
203
204    if magic.literal && magic.glob {
205        // Git dies; treat as non-match for robustness.
206        return false;
207    }
208
209    if magic.exclude {
210        // Exclude pathspecs are handled by higher layers; do not match positively here.
211        return false;
212    }
213
214    let pattern = strip_top_magic(raw_pattern);
215    let path_for_match = if let Some(prefix) = magic.prefix.as_deref() {
216        if !path.starts_with(prefix) {
217            return false;
218        }
219        &path[prefix.len()..]
220    } else {
221        path
222    };
223
224    pathspec_matches_tail(pattern, path_for_match, magic)
225}
226
227fn pathspec_matches_tail(pattern: &str, path: &str, magic: PathspecMagic) -> bool {
228    if pattern.is_empty() {
229        return true;
230    }
231
232    let flags = if magic.icase { WM_CASEFOLD } else { 0 };
233
234    if magic.literal {
235        return literal_prefix_match(pattern, path);
236    }
237
238    let wm_flags = if magic.glob {
239        flags | WM_PATHNAME
240    } else {
241        flags
242    };
243
244    let pattern_bytes = pattern.as_bytes();
245    let path_bytes = path.as_bytes();
246    let simple = simple_length(pattern);
247
248    if simple < pattern.len() {
249        if wildmatch(pattern_bytes, path_bytes, wm_flags) {
250            return true;
251        }
252    } else if ps_str_eq(pattern, path, magic.icase) {
253        return true;
254    }
255
256    if let Some(prefix) = pattern.strip_suffix('/') {
257        if ps_str_eq(prefix, path, magic.icase) {
258            return true;
259        }
260        let prefix_slash = format!("{prefix}/");
261        if path_starts_with(path, &prefix_slash, magic.icase) {
262            return true;
263        }
264        return false;
265    }
266
267    let prefix_slash = format!("{pattern}/");
268    path == pattern || path_starts_with(path, &prefix_slash, magic.icase)
269}
270
/// Compares `a` and `b` for equality, folding ASCII case when `icase` is set.
fn ps_str_eq(a: &str, b: &str, icase: bool) -> bool {
    match icase {
        true => a.eq_ignore_ascii_case(b),
        false => a == b,
    }
}
278
/// Tests whether `path` begins with `prefix`, folding ASCII case when `icase` is set.
///
/// In the case-insensitive branch the head of `path` is taken by byte length; if that
/// length is out of range or not on a character boundary the result is `false`.
fn path_starts_with(path: &str, prefix: &str, icase: bool) -> bool {
    if !icase {
        return path.starts_with(prefix);
    }
    match path.get(..prefix.len()) {
        Some(head) => head.eq_ignore_ascii_case(prefix),
        None => false,
    }
}
287
/// Case-sensitive literal match: `path` equals `pattern` (minus one trailing `/`, if the
/// pattern has one) or lies beneath it as `pattern/...`.
fn literal_prefix_match(pattern: &str, path: &str) -> bool {
    let base = pattern.strip_suffix('/').unwrap_or(pattern);
    match path.strip_prefix(base) {
        // Either nothing is left (exact match) or the remainder starts a sub-path.
        Some(rest) => rest.is_empty() || rest.starts_with('/'),
        None => false,
    }
}
294
/// Entry-kind metadata consulted when a literal pathspec ends in `/` (tree-walk / diff-tree).
///
/// Git reads `dir/` as "directory or git submodule only": a regular file named `dir`
/// does not match, whereas a tree entry or gitlink named `dir` does. The default value
/// has both flags cleared, i.e. it describes a regular file.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PathspecMatchContext {
    /// Set when the index/tree entry is a directory (mode `040000`).
    pub is_directory: bool,
    /// Set when the entry is a git submodule / gitlink (mode `160000`).
    pub is_git_submodule: bool,
}
306
307/// Returns whether `path` matches the pathspec `spec` with default (file) context.
308///
309/// For pathspecs ending in `/`, a path equal to the prefix matches only when
310/// [`PathspecMatchContext`] indicates a directory or submodule; see
311/// [`matches_pathspec_with_context`].
312#[must_use]
313pub fn matches_pathspec(spec: &str, path: &str) -> bool {
314    matches_pathspec_with_context(spec, path, PathspecMatchContext::default())
315}
316
317/// Like [`matches_pathspec`], but uses `ctx` for trailing-`/` literal pathspecs.
318#[must_use]
319pub fn matches_pathspec_with_context(spec: &str, path: &str, ctx: PathspecMatchContext) -> bool {
320    let trimmed = spec.strip_prefix("./").unwrap_or(spec);
321    if trimmed == "." || trimmed.is_empty() {
322        return true;
323    }
324
325    if trimmed.contains('*') || trimmed.contains('?') || trimmed.contains('[') {
326        let flags = if trimmed.contains("**") {
327            WM_PATHNAME
328        } else {
329            0
330        };
331        if wildmatch(trimmed.as_bytes(), path.as_bytes(), flags) {
332            return true;
333        }
334        if (ctx.is_directory || ctx.is_git_submodule)
335            && !path.is_empty()
336            && trimmed.len() > path.len()
337            && trimmed.as_bytes().get(path.len()) == Some(&b'/')
338            && trimmed.starts_with(path)
339        {
340            return true;
341        }
342        return false;
343    }
344
345    if let Some(prefix) = trimmed.strip_suffix('/') {
346        if path.starts_with(&format!("{prefix}/")) {
347            return true;
348        }
349        if path == prefix {
350            return ctx.is_directory || ctx.is_git_submodule;
351        }
352        return false;
353    }
354
355    path == trimmed || path.starts_with(&format!("{trimmed}/"))
356}
357
358/// Parse a Git mode string (e.g. `100644`, `040000`) into a [`PathspecMatchContext`].
359#[must_use]
360pub fn context_from_mode_octal(mode: &str) -> PathspecMatchContext {
361    let Ok(bits) = u32::from_str_radix(mode, 8) else {
362        return PathspecMatchContext::default();
363    };
364    context_from_mode_bits(bits)
365}
366
367/// Classify a raw Git mode (e.g. from an index or tree entry) for pathspec matching.
368#[must_use]
369pub fn context_from_mode_bits(mode: u32) -> PathspecMatchContext {
370    let ty = mode & 0o170000;
371    PathspecMatchContext {
372        is_directory: ty == 0o040000,
373        is_git_submodule: ty == 0o160000,
374    }
375}
376
377/// Match a pathspec against a tree path, using `.gitattributes` for `:(attr:...)`.
378///
379/// Used by `git archive` style tree walks: `mode` supplies directory/gitlink context for
380/// literal pathspecs ending in `/`.
381#[must_use]
382pub fn matches_pathspec_for_object(
383    spec: &str,
384    path: &str,
385    mode: u32,
386    attr_rules: &[AttrRule],
387) -> bool {
388    let (elem_magic, raw_pattern) = parse_element_magic(spec);
389    let magic = combine_magic(elem_magic);
390
391    if magic.literal && magic.glob {
392        return false;
393    }
394    if magic.exclude {
395        return false;
396    }
397
398    let ctx = context_from_mode_bits(mode);
399    let is_dir_for_attr = path.ends_with('/') || ctx.is_directory || ctx.is_git_submodule;
400
401    if let Some(ref attr) = magic.attr_name {
402        if !path_has_gitattribute(attr_rules, path, is_dir_for_attr, attr) {
403            return false;
404        }
405    }
406
407    let pattern = strip_top_magic(raw_pattern);
408    let path_for_match = if let Some(prefix) = magic.prefix.as_deref() {
409        if !path.starts_with(prefix) {
410            return false;
411        }
412        &path[prefix.len()..]
413    } else {
414        path
415    };
416    if magic.literal || magic.glob || magic.icase {
417        pathspec_matches_tail(pattern, path_for_match, magic)
418    } else {
419        matches_pathspec_with_context(pattern, path_for_match, ctx)
420    }
421}
422
423/// Returns wildmatch flags for `:(icase)` / `:(glob)`-style patterns when those
424/// appear as explicit magic (not used by default CLI pathspecs).
425#[must_use]
426pub fn wildmatch_flags_icase_glob(icase: bool, glob: bool) -> u32 {
427    let mut f = if glob { WM_PATHNAME } else { 0 };
428    if icase {
429        f |= WM_CASEFOLD;
430    }
431    f
432}
433
#[cfg(test)]
mod tree_entry_pathspec_tests {
    use super::*;

    /// Context describing a regular file (same as the default).
    const FILE: PathspecMatchContext = PathspecMatchContext {
        is_directory: false,
        is_git_submodule: false,
    };
    /// Context describing a tree (directory) entry.
    const DIRECTORY: PathspecMatchContext = PathspecMatchContext {
        is_directory: true,
        is_git_submodule: false,
    };
    /// Context describing a gitlink (submodule) entry.
    const SUBMODULE: PathspecMatchContext = PathspecMatchContext {
        is_directory: false,
        is_git_submodule: true,
    };

    #[test]
    fn literal_prefix_and_exact() {
        // A plain spec matches itself and anything beneath it, but not a longer sibling name.
        assert!(matches_pathspec("path1", "path1/file1"));
        assert!(matches_pathspec_with_context("path1/", "path1/file1", FILE));
        assert!(matches_pathspec("file0", "file0"));
        assert!(!matches_pathspec("path", "path1/file1"));
    }

    #[test]
    fn wildcards_cross_slash_by_default() {
        assert!(matches_pathspec("f*", "file0"));
        assert!(matches_pathspec("*file1", "path1/file1"));
        // A directory that is a literal leading component of the glob still matches.
        assert!(matches_pathspec_with_context("path1/f*", "path1", DIRECTORY));
        assert!(matches_pathspec("path1/*file1", "path1/file1"));
    }

    #[test]
    fn trailing_slash_directory_only() {
        // `name/` rejects a regular file called `name` but accepts a tree or gitlink.
        assert!(!matches_pathspec_with_context("file0/", "file0", FILE));
        assert!(matches_pathspec_with_context("file0/", "file0", DIRECTORY));
        assert!(matches_pathspec_with_context("submod/", "submod", SUBMODULE));
    }
}
489}