Skip to main content

grit_lib/
attributes.rs

1//! Gitattributes parsing and pattern matching for `check-attr` and validation.
2//!
3//! Implements Git-consistent rule ordering, macro expansion (`[attr]`), `binary`
4//! expansion, `**` globbing via [`crate::wildmatch`], and optional case folding
5//! for `core.ignorecase`.
6
7use crate::config::parse_path;
8use crate::config::ConfigSet;
9use crate::index::normalize_mode;
10use crate::index::Index;
11use crate::index::MODE_EXECUTABLE;
12use crate::index::MODE_GITLINK;
13use crate::index::MODE_REGULAR;
14use crate::index::MODE_SYMLINK;
15use crate::index::MODE_TREE;
16use crate::objects::parse_tree;
17use crate::objects::ObjectId;
18use crate::objects::ObjectKind;
19use crate::odb::Odb;
20use crate::repo::Repository;
21use crate::rev_parse::resolve_revision;
22use crate::wildmatch::{wildmatch, WM_CASEFOLD, WM_PATHNAME};
23use std::borrow::Cow;
24use std::collections::HashMap;
25use std::ffi::OsStr;
26use std::fs;
27use std::path::{Component, Path, PathBuf};
28use std::sync::{Arc, Mutex, OnceLock};
29use std::time::SystemTime;
30
/// Maximum length of a single `.gitattributes` line (bytes), matching Git (`ATTR_MAX_LINE_LENGTH`).
/// Lines of this length or longer are ignored with a warning, so the longest line that is
/// still parsed is one byte shorter than this limit.
pub const MAX_ATTR_LINE_BYTES: usize = 2048;

/// Maximum `.gitattributes` file size (bytes) before Git ignores the file (100 MiB).
pub const MAX_ATTR_FILE_BYTES: usize = 100 * 1024 * 1024;
37
/// State of a single attribute as reported by `check-attr`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttrValue {
    /// Attribute named without a prefix or value — printed as `set`.
    Set,
    /// Explicit `-attr` in a rule — `check-attr` prints `unset`.
    Unset,
    /// Macro body `!attr` — clears the attribute to *unspecified* (not `unset`).
    Clear,
    /// `attr=value` assignment; the payload is the value text.
    Value(String),
}

impl AttrValue {
    /// Returns the text `git check-attr` prints for this state.
    ///
    /// The three fixed states map to constant words; a [`AttrValue::Value`] prints its
    /// own payload unchanged.
    #[must_use]
    pub fn display(&self) -> &str {
        match self {
            Self::Value(text) => text.as_str(),
            Self::Clear => "unspecified",
            Self::Unset => "unset",
            Self::Set => "set",
        }
    }
}
61
/// Pattern flags after Git `parse_path_pattern` (`dir.c`).
///
/// The values are independent bits combined into `AttrRule::pattern_flags`.
// Pattern contains no `/` (after a trailing `/` is removed): match against the basename only.
const PAT_NODIR: u32 = 1;
// Pattern ended with `/`: it can only match a directory path.
const PAT_MUSTBEDIR: u32 = 2;
// Pattern is `*` followed by a non-empty literal with no further glob specials ("*literal").
const PAT_ENDSWITH: u32 = 4;
66
/// Reports whether byte `c` is one of the glob metacharacters `*`, `?`, `[` or `\`.
#[inline]
fn is_glob_special_attr(c: u8) -> bool {
    b"*?[\\".contains(&c)
}
71
72/// Length of initial literal segment before the first glob special (Git `simple_length`).
73fn simple_length_pat(s: &str) -> usize {
74    let b = s.as_bytes();
75    let mut i = 0;
76    while i < b.len() {
77        if is_glob_special_attr(b[i]) {
78            return i;
79        }
80        i += 1;
81    }
82    i
83}
84
85/// Parse pattern text like Git `parse_path_pattern` (after `!` and unquoting are handled).
86fn parse_attr_pattern_fields(pat: &str) -> (String, u32, usize) {
87    let mut flags = 0u32;
88    let mut len = pat.len();
89    if len > 0 && pat.as_bytes()[len - 1] == b'/' {
90        len -= 1;
91        flags |= PAT_MUSTBEDIR;
92    }
93    let p = &pat[..len];
94    let has_slash = p.as_bytes().contains(&b'/');
95    if !has_slash {
96        flags |= PAT_NODIR;
97    }
98    if let Some(rest) = p.strip_prefix('*') {
99        if !rest.is_empty() && simple_length_pat(rest) == rest.len() {
100            flags |= PAT_ENDSWITH;
101        }
102    }
103    let mut nowild = simple_length_pat(p);
104    if nowild > len {
105        nowild = len;
106    }
107    (p.to_string(), flags, nowild)
108}
109
/// One line in a gitattributes file: a path pattern plus the attribute assignments that
/// apply to paths matching it.
#[derive(Debug, Clone)]
pub struct AttrRule {
    /// Directory of the `.gitattributes` file that defined this rule (repo-relative, `/`,
    /// no trailing slash). Empty for the repository root file.
    pub attr_base: String,
    /// Pattern body (no leading `!`; trailing `/` stripped; same as Git after `parse_path_pattern` prep).
    pub pattern: String,
    /// From `parse_path_pattern`: basename-only match vs full path under `attr_base`.
    /// Bit set built from `PAT_NODIR`, `PAT_MUSTBEDIR` and `PAT_ENDSWITH`.
    pub pattern_flags: u32,
    /// Length of leading literal segment before first wildcard (Git `nowildcardlen`).
    pub nowildcardlen: usize,
    /// If true, this rule was discarded (negative pattern) after emitting a warning.
    /// Validation, warning collection and attribute lookup all ignore skipped rules.
    pub skip: bool,
    /// 1-based line number in the source file.
    pub line: usize,
    /// Attribute assignments in source order (last wins for duplicates on this line).
    pub attrs: Vec<(String, AttrValue)>,
}
129
/// Macro definitions from `[attr]name ...` lines.
#[derive(Debug, Clone, Default)]
pub struct MacroTable {
    /// Maps macro name → list of assignments in source order (e.g. a `!test` token in the
    /// macro body is stored as `("test", AttrValue::Clear)`, i.e. reset to unspecified).
    /// A later definition of the same macro name replaces the earlier one.
    pub defs: HashMap<String, Vec<(String, AttrValue)>>,
}
136
/// Result of parsing a gitattributes file.
#[derive(Debug, Clone, Default)]
pub struct ParsedGitAttributes {
    /// Pattern rules in the order their lines appear in the file.
    pub rules: Vec<AttrRule>,
    /// Macros declared by `[attr]name ...` lines of the file.
    pub macros: MacroTable,
    /// Human-readable diagnostics gathered while parsing (overly long lines, negative
    /// patterns, invalid `builtin_*` attribute names).
    pub warnings: Vec<String>,
}
144
/// Returns `true` only for `builtin_objectmode`, the single `builtin_*` name this module
/// treats as a genuine built-in.
///
/// Every other name — including any other `builtin_*` spelling — yields `false`.
#[must_use]
pub fn is_reserved_builtin_name(name: &str) -> bool {
    name.strip_prefix("builtin_") == Some("objectmode")
}
153
154/// Validate user-defined attribute names in parsed rules (for `git add`).
155///
156/// Returns an error string matching Git when a rule uses an invalid `builtin_*` name.
157pub fn validate_rules_for_add(
158    rules: &[AttrRule],
159    display_path: &str,
160) -> std::result::Result<(), String> {
161    for rule in rules {
162        if rule.skip {
163            continue;
164        }
165        for (name, _) in &rule.attrs {
166            if name.starts_with("builtin_") && !is_reserved_builtin_name(name) {
167                return Err(format!(
168                    "{name} is not a valid attribute name: {display_path}:{}",
169                    rule.line
170                ));
171            }
172        }
173    }
174    Ok(())
175}
176
177/// Collect warnings for invalid `builtin_*` assignments (check-attr continues).
178pub fn builtin_warnings_for_rules(rules: &[AttrRule], display_path: &str) -> Vec<String> {
179    let mut w = Vec::new();
180    for rule in rules {
181        if rule.skip {
182            continue;
183        }
184        for (name, _) in &rule.attrs {
185            if name == "builtin_objectmode" {
186                w.push(format!(
187                    "builtin_objectmode is not a valid attribute name: {display_path}:{}",
188                    rule.line
189                ));
190            } else if name.starts_with("builtin_") && !is_reserved_builtin_name(name) {
191                w.push(format!(
192                    "{name} is not a valid attribute name: {display_path}:{}",
193                    rule.line
194                ));
195            }
196        }
197    }
198    w
199}
200
/// Default location of the user-level attributes file when `core.attributesfile` is unset.
///
/// Follows Git's XDG lookup order: a non-empty `XDG_CONFIG_HOME` wins and yields
/// `$XDG_CONFIG_HOME/git/attributes`; otherwise `$HOME/.config/git/attributes` is used.
/// Returns `None` only when neither variable provides a usable directory.
///
/// `HOME` is consulted only as the fallback, so an environment that sets just
/// `XDG_CONFIG_HOME` still resolves a path. Values are read with `var_os`, so directories
/// whose names are not valid UTF-8 are accepted as well.
fn default_global_attributes_path() -> Option<PathBuf> {
    let xdg = std::env::var_os("XDG_CONFIG_HOME").filter(|dir| !dir.is_empty());
    if let Some(dir) = xdg {
        return Some(PathBuf::from(dir).join("git/attributes"));
    }
    let home = std::env::var_os("HOME")?;
    Some(PathBuf::from(home).join(".config/git/attributes"))
}
210
211fn global_attributes_path(
212    repo: &Repository,
213) -> std::result::Result<Option<PathBuf>, crate::error::Error> {
214    let config = ConfigSet::load(Some(&repo.git_dir), true)?;
215    if let Some(path) = config.get("core.attributesfile") {
216        return Ok(Some(PathBuf::from(parse_path(&path))));
217    }
218    Ok(default_global_attributes_path())
219}
220
/// Read a `.gitattributes` file without following a symlink at `path`.
///
/// * Missing or unreadable path (or content that is not valid UTF-8): `None`, no warning.
/// * Symlink: `None`, and an "unable to access" message naming `display` is appended to
///   `warnings` (in-tree attribute files must not be symlinks).
/// * Anything else: the file content.
fn read_gitattributes_maybe_symlink(
    path: &Path,
    display: &str,
    warnings: &mut Vec<String>,
) -> Option<String> {
    let is_link = fs::symlink_metadata(path).ok()?.file_type().is_symlink();
    if !is_link {
        return fs::read_to_string(path).ok();
    }
    warnings.push(format!(
        "unable to access '{display}': Too many levels of symbolic links"
    ));
    None
}
236
237/// Parse one gitattributes file from disk (patterns are relative to `attr_base`, the directory
238/// containing the file — use `""` for the repository root file).
239pub fn parse_gitattributes_file_content(content: &str, display_path: &str) -> ParsedGitAttributes {
240    parse_gitattributes_content_impl(content, display_path, false, "")
241}
242
243/// Parse attributes defined in a `.gitattributes` file located in `attr_base` (repo-relative,
244/// `/` separators, no trailing slash; empty string for the repository root).
245pub fn parse_gitattributes_file_content_with_base(
246    content: &str,
247    display_path: &str,
248    attr_base: &str,
249) -> ParsedGitAttributes {
250    parse_gitattributes_content_impl(content, display_path, false, attr_base)
251}
252
/// Turn every literal two-character sequence backslash + `n` into a real newline.
///
/// Text without that sequence is returned borrowed, so the common case allocates nothing.
fn preprocess_gitattributes_blob_text(content: &str) -> Cow<'_, str> {
    const ESCAPED_NEWLINE: &str = "\\n";
    if content.contains(ESCAPED_NEWLINE) {
        Cow::Owned(content.replace(ESCAPED_NEWLINE, "\n"))
    } else {
        Cow::Borrowed(content)
    }
}
259
260fn parse_gitattributes_content_impl(
261    content: &str,
262    display_path: &str,
263    from_blob: bool,
264    attr_base: &str,
265) -> ParsedGitAttributes {
266    let preprocessed = if from_blob {
267        preprocess_gitattributes_blob_text(content)
268    } else {
269        Cow::Borrowed(content)
270    };
271    let content = preprocessed.as_ref();
272
273    let mut out = ParsedGitAttributes::default();
274    for (idx, raw_line) in content.lines().enumerate() {
275        let line_no = idx + 1;
276        let line_bytes = raw_line.as_bytes();
277        if line_bytes.len() >= MAX_ATTR_LINE_BYTES {
278            out.warnings.push(format!(
279                "warning: ignoring overly long attributes line {line_no}"
280            ));
281            continue;
282        }
283        parse_one_line(
284            raw_line,
285            line_no,
286            display_path,
287            from_blob,
288            attr_base,
289            &mut out,
290        );
291    }
292    out.warnings
293        .extend(builtin_warnings_for_rules(&out.rules, display_path));
294    out
295}
296
/// Drop leading space, tab, carriage-return and newline characters — and nothing else
/// (matches Git's `blank` set in `attr.c`).
fn skip_ascii_blank(s: &str) -> &str {
    s.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))
}
301
/// Split `s` in front of its first blank (space, tab, CR or LF), like Git's `strcspn` on
/// `blank`.
///
/// The first element is the leading token; the second is the remainder, which still starts
/// with the blank. Without any blank the whole input is the token and the remainder is empty.
fn split_at_first_blank(s: &str) -> (&str, &str) {
    let cut = s
        .find(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))
        .unwrap_or(s.len());
    (&s[..cut], &s[cut..])
}
311
/// C-style unquote for a pattern that starts with `"` (see Git `unquote_c_style` in `quote.c`).
///
/// On success returns the decoded text and the remainder of the input after the closing
/// quote. Recognised escapes are `\a \b \f \n \r \t \v \\ \"` and three-digit octal
/// `\ooo` whose first digit is `0`–`3`.
///
/// # Errors
///
/// `Err(())` when the input does not start with `"`, the closing quote is missing, an
/// escape is unknown or truncated, or the decoded bytes are not valid UTF-8.
fn unquote_c_style(quoted: &str) -> Result<(String, &str), ()> {
    let mut rest = match quoted.as_bytes().split_first() {
        Some((b'"', tail)) => tail,
        _ => return Err(()),
    };
    let mut decoded: Vec<u8> = Vec::new();

    loop {
        // Copy the literal run up to the next quote or backslash; running out of input
        // before either one means the string was never closed.
        let stop = rest
            .iter()
            .position(|&c| matches!(c, b'"' | b'\\'))
            .ok_or(())?;
        decoded.extend_from_slice(&rest[..stop]);
        let marker = rest[stop];
        rest = &rest[stop + 1..];

        if marker == b'"' {
            let tail = std::str::from_utf8(rest).map_err(|_| ())?;
            let text = String::from_utf8(decoded).map_err(|_| ())?;
            return Ok((text, tail));
        }

        // `marker` was a backslash: decode exactly one escape sequence.
        let (&escape, after_escape) = rest.split_first().ok_or(())?;
        rest = after_escape;
        let byte = match escape {
            b'a' => 0x07,
            b'b' => 0x08,
            b'f' => 0x0c,
            b'n' => b'\n',
            b'r' => b'\r',
            b't' => b'\t',
            b'v' => 0x0b,
            b'\\' | b'"' => escape,
            b'0'..=b'3' => match rest {
                [mid @ b'0'..=b'7', low @ b'0'..=b'7', tail @ ..] => {
                    rest = tail;
                    // First digit is at most 3, so the value always fits in one byte.
                    ((escape - b'0') << 6) | ((*mid - b'0') << 3) | (*low - b'0')
                }
                _ => return Err(()),
            },
            _ => return Err(()),
        };
        decoded.push(byte);
    }
}
374
375/// One attribute assignment token (`parse_attr` in Git `attr.c`).
376fn parse_one_attr_token_git(s: &str) -> (&str, Option<&str>, &str) {
377    let bytes = s.as_bytes();
378    let token_end = bytes
379        .iter()
380        .position(|&b| matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
381        .unwrap_or(bytes.len());
382    let eq_pos = s.find('=');
383    let eq_in_token = eq_pos.filter(|&eq| eq < token_end);
384    let (name, val) = if let Some(eq) = eq_in_token {
385        (&s[..eq], Some(&s[eq + 1..token_end]))
386    } else {
387        (&s[..token_end], None)
388    };
389    let rest = skip_ascii_blank(&s[token_end..]);
390    (name, val, rest)
391}
392
393fn accumulate_attr_states(
394    mut states: &str,
395    attrs: &mut Vec<(String, AttrValue)>,
396    macros: &MacroTable,
397    in_macro_def: bool,
398) {
399    loop {
400        states = skip_ascii_blank(states);
401        if states.is_empty() {
402            break;
403        }
404        let (name, val, rest) = parse_one_attr_token_git(states);
405        states = rest;
406        let tok = match val {
407            Some(v) => format!("{name}={v}"),
408            None => name.to_string(),
409        };
410        push_attr_token(&tok, attrs, macros, in_macro_def);
411    }
412}
413
/// Prefix of the first token on a macro-definition line (`[attr]name ...`).
const ATTR_MACRO_PREFIX: &str = "[attr]";
415
416fn parse_one_line(
417    raw_line: &str,
418    line_no: usize,
419    display_path: &str,
420    from_blob: bool,
421    attr_base: &str,
422    out: &mut ParsedGitAttributes,
423) {
424    let _ = display_path;
425    let _ = from_blob;
426    let cp = skip_ascii_blank(raw_line);
427    if cp.is_empty() || cp.starts_with('#') {
428        return;
429    }
430
431    let (pattern_token, states) = if cp.as_bytes().first() == Some(&b'"') {
432        match unquote_c_style(cp) {
433            Ok((pat, rest)) => (pat, rest),
434            Err(()) => {
435                let (a, b) = split_at_first_blank(cp);
436                (a.to_string(), b)
437            }
438        }
439    } else {
440        let (a, b) = split_at_first_blank(cp);
441        (a.to_string(), b)
442    };
443
444    if pattern_token.len() > ATTR_MACRO_PREFIX.len() && pattern_token.starts_with(ATTR_MACRO_PREFIX)
445    {
446        let rest = skip_ascii_blank(&pattern_token[ATTR_MACRO_PREFIX.len()..]);
447        let (macro_name, leftover) = split_at_first_blank(rest);
448        if !leftover.is_empty() || macro_name.is_empty() {
449            return;
450        }
451        let mut attrs = Vec::new();
452        accumulate_attr_states(states, &mut attrs, &out.macros, true);
453        out.macros.defs.insert(macro_name.to_string(), attrs);
454        return;
455    }
456
457    if pattern_token.starts_with('!') && !pattern_token.starts_with("\\!") {
458        out.warnings
459            .push("Negative patterns are ignored".to_string());
460        return;
461    }
462    let pattern_raw = pattern_token.replace("\\!", "!");
463    let (pattern, pattern_flags, nowildcardlen) = parse_attr_pattern_fields(&pattern_raw);
464    let mut attrs = Vec::new();
465    accumulate_attr_states(states, &mut attrs, &out.macros, false);
466    if attrs.is_empty() {
467        return;
468    }
469    out.rules.push(AttrRule {
470        attr_base: attr_base.to_string(),
471        pattern,
472        pattern_flags,
473        nowildcardlen,
474        skip: false,
475        line: line_no,
476        attrs,
477    });
478}
479
480fn push_attr_token(
481    tok: &str,
482    attrs: &mut Vec<(String, AttrValue)>,
483    _macros: &MacroTable,
484    in_macro_def: bool,
485) {
486    if tok == "binary" {
487        attrs.push(("text".into(), AttrValue::Unset));
488        attrs.push(("diff".into(), AttrValue::Unset));
489        attrs.push(("merge".into(), AttrValue::Unset));
490        attrs.push(("binary".into(), AttrValue::Set));
491        return;
492    }
493    if in_macro_def {
494        if let Some(rest) = tok.strip_prefix('!') {
495            attrs.push((rest.to_string(), AttrValue::Clear));
496            return;
497        }
498    }
499    if let Some(rest) = tok.strip_prefix('-') {
500        attrs.push((rest.to_string(), AttrValue::Unset));
501        return;
502    }
503    if let Some((k, v)) = tok.split_once('=') {
504        let v = v.trim_end_matches(|c: char| {
505            matches!(c, ' ' | '\t' | '\r' | '\n') || c == '\u{000b}' || c == '\u{000c}'
506        });
507        attrs.push((k.to_string(), AttrValue::Value(v.to_string())));
508        return;
509    }
510    attrs.push((tok.to_string(), AttrValue::Set));
511}
512
/// Compare the first `count` bytes of `a` and `b`, ASCII-case-insensitively when `icase`.
///
/// Returns `true` when the prefixes are equal. Unlike the C function of the same name the
/// result is a plain "equal" flag, and an input shorter than `count` compares unequal.
fn fspathncmp(a: &[u8], b: &[u8], count: usize, icase: bool) -> bool {
    let (Some(lhs), Some(rhs)) = (a.get(..count), b.get(..count)) else {
        return false;
    };
    if icase {
        lhs.eq_ignore_ascii_case(rhs)
    } else {
        lhs == rhs
    }
}
526
527/// Git `match_basename` (`dir.c`) for attribute patterns.
528fn match_basename_git(
529    basename: &[u8],
530    pattern: &[u8],
531    prefix: usize,
532    patternlen: usize,
533    pat_flags: u32,
534    icase: bool,
535) -> bool {
536    let basenamelen = basename.len();
537    let wm_flags = if icase { WM_CASEFOLD } else { 0 };
538    if prefix == patternlen {
539        return patternlen == basenamelen && fspathncmp(pattern, basename, basenamelen, icase);
540    }
541    if (pat_flags & PAT_ENDSWITH) != 0 {
542        if patternlen <= 1 {
543            return false;
544        }
545        let lit_len = patternlen - 1;
546        if lit_len > basenamelen {
547            return false;
548        }
549        return fspathncmp(
550            &pattern[1..patternlen],
551            &basename[basenamelen - lit_len..],
552            lit_len,
553            icase,
554        );
555    }
556    wildmatch(&pattern[..patternlen], basename, wm_flags)
557}
558
559/// Git `match_pathname` (`dir.c`) for attribute patterns.
560#[allow(clippy::too_many_arguments)]
561fn match_pathname_git(
562    pathname: &[u8],
563    pathlen: usize,
564    base: &[u8],
565    baselen: usize,
566    mut pattern: &[u8],
567    mut prefix: usize,
568    mut patternlen: usize,
569    icase: bool,
570) -> bool {
571    let pathname = &pathname[..pathlen.min(pathname.len())];
572
573    if !pattern.is_empty() && pattern[0] == b'/' {
574        pattern = &pattern[1..];
575        patternlen -= 1;
576        prefix = prefix.saturating_sub(1);
577    }
578
579    if pathlen < baselen + 1 {
580        return false;
581    }
582    if baselen > 0 && pathname[baselen] != b'/' {
583        return false;
584    }
585    if !fspathncmp(pathname, base, baselen, icase) {
586        return false;
587    }
588
589    let namelen = if baselen == 0 {
590        pathlen
591    } else {
592        pathlen - baselen - 1
593    };
594    let name = &pathname[pathlen - namelen..];
595
596    if prefix > 0 {
597        if prefix > namelen {
598            return false;
599        }
600        if !fspathncmp(pattern, name, prefix, icase) {
601            return false;
602        }
603        if patternlen == prefix && namelen == prefix {
604            return true;
605        }
606        let advance = prefix - 1;
607        pattern = &pattern[advance..];
608        patternlen -= advance;
609        let name = &name[advance..];
610        let wm_flags = WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 };
611        return wildmatch(&pattern[..patternlen], name, wm_flags);
612    }
613
614    let wm_flags = WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 };
615    wildmatch(&pattern[..patternlen], name, wm_flags)
616}
617
/// Everything before the last `/` of `rel_path` (no trailing slash), or `""` when the path
/// contains no `/` (a top-level entry).
fn path_dir_prefix(rel_path: &str) -> &str {
    rel_path.rsplit_once('/').map_or("", |(dir, _)| dir)
}
625
626/// Whether a rule from `dir/.gitattributes` may apply to `rel_path` (Git `prepare_attr_stack`).
627///
628/// Rules from nested attribute files only affect paths inside that directory tree.
629#[must_use]
630pub fn attr_rule_applies_to_path(attr_base: &str, rel_path: &str, icase: bool) -> bool {
631    if attr_base.is_empty() {
632        return true;
633    }
634    let dir = path_dir_prefix(rel_path);
635    if dir.is_empty() {
636        return false;
637    }
638    let prefix_eq = |d: &str, b: &str| {
639        if icase {
640            d.eq_ignore_ascii_case(b)
641        } else {
642            d == b
643        }
644    };
645    if prefix_eq(dir, attr_base) {
646        return true;
647    }
648    let bl = attr_base.len();
649    if dir.len() > bl && dir.as_bytes()[bl] == b'/' && prefix_eq(&dir[..bl], attr_base) {
650        return true;
651    }
652    false
653}
654
655/// Match one parsed rule against a repo-relative path (Git `path_matches` / `attr.c`).
656#[must_use]
657pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, icase: bool) -> bool {
658    if !attr_rule_applies_to_path(&rule.attr_base, rel_path, icase) {
659        return false;
660    }
661    let pathname = rel_path.as_bytes();
662    let pathlen = pathname.len();
663    let isdir = pathlen > 0 && pathname[pathlen - 1] == b'/';
664
665    if (rule.pattern_flags & PAT_MUSTBEDIR) != 0 && !isdir {
666        return false;
667    }
668
669    let eff_pathlen = if isdir { pathlen - 1 } else { pathlen };
670    let pathname_trim = &pathname[..eff_pathlen];
671
672    let basename_offset = pathname_trim
673        .iter()
674        .rposition(|&b| b == b'/')
675        .map(|i| i + 1)
676        .unwrap_or(0);
677
678    let pat = rule.pattern.as_bytes();
679    let prefix = rule.nowildcardlen.min(pat.len());
680    let patternlen = pat.len();
681
682    if (rule.pattern_flags & PAT_NODIR) != 0 {
683        let bn = &pathname_trim[basename_offset..];
684        return match_basename_git(bn, pat, prefix, patternlen, rule.pattern_flags, icase);
685    }
686
687    let base = rule.attr_base.as_bytes();
688    match_pathname_git(
689        pathname_trim,
690        eff_pathlen,
691        base,
692        base.len(),
693        pat,
694        prefix,
695        patternlen,
696        icase,
697    )
698}
699
700/// Expand macros and `binary` for one rule's assignments into source-order operations.
701///
702/// These must be applied in order to the same map as later rules (not folded into a local map),
703/// so `!attr` / macro clears remove attributes set by earlier rules on the same path.
704fn expand_rule_attrs_flat(rule: &AttrRule, macros: &MacroTable) -> Vec<(String, AttrValue)> {
705    let mut flat: Vec<(String, AttrValue)> = Vec::new();
706    for (name, val) in &rule.attrs {
707        if name == "binary" {
708            flat.push(("text".into(), AttrValue::Unset));
709            flat.push(("diff".into(), AttrValue::Unset));
710            flat.push(("merge".into(), AttrValue::Unset));
711            flat.push(("binary".into(), AttrValue::Set));
712            continue;
713        }
714        if let Some(exp) = macros.defs.get(name) {
715            flat.push((name.clone(), val.clone()));
716            for (n, v) in exp {
717                flat.push((n.clone(), v.clone()));
718            }
719        } else {
720            flat.push((name.clone(), val.clone()));
721        }
722    }
723    flat
724}
725
726/// Merge assignments: later rules override earlier; within one expanded rule, last wins.
727pub fn collect_attrs_for_path(
728    rules: &[AttrRule],
729    macros: &MacroTable,
730    rel_path: &str,
731    icase: bool,
732) -> HashMap<String, AttrValue> {
733    let mut map: HashMap<String, AttrValue> = HashMap::new();
734    for rule in rules {
735        if rule.skip {
736            continue;
737        }
738        if !attr_rule_matches(rule, rel_path, icase) {
739            continue;
740        }
741        let ops = expand_rule_attrs_flat(rule, macros);
742        for (n, v) in ops {
743            match v {
744                AttrValue::Clear => {
745                    map.remove(&n);
746                }
747                _ => {
748                    map.insert(n, v);
749                }
750            }
751        }
752    }
753    map
754}
755
/// Quote a path for `check-attr` output (C-style) when needed.
///
/// Paths without control characters, `"` or `\` are returned unchanged. Otherwise the
/// path is wrapped in double quotes and escaped the way Git's `quote_c_style` does:
///
/// * `"` and `\` get a backslash;
/// * BEL, BS, TAB, LF, VT, FF and CR use their named escapes (`\a \b \t \n \v \f \r`);
/// * any other control character is written as one zero-padded three-digit octal escape
///   per UTF-8 byte.
///
/// Fixed-width octal keeps the output unambiguous when a digit follows the escape and
/// makes it readable by a three-digit-octal C-style unquoter.
#[must_use]
pub fn quote_path_for_check_attr(path: &str) -> String {
    let needs_quoting = |c: char| c.is_control() || c == '"' || c == '\\';
    if !path.chars().any(needs_quoting) {
        return path.to_string();
    }

    let mut quoted = String::with_capacity(path.len() + 2);
    quoted.push('"');
    for c in path.chars() {
        match c {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            '\u{07}' => quoted.push_str("\\a"),
            '\u{08}' => quoted.push_str("\\b"),
            '\t' => quoted.push_str("\\t"),
            '\n' => quoted.push_str("\\n"),
            '\u{0b}' => quoted.push_str("\\v"),
            '\u{0c}' => quoted.push_str("\\f"),
            '\r' => quoted.push_str("\\r"),
            _ if c.is_control() => {
                let mut utf8 = [0u8; 4];
                for &byte in c.encode_utf8(&mut utf8).as_bytes() {
                    quoted.push('\\');
                    quoted.push(char::from(b'0' + (byte >> 6)));
                    quoted.push(char::from(b'0' + ((byte >> 3) & 7)));
                    quoted.push(char::from(b'0' + (byte & 7)));
                }
            }
            _ => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}
778
/// Normalize `.` / `..` segments in a repo-relative path string.
///
/// Normal segments are kept (lossily converted to UTF-8), `..` drops the previous kept
/// segment if there is one, and every other component — `.`, a root, a prefix — is
/// ignored. The result joins the kept segments with `/`.
#[must_use]
pub fn normalize_rel_path(path: &str) -> String {
    let mut kept = Vec::new();
    for part in Path::new(path).components() {
        if let Component::Normal(segment) = part {
            kept.push(segment.to_string_lossy());
        } else if part == Component::ParentDir {
            kept.pop();
        }
    }
    kept.join("/")
}
796
/// Resolve `.` and `..` components of `path` purely lexically, without touching the
/// file system.
///
/// Prefix, root and normal components are appended as they come, `.` is dropped, and `..`
/// removes the last component accumulated so far (a no-op when nothing can be removed).
fn lexical_normalize_path(path: PathBuf) -> PathBuf {
    let mut normalized = PathBuf::new();
    for part in path.components() {
        match part {
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                normalized.push(part.as_os_str());
            }
        }
    }
    normalized
}
812
813/// Resolve a user path to a repo-relative path (forward slashes).
814///
815/// Uses [`std::fs::canonicalize`] when the target exists; otherwise resolves `..` lexically from the
816/// current directory so paths like `../f` work for missing files (Git `prefix_path`, t0003).
817pub fn path_relative_to_worktree(
818    repo: &Repository,
819    path_str: &str,
820) -> std::result::Result<String, String> {
821    let wt = repo
822        .work_tree
823        .as_ref()
824        .ok_or_else(|| "bare repository — no work tree".to_string())?;
825    let cwd = std::env::current_dir().map_err(|e| e.to_string())?;
826    let p = Path::new(path_str);
827    let combined = if p.is_absolute() {
828        p.to_path_buf()
829    } else {
830        cwd.join(p)
831    };
832
833    let wt_canon = wt.canonicalize().map_err(|e| e.to_string())?;
834
835    if let Ok(abs) = combined.canonicalize() {
836        let rel = abs
837            .strip_prefix(&wt_canon)
838            .map_err(|_| format!("path outside repository: {}", path_str))?;
839        return Ok(normalize_rel_path(
840            rel.to_str().ok_or_else(|| "invalid path".to_string())?,
841        ));
842    }
843
844    let abs_lex = lexical_normalize_path(combined);
845    let rel = abs_lex
846        .strip_prefix(&wt_canon)
847        .map_err(|_| format!("path outside repository: {}", path_str))?;
848    Ok(normalize_rel_path(
849        rel.to_str().ok_or_else(|| "invalid path".to_string())?,
850    ))
851}
852
853fn collect_nested_gitattributes_dirs(work_tree: &Path) -> Vec<PathBuf> {
854    let mut dirs: Vec<PathBuf> = Vec::new();
855    walk_dirs(work_tree, work_tree, &mut dirs);
856    dirs.sort_by(|a, b| {
857        let da = a.components().count();
858        let db = b.components().count();
859        da.cmp(&db).then_with(|| a.cmp(b))
860    });
861    dirs
862}
863
/// Recursively append every directory below `cur` to `dirs`, as a path relative to `root`.
///
/// Directories named `.git` are neither recorded nor entered. Unreadable directories and
/// entries whose type cannot be determined are skipped silently. The entry type comes from
/// `DirEntry::file_type`, which does not follow symlinks, so symlinked directories are not
/// traversed.
fn walk_dirs(root: &Path, cur: &Path, dirs: &mut Vec<PathBuf>) {
    let entries = match fs::read_dir(cur) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    for entry in entries.flatten() {
        let is_dir = entry.file_type().map(|kind| kind.is_dir()).unwrap_or(false);
        if !is_dir {
            continue;
        }
        let child = entry.path();
        if child.file_name() == Some(OsStr::new(".git")) {
            continue;
        }
        dirs.push(child.strip_prefix(root).unwrap_or(&child).to_path_buf());
        walk_dirs(root, &child, dirs);
    }
}
881
882// ── Process-lifetime gitattributes cache ─────────────────────────────
883//
884// `load_gitattributes_stack` re-walks the entire working tree (`read_dir`
885// per directory) and re-parses every `.gitattributes` on each call, and hot
886// paths (grep/diff/add/checkout) call it per file. The parsed stack is
887// memoized for the process lifetime and revalidated with stat stamps on
888// every call:
889//
890// - the global attributes file, root `.gitattributes`, and
891//   `info/attributes` are stamped (mtime + size, or "absent"), recorded
892//   *before* the parse;
893// - the work-tree root directory is mtime-stamped, so creating or deleting
894//   a root-level entry forces a re-walk. Nested `.gitattributes` files are
895//   *not* revalidated per query (see `collect_stack_stamps`); within one
896//   process they behave like C git's process-lifetime attribute cache.
897//
898// Tree-sourced stacks (`attr.tree` / `GIT_ATTR_SOURCE`) are keyed by tree
899// OID and never revalidated: tree objects are content-addressed and
900// immutable.
901//
902// The resolved global-attributes *path* (from `core.attributesFile`) is
903// recorded at parse time and only re-statted afterwards; a mid-process
904// change to that config value is not detected. C git caches the attribute
905// stack per directory for the whole process with no revalidation at all,
906// so serving a stamped copy is strictly more conservative than upstream.
907
/// A file path with the stamp taken for it: `Some((mtime, size))`, or `None` when the
/// stamp could not be taken (e.g. the file is absent).
type AttrFileStamp = (PathBuf, Option<(SystemTime, u64)>);
/// A directory path with its modification time, or `None` when it could not be read.
type AttrDirStamp = (PathBuf, Option<SystemTime>);
910
/// One memoized attribute stack together with the stamps used to decide whether it is
/// still current.
struct AttrStackCacheEntry {
    /// File stamps recorded for the entry; each is re-taken and compared on validation.
    file_stamps: Vec<AttrFileStamp>,
    /// Directory mtime stamps recorded for the entry; compared the same way.
    dir_stamps: Vec<AttrDirStamp>,
    /// The parsed attributes, shared by reference count.
    parsed: Arc<ParsedGitAttributes>,
}
916
/// Process-wide, mutex-guarded cache of stamped attribute stacks, created empty on first
/// use.
// NOTE(review): the `(PathBuf, PathBuf)` key is presumably (git dir, work tree) — confirm
// against the code that inserts entries.
fn attr_stack_cache() -> &'static Mutex<HashMap<(PathBuf, PathBuf), AttrStackCacheEntry>> {
    static CACHE: OnceLock<Mutex<HashMap<(PathBuf, PathBuf), AttrStackCacheEntry>>> =
        OnceLock::new();
    CACHE.get_or_init(|| Mutex::new(HashMap::new()))
}
922
/// Process-wide, mutex-guarded cache of stamped attribute stacks keyed by a single path,
/// created empty on first use.
// NOTE(review): judging by the name this serves bare repositories, with the git directory
// as key — confirm against the code that inserts entries.
fn attr_bare_cache() -> &'static Mutex<HashMap<PathBuf, AttrStackCacheEntry>> {
    static CACHE: OnceLock<Mutex<HashMap<PathBuf, AttrStackCacheEntry>>> = OnceLock::new();
    CACHE.get_or_init(|| Mutex::new(HashMap::new()))
}
927
928fn attr_tree_cache() -> &'static Mutex<HashMap<ObjectId, Arc<ParsedGitAttributes>>> {
929    static CACHE: OnceLock<Mutex<HashMap<ObjectId, Arc<ParsedGitAttributes>>>> = OnceLock::new();
930    CACHE.get_or_init(|| Mutex::new(HashMap::new()))
931}
932
/// Stat `path` without following symlinks and return `(mtime, size)`.
///
/// Yields `None` when the metadata cannot be read (for example because the
/// path does not exist) or when the platform reports no modification time.
/// Because the link itself is stamped rather than its target, replacing a
/// symlinked `.gitattributes` by a regular file changes the stamp.
fn attr_file_stamp(path: &Path) -> Option<(SystemTime, u64)> {
    let meta = fs::symlink_metadata(path).ok()?;
    let mtime = meta.modified().ok()?;
    Some((mtime, meta.len()))
}
941
/// Modification time of `path`, read without following symlinks.
///
/// Yields `None` when the metadata cannot be read or carries no mtime.
fn attr_dir_stamp(path: &Path) -> Option<SystemTime> {
    match fs::symlink_metadata(path) {
        Ok(meta) => meta.modified().ok(),
        Err(_) => None,
    }
}
945
946fn attr_stamps_valid(entry: &AttrStackCacheEntry) -> bool {
947    entry
948        .file_stamps
949        .iter()
950        .all(|(path, stamp)| attr_file_stamp(path) == *stamp)
951        && entry
952            .dir_stamps
953            .iter()
954            .all(|(path, stamp)| attr_dir_stamp(path) == *stamp)
955}
956
957/// Stamp the cheap top-level inputs of the stack: the global attributes
958/// file, root `.gitattributes`, `info/attributes`, and the work-tree root
959/// directory's mtime (~4 stats per validation).
960///
961/// Nested per-directory `.gitattributes` files are deliberately *not*
962/// stamped: revalidating them costs two stats per walked directory, which
963/// dominates per-file hot loops on large trees. Within one process a change
964/// to an already-loaded nested file is therefore served stale — matching
965/// C git, which caches attribute stacks for the whole process with no
966/// revalidation at all. The checkout/apply/merge materialization paths are
967/// unaffected: they read attributes through
968/// `crlf::load_gitattributes_for_checkout` (index/odb-sourced), not this
969/// work-tree stack. Creating or deleting entries in the work-tree *root*
970/// still bumps its stamped mtime and forces a fresh walk.
971fn collect_stack_stamps(
972    repo: &Repository,
973    work_tree: &Path,
974) -> std::result::Result<(Vec<AttrFileStamp>, Vec<AttrDirStamp>), crate::error::Error> {
975    let mut file_stamps = Vec::new();
976    if let Some(g) = global_attributes_path(repo)? {
977        let stamp = attr_file_stamp(&g);
978        file_stamps.push((g, stamp));
979    }
980    let root_ga = work_tree.join(".gitattributes");
981    let stamp = attr_file_stamp(&root_ga);
982    file_stamps.push((root_ga, stamp));
983    let info = repo.git_dir.join("info/attributes");
984    let stamp = attr_file_stamp(&info);
985    file_stamps.push((info, stamp));
986    let dir_stamps = vec![(work_tree.to_path_buf(), attr_dir_stamp(work_tree))];
987    Ok((file_stamps, dir_stamps))
988}
989
990/// Load the full stack of attribute rules for a normal repository (working tree).
991///
992/// Results are memoized for the process lifetime and revalidated against
993/// stat stamps on every call (see the cache notes above).
994pub fn load_gitattributes_stack(
995    repo: &Repository,
996    work_tree: &Path,
997) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
998    let key = (repo.git_dir.clone(), work_tree.to_path_buf());
999    {
1000        let cache = attr_stack_cache()
1001            .lock()
1002            .unwrap_or_else(std::sync::PoisonError::into_inner);
1003        if let Some(entry) = cache.get(&key) {
1004            if attr_stamps_valid(entry) {
1005                return Ok((*entry.parsed).clone());
1006            }
1007        }
1008    }
1009    let (file_stamps, dir_stamps) = collect_stack_stamps(repo, work_tree)?;
1010    let parsed = load_gitattributes_stack_uncached(repo, work_tree)?;
1011    let mut cache = attr_stack_cache()
1012        .lock()
1013        .unwrap_or_else(std::sync::PoisonError::into_inner);
1014    cache.insert(
1015        key,
1016        AttrStackCacheEntry {
1017            file_stamps,
1018            dir_stamps,
1019            parsed: Arc::new(parsed.clone()),
1020        },
1021    );
1022    Ok(parsed)
1023}
1024
1025fn load_gitattributes_stack_uncached(
1026    repo: &Repository,
1027    work_tree: &Path,
1028) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1029    let mut merged = ParsedGitAttributes::default();
1030
1031    if let Some(g) = global_attributes_path(repo)? {
1032        if g.exists() {
1033            if let Ok(content) = fs::read_to_string(&g) {
1034                if content.len() <= MAX_ATTR_FILE_BYTES {
1035                    let mut p =
1036                        parse_gitattributes_file_content(&content, g.to_string_lossy().as_ref());
1037                    merged.rules.append(&mut p.rules);
1038                    merged.macros.defs.extend(p.macros.defs.drain());
1039                    merged.warnings.append(&mut p.warnings);
1040                } else {
1041                    merged.warnings.push(format!(
1042                        "warning: ignoring overly large gitattributes file '{}'",
1043                        g.display()
1044                    ));
1045                }
1046            }
1047        }
1048    }
1049
1050    let root_ga = work_tree.join(".gitattributes");
1051    if let Some(content) =
1052        read_gitattributes_maybe_symlink(&root_ga, ".gitattributes", &mut merged.warnings)
1053    {
1054        if content.len() <= MAX_ATTR_FILE_BYTES {
1055            let mut p = parse_gitattributes_file_content(&content, ".gitattributes");
1056            merged.rules.append(&mut p.rules);
1057            merged.macros.defs.extend(p.macros.defs.drain());
1058            merged.warnings.append(&mut p.warnings);
1059        } else {
1060            merged.warnings.push(
1061                "warning: ignoring overly large gitattributes file '.gitattributes'".to_string(),
1062            );
1063        }
1064    }
1065
1066    for rel in collect_nested_gitattributes_dirs(work_tree) {
1067        let ga = work_tree.join(&rel).join(".gitattributes");
1068        if let Some(content) = read_gitattributes_maybe_symlink(
1069            &ga,
1070            &format!("{}/.gitattributes", rel.display()),
1071            &mut merged.warnings,
1072        ) {
1073            if content.len() > MAX_ATTR_FILE_BYTES {
1074                merged.warnings.push(format!(
1075                    "warning: ignoring overly large gitattributes file '{}'",
1076                    ga.display()
1077                ));
1078                continue;
1079            }
1080            let prefix = rel.to_string_lossy().replace('\\', "/");
1081            let mut p = parse_gitattributes_file_content_with_base(
1082                &content,
1083                &ga.to_string_lossy(),
1084                &prefix,
1085            );
1086            merged.rules.append(&mut p.rules);
1087            merged.macros.defs.extend(p.macros.defs.drain());
1088            merged.warnings.append(&mut p.warnings);
1089        }
1090    }
1091
1092    let info = repo.git_dir.join("info/attributes");
1093    if info.exists() {
1094        if let Ok(content) = fs::read_to_string(&info) {
1095            if content.len() <= MAX_ATTR_FILE_BYTES {
1096                let mut p = parse_gitattributes_file_content(&content, "info/attributes");
1097                merged.rules.append(&mut p.rules);
1098                merged.macros.defs.extend(p.macros.defs.drain());
1099                merged.warnings.append(&mut p.warnings);
1100            }
1101        }
1102    }
1103
1104    Ok(merged)
1105}
1106
1107/// Bare repository: only `info/attributes` from disk (no in-repo `.gitattributes` file).
1108///
1109/// Memoized like [`load_gitattributes_stack`], keyed by `git_dir`.
1110pub fn load_gitattributes_bare(
1111    repo: &Repository,
1112) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1113    let key = repo.git_dir.clone();
1114    {
1115        let cache = attr_bare_cache()
1116            .lock()
1117            .unwrap_or_else(std::sync::PoisonError::into_inner);
1118        if let Some(entry) = cache.get(&key) {
1119            if attr_stamps_valid(entry) {
1120                return Ok((*entry.parsed).clone());
1121            }
1122        }
1123    }
1124    let mut file_stamps = Vec::new();
1125    if let Some(g) = global_attributes_path(repo)? {
1126        let stamp = attr_file_stamp(&g);
1127        file_stamps.push((g, stamp));
1128    }
1129    let info = repo.git_dir.join("info/attributes");
1130    let stamp = attr_file_stamp(&info);
1131    file_stamps.push((info, stamp));
1132    let parsed = load_gitattributes_bare_uncached(repo)?;
1133    let mut cache = attr_bare_cache()
1134        .lock()
1135        .unwrap_or_else(std::sync::PoisonError::into_inner);
1136    cache.insert(
1137        key,
1138        AttrStackCacheEntry {
1139            file_stamps,
1140            dir_stamps: Vec::new(),
1141            parsed: Arc::new(parsed.clone()),
1142        },
1143    );
1144    Ok(parsed)
1145}
1146
1147fn load_gitattributes_bare_uncached(
1148    repo: &Repository,
1149) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1150    let mut merged = ParsedGitAttributes::default();
1151    if let Some(g) = global_attributes_path(repo)? {
1152        if g.exists() {
1153            if let Ok(content) = fs::read_to_string(&g) {
1154                if content.len() <= MAX_ATTR_FILE_BYTES {
1155                    let mut p =
1156                        parse_gitattributes_file_content(&content, g.to_string_lossy().as_ref());
1157                    merged.rules.append(&mut p.rules);
1158                    merged.macros.defs.extend(p.macros.defs.drain());
1159                    merged.warnings.append(&mut p.warnings);
1160                }
1161            }
1162        }
1163    }
1164    let info = repo.git_dir.join("info/attributes");
1165    if info.exists() {
1166        if let Ok(content) = fs::read_to_string(&info) {
1167            if content.len() <= MAX_ATTR_FILE_BYTES {
1168                let mut p = parse_gitattributes_file_content(&content, "info/attributes");
1169                merged.rules.append(&mut p.rules);
1170                merged.macros.defs.extend(p.macros.defs.drain());
1171                merged.warnings.append(&mut p.warnings);
1172            }
1173        }
1174    }
1175    // Without a work tree, Git reads tracked `.gitattributes` from the index (Git
1176    // `read_attr_from_index`), so e.g. `git -C .git diff-tree --check` still honours a
1177    // committed `* -whitespace` attribute. Prepend index rules so work-tree-equivalent
1178    // ordering (closer paths win) is preserved relative to info/global.
1179    if let Ok(index) = Index::load(&repo.git_dir.join("index")) {
1180        if let Ok(mut from_index) = load_gitattributes_from_index(&index, &repo.odb, &repo.git_dir)
1181        {
1182            // info/global attributes are lower priority than per-tree `.gitattributes`,
1183            // so place the index rules ahead of what we have collected so far.
1184            from_index.rules.append(&mut merged.rules);
1185            merged.rules = from_index.rules;
1186            for (k, v) in from_index.macros.defs.drain() {
1187                merged.macros.defs.entry(k).or_insert(v);
1188            }
1189            merged.warnings.append(&mut from_index.warnings);
1190        }
1191    }
1192    Ok(merged)
1193}
1194
1195/// Read `.gitattributes` blob from a tree object at `tree_oid`, recursively.
1196pub fn load_gitattributes_from_tree(
1197    odb: &Odb,
1198    tree_oid: &ObjectId,
1199) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1200    // Tree objects are content-addressed and immutable: no revalidation.
1201    {
1202        let cache = attr_tree_cache()
1203            .lock()
1204            .unwrap_or_else(std::sync::PoisonError::into_inner);
1205        if let Some(parsed) = cache.get(tree_oid) {
1206            return Ok((**parsed).clone());
1207        }
1208    }
1209    let mut merged = ParsedGitAttributes::default();
1210    walk_tree_attrs(odb, tree_oid, "", &mut merged)?;
1211    let mut cache = attr_tree_cache()
1212        .lock()
1213        .unwrap_or_else(std::sync::PoisonError::into_inner);
1214    cache.insert(*tree_oid, Arc::new(merged.clone()));
1215    Ok(merged)
1216}
1217
1218fn walk_tree_attrs(
1219    odb: &Odb,
1220    tree_oid: &ObjectId,
1221    prefix: &str,
1222    merged: &mut ParsedGitAttributes,
1223) -> std::result::Result<(), crate::error::Error> {
1224    let obj = odb.read(tree_oid)?;
1225    if obj.kind != ObjectKind::Tree {
1226        return Ok(());
1227    }
1228    let entries = parse_tree(&obj.data)?;
1229    for e in entries {
1230        let name = String::from_utf8_lossy(&e.name).to_string();
1231        let path = if prefix.is_empty() {
1232            name.clone()
1233        } else {
1234            format!("{prefix}/{name}")
1235        };
1236        match e.mode {
1237            0o040000 => {
1238                walk_tree_attrs(odb, &e.oid, &path, merged)?;
1239            }
1240            0o100644 | 0o100755 | 0o120000 if name == ".gitattributes" => {
1241                let oid = e.oid;
1242                {
1243                    let blob = odb.read(&oid)?;
1244                    if blob.kind != ObjectKind::Blob {
1245                        continue;
1246                    }
1247                    if blob.data.len() > MAX_ATTR_FILE_BYTES {
1248                        merged.warnings.push(
1249                            "warning: ignoring overly large gitattributes blob '.gitattributes'"
1250                                .to_string(),
1251                        );
1252                        continue;
1253                    }
1254                    let content = String::from_utf8_lossy(&blob.data).into_owned();
1255                    let display = format!("{path} (tree)");
1256                    let attr_base = Path::new(&path)
1257                        .parent()
1258                        .map(|p| p.to_string_lossy().replace('\\', "/"))
1259                        .unwrap_or_default();
1260                    let mut p =
1261                        parse_gitattributes_content_impl(&content, &display, true, &attr_base);
1262                    merged.rules.append(&mut p.rules);
1263                    merged.macros.defs.extend(p.macros.defs.drain());
1264                    merged.warnings.append(&mut p.warnings);
1265                }
1266            }
1267            _ => {}
1268        }
1269    }
1270    Ok(())
1271}
1272
1273/// Load merged `.gitattributes` rules for diff and merge (respects `GIT_ATTR_SOURCE` / `attr.tree`).
1274///
1275/// Resolution order matches Git's attribute source for diff: optional tree from
1276/// [`resolve_attr_treeish`], then work tree stack (or bare `info/attributes` only).
1277///
1278/// # Errors
1279///
1280/// Returns an error when a tree-ish source is set from the environment or command line and cannot
1281/// be resolved (Git: *"bad --attr-source or GIT_ATTR_SOURCE"*).
1282pub fn load_gitattributes_for_diff(
1283    repo: &Repository,
1284) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1285    let (treeish, ignore_bad_tree) = resolve_attr_treeish(repo, None)?;
1286    if let Some(spec) = treeish.filter(|s| !s.is_empty()) {
1287        match resolve_tree_oid(repo, &spec) {
1288            Ok(oid) => return load_gitattributes_from_tree(&repo.odb, &oid),
1289            Err(_) if ignore_bad_tree => {}
1290            Err(_) => {
1291                return Err(crate::error::Error::InvalidRef(format!(
1292                    "bad --attr-source or GIT_ATTR_SOURCE: {spec}"
1293                )));
1294            }
1295        }
1296    }
1297    if let Some(wt) = repo.work_tree.as_deref() {
1298        return load_gitattributes_stack(repo, wt);
1299    }
1300    load_gitattributes_bare(repo)
1301}
1302
1303/// Resolve `attr.tree`, `GIT_ATTR_SOURCE`, `--source` precedence for check-attr.
1304///
1305/// The second return value is `ignore_bad_resolution`: when true (only for `attr.tree` from
1306/// config), an unresolvable tree-ish falls back to reading `.gitattributes` from the work tree
1307/// or index instead of erroring (matches Git `compute_default_attr_source`).
1308pub fn resolve_attr_treeish(
1309    repo: &Repository,
1310    source_arg: Option<&str>,
1311) -> std::result::Result<(Option<String>, bool), crate::error::Error> {
1312    let env_src = std::env::var("GIT_ATTR_SOURCE")
1313        .ok()
1314        .filter(|s| !s.is_empty());
1315    let config = ConfigSet::load(Some(&repo.git_dir), true)?;
1316    let cfg_tree = config.get("attr.tree");
1317    if let Some(s) = source_arg.map(|s| s.to_string()) {
1318        return Ok((Some(s), false));
1319    }
1320    if let Some(s) = env_src {
1321        return Ok((Some(s), false));
1322    }
1323    if let Some(s) = cfg_tree {
1324        return Ok((Some(s), true));
1325    }
1326    Ok((None, false))
1327}
1328
1329/// Parse a revision to a tree OID for attribute loading.
1330pub fn resolve_tree_oid(repo: &Repository, spec: &str) -> std::result::Result<ObjectId, String> {
1331    let oid = resolve_revision(repo, spec).map_err(|e| e.to_string())?;
1332    let obj = repo.read_replaced(&oid).map_err(|e| e.to_string())?;
1333    match obj.kind {
1334        ObjectKind::Commit => {
1335            let c = crate::objects::parse_commit(&obj.data).map_err(|e| e.to_string())?;
1336            Ok(c.tree)
1337        }
1338        ObjectKind::Tree => Ok(oid),
1339        _ => Err("revision is not a commit or tree".to_string()),
1340    }
1341}
1342
1343/// Load attributes from the index (stage 0) for `.gitattributes` paths only.
1344pub fn load_gitattributes_from_index(
1345    index: &Index,
1346    odb: &Odb,
1347    work_tree: &Path,
1348) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1349    let mut merged = ParsedGitAttributes::default();
1350    let mut paths: Vec<Vec<u8>> = index
1351        .entries
1352        .iter()
1353        .filter(|e| e.stage() == 0 && e.path.ends_with(b".gitattributes"))
1354        .map(|e| e.path.clone())
1355        .collect();
1356    paths.sort();
1357    for path_bytes in paths {
1358        let Ok(rel) = std::str::from_utf8(&path_bytes) else {
1359            continue;
1360        };
1361        let Some(entry) = index.get(&path_bytes, 0) else {
1362            continue;
1363        };
1364        let obj = odb.read(&entry.oid)?;
1365        if obj.data.len() > MAX_ATTR_FILE_BYTES {
1366            merged.warnings.push(format!(
1367                "warning: ignoring overly large gitattributes blob '{}'",
1368                rel
1369            ));
1370            continue;
1371        }
1372        let content = String::from_utf8_lossy(&obj.data);
1373        let attr_base = Path::new(rel)
1374            .parent()
1375            .map(|p| p.to_string_lossy().replace('\\', "/"))
1376            .unwrap_or_default();
1377        let mut p = parse_gitattributes_content_impl(&content, rel, true, &attr_base);
1378        merged.rules.append(&mut p.rules);
1379        merged.macros.defs.extend(p.macros.defs.drain());
1380        merged.warnings.append(&mut p.warnings);
1381    }
1382    let _ = work_tree;
1383    Ok(merged)
1384}
1385
1386/// Return `builtin_objectmode` value for a path (working tree), or `None` if unavailable.
1387///
1388/// Submodule checkout directories (`.git` is a file containing `gitdir:`) report `160000`
1389/// like Git, not `040000`.
1390#[must_use]
1391pub fn builtin_objectmode_worktree(repo: &Repository, rel_path: &str) -> Option<String> {
1392    let wt = repo.work_tree.as_ref()?;
1393    let p = wt.join(rel_path);
1394    let meta = fs::symlink_metadata(&p).ok()?;
1395    let ft = meta.file_type();
1396    if ft.is_symlink() {
1397        return Some("120000".to_string());
1398    }
1399    if ft.is_dir() {
1400        let git = p.join(".git");
1401        if let Ok(git_meta) = fs::symlink_metadata(&git) {
1402            if !git_meta.file_type().is_dir() {
1403                if let Ok(content) = fs::read_to_string(&git) {
1404                    if content.starts_with("gitdir:") {
1405                        return Some("160000".to_string());
1406                    }
1407                }
1408            }
1409        }
1410        return Some("040000".to_string());
1411    }
1412    #[cfg(unix)]
1413    {
1414        use std::os::unix::fs::MetadataExt;
1415        let m = normalize_mode(meta.mode());
1416        Some(format!("{:06o}", m))
1417    }
1418    #[cfg(not(unix))]
1419    {
1420        let _ = repo;
1421        None
1422    }
1423}
1424
1425/// `builtin_objectmode` from the index when `--cached` is used.
1426#[must_use]
1427pub fn builtin_objectmode_index(index: &Index, rel_path: &str) -> Option<String> {
1428    let key = rel_path.as_bytes();
1429    let e = index.get(key, 0)?;
1430    let m = e.mode;
1431    if m == MODE_SYMLINK {
1432        return Some("120000".to_string());
1433    }
1434    if m == MODE_GITLINK {
1435        return Some("160000".to_string());
1436    }
1437    if m == MODE_TREE {
1438        return Some("040000".to_string());
1439    }
1440    if m == MODE_EXECUTABLE {
1441        return Some("100755".to_string());
1442    }
1443    if m == MODE_REGULAR {
1444        return Some("100644".to_string());
1445    }
1446    Some(format!("{:06o}", m))
1447}
1448
#[cfg(test)]
mod tests {
    use super::*;

    /// A `d/yes notest` rule that follows `d/* test=...` in a nested file
    /// must clear `test` for `a/b/d/yes` through the root-level `notest`
    /// macro.
    #[test]
    fn d_yes_rule_clears_test_after_d_star() {
        let root_file = parse_gitattributes_file_content("[attr]notest !test\n", ".gitattributes");
        let mut nested_file = parse_gitattributes_file_content_with_base(
            "h test=a/b/h\nd/* test=a/b/d/*\nd/yes notest\n",
            "a/b/.gitattributes",
            "a/b",
        );
        assert_eq!(nested_file.rules.len(), 3);

        // Assemble the stack: root macros first, then the nested file.
        let mut stack = ParsedGitAttributes::default();
        stack.macros.defs.extend(root_file.macros.defs);
        stack.rules.append(&mut nested_file.rules);
        stack.macros.defs.extend(nested_file.macros.defs);

        let rule = stack
            .rules
            .iter()
            .find(|r| r.pattern == "d/yes")
            .expect("d/yes rule");
        assert!(attr_rule_matches(rule, "a/b/d/yes", false));

        let attrs = collect_attrs_for_path(&stack.rules, &stack.macros, "a/b/d/yes", false);
        let test_attr = attrs.get("test");
        assert!(
            test_attr.is_none(),
            "expected test cleared by notest macro, got {:?}",
            test_attr
        );
    }
}
1480
#[cfg(test)]
mod attr_cache_tests {
    use super::*;
    use filetime::FileTime;

    /// Initialise a fresh repository rooted at `dir`.
    fn init_repo_in(dir: &Path) -> Repository {
        crate::repo::init_repository(dir, false, "main", None, "files").expect("init repo")
    }

    /// Patterns of all rules the (cached) stack loader reports, in order.
    fn patterns(repo: &Repository, work_tree: &Path) -> Vec<String> {
        load_gitattributes_stack(repo, work_tree)
            .expect("load stack")
            .rules
            .iter()
            .map(|rule| rule.pattern.clone())
            .collect()
    }

    /// Current mtime of `path`, without following symlinks.
    fn mtime(path: &Path) -> FileTime {
        let meta = fs::symlink_metadata(path).expect("stat");
        FileTime::from_last_modification_time(&meta)
    }

    /// Force the mtime of `path` to `stamp`.
    fn set_mtime(path: &Path, stamp: FileTime) {
        filetime::set_file_mtime(path, stamp).expect("restore mtime");
    }

    /// Old, fixed timestamp used to pre-age the work-tree directory so that
    /// a later change visibly bumps its mtime even on filesystems with
    /// coarse mtime ticks.
    fn aged() -> FileTime {
        FileTime::from_unix_time(1_000_000_000, 0)
    }

    #[test]
    fn stack_cache_serves_same_stamp_and_invalidates_on_change() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let repo = init_repo_in(root);
        let attrs_file = root.join(".gitattributes");
        fs::write(&attrs_file, "*.aaa text\n").expect("write v1");
        let root_mtime = mtime(root);
        set_mtime(root, root_mtime);
        let file_mtime = mtime(&attrs_file);
        assert_eq!(patterns(&repo, root), ["*.aaa"]);

        // Identical size and restored mtimes (file and work-tree directory):
        // stat cannot see the edit, so the cached parse must be returned.
        // This is the assertion that proves the cache is really consulted.
        fs::write(&attrs_file, "*.bbb text\n").expect("write v2");
        set_mtime(&attrs_file, file_mtime);
        set_mtime(root, root_mtime);
        assert_eq!(patterns(&repo, root), ["*.aaa"]);

        // A different size invalidates the entry even with restored mtimes.
        fs::write(&attrs_file, "*.ccc-longer text\n").expect("write v3");
        set_mtime(&attrs_file, file_mtime);
        set_mtime(root, root_mtime);
        assert_eq!(patterns(&repo, root), ["*.ccc-longer"]);
    }

    #[test]
    fn new_nested_gitattributes_is_detected() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let repo = init_repo_in(root);
        fs::write(root.join(".gitattributes"), "root-rule text\n").expect("write root");
        set_mtime(root, aged());
        assert_eq!(patterns(&repo, root), ["root-rule"]);

        // The mkdir bumps the stamped work-tree mtime; the forced re-walk
        // then discovers the new nested file.
        fs::create_dir(root.join("sub")).expect("mkdir");
        fs::write(root.join("sub/.gitattributes"), "nested-rule text\n").expect("write nested");
        assert_eq!(patterns(&repo, root), ["root-rule", "nested-rule"]);
    }

    #[test]
    fn modified_nested_gitattributes_follows_c_git_process_semantics() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let repo = init_repo_in(root);
        fs::create_dir(root.join("sub")).expect("mkdir");
        let nested_file = root.join("sub/.gitattributes");
        fs::write(&nested_file, "one text\n").expect("write v1");
        set_mtime(root, aged());
        assert_eq!(patterns(&repo, root), ["one"]);

        // Nested files are not revalidated per query: within one process a
        // content edit keeps being served from the cache, like C git's
        // process-lifetime attribute caching.
        fs::write(&nested_file, "two-longer text\n").expect("write v2");
        assert_eq!(patterns(&repo, root), ["one"]);

        // Any root-level change (here a new top-level directory) bumps the
        // stamped work-tree mtime, and the fresh walk picks up the edit.
        fs::create_dir(root.join("poke")).expect("mkdir poke");
        assert_eq!(patterns(&repo, root), ["two-longer"]);
    }

    #[test]
    fn info_attributes_is_stamped() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let repo = init_repo_in(root);
        assert!(patterns(&repo, root).is_empty());

        // An `info/attributes` that appears after an empty stack was cached
        // must be noticed on the next load.
        fs::write(repo.git_dir.join("info/attributes"), "from-info text\n")
            .expect("write info");
        assert_eq!(patterns(&repo, root), ["from-info"]);
    }
}