Skip to main content

grit_lib/
attributes.rs

1//! Gitattributes parsing and pattern matching for `check-attr` and validation.
2//!
3//! Implements Git-consistent rule ordering, macro expansion (`[attr]`), `binary`
4//! expansion, `**` globbing via [`crate::wildmatch`], and optional case folding
5//! for `core.ignorecase`.
6
7use crate::config::parse_path;
8use crate::config::ConfigSet;
9use crate::index::normalize_mode;
10use crate::index::Index;
11use crate::index::MODE_EXECUTABLE;
12use crate::index::MODE_GITLINK;
13use crate::index::MODE_REGULAR;
14use crate::index::MODE_SYMLINK;
15use crate::index::MODE_TREE;
16use crate::objects::parse_tree;
17use crate::objects::ObjectId;
18use crate::objects::ObjectKind;
19use crate::odb::Odb;
20use crate::repo::Repository;
21use crate::rev_parse::resolve_revision;
22use crate::wildmatch::{wildmatch, WM_CASEFOLD, WM_PATHNAME};
23use std::borrow::Cow;
24use std::collections::HashMap;
25use std::ffi::OsStr;
26use std::fs;
27use std::path::{Component, Path, PathBuf};
28
/// Maximum length of a single `.gitattributes` line (bytes), matching Git (`ATTR_MAX_LINE_LENGTH`).
///
/// The limit is exclusive: a line whose byte length is equal to or greater than this value is
/// skipped and a warning is recorded instead.
pub const MAX_ATTR_LINE_BYTES: usize = 2048;

/// Maximum `.gitattributes` file size (bytes), i.e. 100 MiB.
///
/// The loaders in this module parse content of up to and including this many bytes; anything
/// larger is ignored.
pub const MAX_ATTR_FILE_BYTES: usize = 100 * 1024 * 1024;
35
/// State assigned to one attribute by a rule, in the form `check-attr` reports it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttrValue {
    /// Bare `attr` in a rule — `check-attr` prints `set`.
    Set,
    /// Explicit `-attr` in a rule — `check-attr` prints `unset`.
    Unset,
    /// Macro body `!attr` — clears the attribute to *unspecified* (not `unset`).
    Clear,
    /// `attr=value` — `check-attr` prints the value itself.
    Value(String),
}

impl AttrValue {
    /// Returns the text `git check-attr` prints for this state.
    ///
    /// Fixed states map to a keyword; [`AttrValue::Value`] yields its stored string unchanged.
    #[must_use]
    pub fn display(&self) -> &str {
        use AttrValue::{Clear, Set, Unset, Value};
        match self {
            Value(text) => text.as_str(),
            Clear => "unspecified",
            Unset => "unset",
            Set => "set",
        }
    }
}
59
// Pattern flags after Git `parse_path_pattern` (`dir.c`); combined bitwise in
// `AttrRule::pattern_flags`.

/// The pattern (after dropping one trailing `/`) contains no `/`, so it is matched against the
/// basename of the path only.
const PAT_NODIR: u32 = 1;
/// The pattern was written with a trailing `/`; it only matches paths that themselves end in `/`.
const PAT_MUSTBEDIR: u32 = 2;
/// The pattern is `*` followed by a non-empty literal, so it can be matched as a plain suffix.
const PAT_ENDSWITH: u32 = 4;
64
/// Reports whether `c` is one of the bytes that ends a pattern's literal prefix:
/// `*`, `?`, `[` or `\`.
#[inline]
fn is_glob_special_attr(c: u8) -> bool {
    b"*?[\\".contains(&c)
}
69
70/// Length of initial literal segment before the first glob special (Git `simple_length`).
71fn simple_length_pat(s: &str) -> usize {
72    let b = s.as_bytes();
73    let mut i = 0;
74    while i < b.len() {
75        if is_glob_special_attr(b[i]) {
76            return i;
77        }
78        i += 1;
79    }
80    i
81}
82
83/// Parse pattern text like Git `parse_path_pattern` (after `!` and unquoting are handled).
84fn parse_attr_pattern_fields(pat: &str) -> (String, u32, usize) {
85    let mut flags = 0u32;
86    let mut len = pat.len();
87    if len > 0 && pat.as_bytes()[len - 1] == b'/' {
88        len -= 1;
89        flags |= PAT_MUSTBEDIR;
90    }
91    let p = &pat[..len];
92    let has_slash = p.as_bytes().contains(&b'/');
93    if !has_slash {
94        flags |= PAT_NODIR;
95    }
96    if let Some(rest) = p.strip_prefix('*') {
97        if !rest.is_empty() && simple_length_pat(rest) == rest.len() {
98            flags |= PAT_ENDSWITH;
99        }
100    }
101    let mut nowild = simple_length_pat(p);
102    if nowild > len {
103        nowild = len;
104    }
105    (p.to_string(), flags, nowild)
106}
107
/// One pattern line of a gitattributes file: the parsed pattern plus the attribute assignments
/// that follow it.
#[derive(Debug, Clone)]
pub struct AttrRule {
    /// Directory of the `.gitattributes` file that defined this rule (repo-relative, `/`,
    /// no trailing slash). Empty for the repository root file.
    pub attr_base: String,
    /// Pattern body (no leading `!`; trailing `/` stripped; same as Git after `parse_path_pattern` prep).
    pub pattern: String,
    /// Bitwise OR of the `PAT_*` flags from `parse_path_pattern`; among other things it selects
    /// basename-only matching vs matching the full path under `attr_base`.
    pub pattern_flags: u32,
    /// Length of leading literal segment before first wildcard (Git `nowildcardlen`).
    pub nowildcardlen: usize,
    /// If true, this rule was discarded (negative pattern) after emitting a warning; validation
    /// and attribute collection both step over such rules.
    pub skip: bool,
    /// 1-based line number in the source file.
    pub line: usize,
    /// Attribute assignments in source order (last wins for duplicates on this line).
    pub attrs: Vec<(String, AttrValue)>,
}
127
/// Macro definitions from `[attr]name ...` lines.
#[derive(Debug, Clone, Default)]
pub struct MacroTable {
    /// Maps macro name → the assignments in its body, in source order. For example a body token
    /// `!test` is stored as `("test", AttrValue::Clear)`, i.e. "make `test` unspecified".
    /// Defining the same name again replaces the earlier body.
    pub defs: HashMap<String, Vec<(String, AttrValue)>>,
}
134
/// Result of parsing a gitattributes file (or of merging several parsed files).
#[derive(Debug, Default)]
pub struct ParsedGitAttributes {
    /// Pattern rules in the order they were read; later rules override earlier ones when
    /// attributes are collected for a path.
    pub rules: Vec<AttrRule>,
    /// Macros defined by `[attr]name ...` lines.
    pub macros: MacroTable,
    /// Human-readable warning messages gathered while reading and parsing.
    pub warnings: Vec<String>,
}
142
/// Returns true only for `builtin_objectmode`, the single name in the `builtin_` namespace that
/// this module treats as a known builtin.
///
/// Every other name — including other `builtin_*` names and names without the prefix — yields
/// false.
#[must_use]
pub fn is_reserved_builtin_name(name: &str) -> bool {
    name.strip_prefix("builtin_") == Some("objectmode")
}
151
152/// Validate user-defined attribute names in parsed rules (for `git add`).
153///
154/// Returns an error string matching Git when a rule uses an invalid `builtin_*` name.
155pub fn validate_rules_for_add(
156    rules: &[AttrRule],
157    display_path: &str,
158) -> std::result::Result<(), String> {
159    for rule in rules {
160        if rule.skip {
161            continue;
162        }
163        for (name, _) in &rule.attrs {
164            if name.starts_with("builtin_") && !is_reserved_builtin_name(name) {
165                return Err(format!(
166                    "{name} is not a valid attribute name: {display_path}:{}",
167                    rule.line
168                ));
169            }
170        }
171    }
172    Ok(())
173}
174
175/// Collect warnings for invalid `builtin_*` assignments (check-attr continues).
176pub fn builtin_warnings_for_rules(rules: &[AttrRule], display_path: &str) -> Vec<String> {
177    let mut w = Vec::new();
178    for rule in rules {
179        if rule.skip {
180            continue;
181        }
182        for (name, _) in &rule.attrs {
183            if name == "builtin_objectmode" {
184                w.push(format!(
185                    "builtin_objectmode is not a valid attribute name: {display_path}:{}",
186                    rule.line
187                ));
188            } else if name.starts_with("builtin_") && !is_reserved_builtin_name(name) {
189                w.push(format!(
190                    "{name} is not a valid attribute name: {display_path}:{}",
191                    rule.line
192                ));
193            }
194        }
195    }
196    w
197}
198
/// Default location of the per-user attributes file when `core.attributesfile` is not configured.
///
/// Resolution order:
/// 1. `$XDG_CONFIG_HOME/git/attributes` when `XDG_CONFIG_HOME` is set and non-empty;
/// 2. `$HOME/.config/git/attributes` when `HOME` is set;
/// 3. `None` otherwise.
///
/// `XDG_CONFIG_HOME` is consulted first and on its own, so it still works in environments where
/// `HOME` is missing. Values are read with `var_os`, so non-UTF-8 paths are accepted as well.
fn default_global_attributes_path() -> Option<PathBuf> {
    let xdg = std::env::var_os("XDG_CONFIG_HOME").filter(|value| !value.is_empty());
    if let Some(xdg) = xdg {
        return Some(PathBuf::from(xdg).join("git/attributes"));
    }
    let home = std::env::var_os("HOME")?;
    Some(PathBuf::from(home).join(".config/git/attributes"))
}
208
209fn global_attributes_path(
210    repo: &Repository,
211) -> std::result::Result<Option<PathBuf>, crate::error::Error> {
212    let config = ConfigSet::load(Some(&repo.git_dir), true)?;
213    if let Some(path) = config.get("core.attributesfile") {
214        return Ok(Some(PathBuf::from(parse_path(&path))));
215    }
216    Ok(default_global_attributes_path())
217}
218
/// Read an in-tree `.gitattributes` file, refusing to follow a symbolic link.
///
/// * `path` — file to read; inspected without following links.
/// * `display` — name used for the file in the warning text.
/// * `warnings` — receives one message when `path` is a symlink.
///
/// Returns the file content, or `None` when the path cannot be inspected, is a symlink, or cannot
/// be read as UTF-8 text.
fn read_gitattributes_maybe_symlink(
    path: &Path,
    display: &str,
    warnings: &mut Vec<String>,
) -> Option<String> {
    let is_link = fs::symlink_metadata(path).ok()?.file_type().is_symlink();
    if !is_link {
        return fs::read_to_string(path).ok();
    }
    warnings.push(format!(
        "unable to access '{display}': Too many levels of symbolic links"
    ));
    None
}
234
/// Parse the text of a gitattributes file whose patterns are anchored at the repository root.
///
/// Behaves like [`parse_gitattributes_file_content_with_base`] with an empty `attr_base`.
/// `display_path` is only used to name the file in warning messages.
pub fn parse_gitattributes_file_content(content: &str, display_path: &str) -> ParsedGitAttributes {
    parse_gitattributes_content_impl(content, display_path, false, "")
}
240
/// Parse attributes defined in a `.gitattributes` file located in `attr_base` (repo-relative,
/// `/` separators, no trailing slash; empty string for the repository root).
///
/// Every rule produced carries `attr_base`, so its pattern is later matched relative to that
/// directory. `display_path` is only used to name the file in warning messages.
pub fn parse_gitattributes_file_content_with_base(
    content: &str,
    display_path: &str,
    attr_base: &str,
) -> ParsedGitAttributes {
    parse_gitattributes_content_impl(content, display_path, false, attr_base)
}
250
/// Turn every literal two-character sequence `\n` (backslash, `n`) into a real newline.
///
/// Borrows the input unchanged when it contains no such sequence, so the common case does not
/// allocate.
fn preprocess_gitattributes_blob_text(content: &str) -> Cow<'_, str> {
    match content.find("\\n") {
        Some(_) => Cow::Owned(content.replace("\\n", "\n")),
        None => Cow::Borrowed(content),
    }
}
257
258fn parse_gitattributes_content_impl(
259    content: &str,
260    display_path: &str,
261    from_blob: bool,
262    attr_base: &str,
263) -> ParsedGitAttributes {
264    let preprocessed = if from_blob {
265        preprocess_gitattributes_blob_text(content)
266    } else {
267        Cow::Borrowed(content)
268    };
269    let content = preprocessed.as_ref();
270
271    let mut out = ParsedGitAttributes::default();
272    for (idx, raw_line) in content.lines().enumerate() {
273        let line_no = idx + 1;
274        let line_bytes = raw_line.as_bytes();
275        if line_bytes.len() >= MAX_ATTR_LINE_BYTES {
276            out.warnings.push(format!(
277                "warning: ignoring overly long attributes line {line_no}"
278            ));
279            continue;
280        }
281        parse_one_line(
282            raw_line,
283            line_no,
284            display_path,
285            from_blob,
286            attr_base,
287            &mut out,
288        );
289    }
290    out.warnings
291        .extend(builtin_warnings_for_rules(&out.rules, display_path));
292    out
293}
294
/// Drop leading blanks — space, tab, carriage return, newline — and nothing else (the `blank`
/// set of Git's `attr.c`).
fn skip_ascii_blank(s: &str) -> &str {
    s.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))
}
299
/// Split `s` in front of its first blank (space, tab, CR or LF), like Git's `strcspn` on `blank`.
///
/// The first half is the leading token; the second half starts with the blank itself, or is empty
/// when `s` contains none.
fn split_at_first_blank(s: &str) -> (&str, &str) {
    let cut = s
        .find(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))
        .unwrap_or(s.len());
    s.split_at(cut)
}
309
/// Decode a C-style quoted string that starts at the beginning of `quoted` (Git
/// `unquote_c_style`, `quote.c`).
///
/// On success returns the decoded text and the remainder of the input after the closing quote.
///
/// Recognised escapes are `\a \b \f \n \r \t \v \\ \"` and exactly three octal digits whose first
/// digit is `0`–`3`.
///
/// # Errors
///
/// Returns `Err(())` when the input does not start with `"`, the closing quote is missing, an
/// escape is unknown or truncated, or the decoded bytes are not valid UTF-8.
fn unquote_c_style(quoted: &str) -> Result<(String, &str), ()> {
    let bytes = quoted.as_bytes();
    if bytes.first() != Some(&b'"') {
        return Err(());
    }

    let mut decoded: Vec<u8> = Vec::new();
    let mut pos = 1;
    while let Some(&byte) = bytes.get(pos) {
        pos += 1;
        match byte {
            b'"' => {
                // `pos` sits right after an ASCII quote, so it is always a char boundary.
                let tail = quoted.get(pos..).ok_or(())?;
                let text = String::from_utf8(decoded).map_err(|_| ())?;
                return Ok((text, tail));
            }
            b'\\' => {
                let escape = *bytes.get(pos).ok_or(())?;
                pos += 1;
                let value = match escape {
                    b'a' => 0x07,
                    b'b' => 0x08,
                    b'f' => 0x0c,
                    b'n' => b'\n',
                    b'r' => b'\r',
                    b't' => b'\t',
                    b'v' => 0x0b,
                    b'\\' => b'\\',
                    b'"' => b'"',
                    b'0'..=b'3' => {
                        let (mid, low) = match bytes.get(pos..pos + 2) {
                            Some(&[mid, low]) => (mid, low),
                            _ => return Err(()),
                        };
                        let is_octal = |d: u8| (b'0'..=b'7').contains(&d);
                        if !is_octal(mid) || !is_octal(low) {
                            return Err(());
                        }
                        pos += 2;
                        // The first digit is at most 3, so the result always fits in a byte.
                        ((escape - b'0') << 6) | ((mid - b'0') << 3) | (low - b'0')
                    }
                    _ => return Err(()),
                };
                decoded.push(value);
            }
            plain => decoded.push(plain),
        }
    }

    // Ran off the end without seeing the closing quote.
    Err(())
}
372
373/// One attribute assignment token (`parse_attr` in Git `attr.c`).
374fn parse_one_attr_token_git(s: &str) -> (&str, Option<&str>, &str) {
375    let bytes = s.as_bytes();
376    let token_end = bytes
377        .iter()
378        .position(|&b| matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
379        .unwrap_or(bytes.len());
380    let eq_pos = s.find('=');
381    let eq_in_token = eq_pos.filter(|&eq| eq < token_end);
382    let (name, val) = if let Some(eq) = eq_in_token {
383        (&s[..eq], Some(&s[eq + 1..token_end]))
384    } else {
385        (&s[..token_end], None)
386    };
387    let rest = skip_ascii_blank(&s[token_end..]);
388    (name, val, rest)
389}
390
391fn accumulate_attr_states(
392    mut states: &str,
393    attrs: &mut Vec<(String, AttrValue)>,
394    macros: &MacroTable,
395    in_macro_def: bool,
396) {
397    loop {
398        states = skip_ascii_blank(states);
399        if states.is_empty() {
400            break;
401        }
402        let (name, val, rest) = parse_one_attr_token_git(states);
403        states = rest;
404        let tok = match val {
405            Some(v) => format!("{name}={v}"),
406            None => name.to_string(),
407        };
408        push_attr_token(&tok, attrs, macros, in_macro_def);
409    }
410}
411
412const ATTR_MACRO_PREFIX: &str = "[attr]";
413
414fn parse_one_line(
415    raw_line: &str,
416    line_no: usize,
417    display_path: &str,
418    from_blob: bool,
419    attr_base: &str,
420    out: &mut ParsedGitAttributes,
421) {
422    let _ = display_path;
423    let _ = from_blob;
424    let cp = skip_ascii_blank(raw_line);
425    if cp.is_empty() || cp.starts_with('#') {
426        return;
427    }
428
429    let (pattern_token, states) = if cp.as_bytes().first() == Some(&b'"') {
430        match unquote_c_style(cp) {
431            Ok((pat, rest)) => (pat, rest),
432            Err(()) => {
433                let (a, b) = split_at_first_blank(cp);
434                (a.to_string(), b)
435            }
436        }
437    } else {
438        let (a, b) = split_at_first_blank(cp);
439        (a.to_string(), b)
440    };
441
442    if pattern_token.len() > ATTR_MACRO_PREFIX.len() && pattern_token.starts_with(ATTR_MACRO_PREFIX)
443    {
444        let rest = skip_ascii_blank(&pattern_token[ATTR_MACRO_PREFIX.len()..]);
445        let (macro_name, leftover) = split_at_first_blank(rest);
446        if !leftover.is_empty() || macro_name.is_empty() {
447            return;
448        }
449        let mut attrs = Vec::new();
450        accumulate_attr_states(states, &mut attrs, &out.macros, true);
451        out.macros.defs.insert(macro_name.to_string(), attrs);
452        return;
453    }
454
455    if pattern_token.starts_with('!') && !pattern_token.starts_with("\\!") {
456        out.warnings
457            .push("Negative patterns are ignored".to_string());
458        return;
459    }
460    let pattern_raw = pattern_token.replace("\\!", "!");
461    let (pattern, pattern_flags, nowildcardlen) = parse_attr_pattern_fields(&pattern_raw);
462    let mut attrs = Vec::new();
463    accumulate_attr_states(states, &mut attrs, &out.macros, false);
464    if attrs.is_empty() {
465        return;
466    }
467    out.rules.push(AttrRule {
468        attr_base: attr_base.to_string(),
469        pattern,
470        pattern_flags,
471        nowildcardlen,
472        skip: false,
473        line: line_no,
474        attrs,
475    });
476}
477
478fn push_attr_token(
479    tok: &str,
480    attrs: &mut Vec<(String, AttrValue)>,
481    _macros: &MacroTable,
482    in_macro_def: bool,
483) {
484    if tok == "binary" {
485        attrs.push(("text".into(), AttrValue::Unset));
486        attrs.push(("diff".into(), AttrValue::Unset));
487        attrs.push(("merge".into(), AttrValue::Unset));
488        attrs.push(("binary".into(), AttrValue::Set));
489        return;
490    }
491    if in_macro_def {
492        if let Some(rest) = tok.strip_prefix('!') {
493            attrs.push((rest.to_string(), AttrValue::Clear));
494            return;
495        }
496    }
497    if let Some(rest) = tok.strip_prefix('-') {
498        attrs.push((rest.to_string(), AttrValue::Unset));
499        return;
500    }
501    if let Some((k, v)) = tok.split_once('=') {
502        let v = v.trim_end_matches(|c: char| {
503            matches!(c, ' ' | '\t' | '\r' | '\n') || c == '\u{000b}' || c == '\u{000c}'
504        });
505        attrs.push((k.to_string(), AttrValue::Value(v.to_string())));
506        return;
507    }
508    attrs.push((tok.to_string(), AttrValue::Set));
509}
510
/// Compare the first `count` bytes of `a` and `b`, ignoring ASCII case when `icase` is set.
///
/// Returns `true` when the prefixes are equal — the opposite sense of C's `strncmp`-style
/// helpers. Returns `false` when either slice is shorter than `count`.
fn fspathncmp(a: &[u8], b: &[u8], count: usize, icase: bool) -> bool {
    match (a.get(..count), b.get(..count)) {
        (Some(lhs), Some(rhs)) if icase => lhs.eq_ignore_ascii_case(rhs),
        (Some(lhs), Some(rhs)) => lhs == rhs,
        _ => false,
    }
}
524
525/// Git `match_basename` (`dir.c`) for attribute patterns.
526fn match_basename_git(
527    basename: &[u8],
528    pattern: &[u8],
529    prefix: usize,
530    patternlen: usize,
531    pat_flags: u32,
532    icase: bool,
533) -> bool {
534    let basenamelen = basename.len();
535    let wm_flags = if icase { WM_CASEFOLD } else { 0 };
536    if prefix == patternlen {
537        return patternlen == basenamelen && fspathncmp(pattern, basename, basenamelen, icase);
538    }
539    if (pat_flags & PAT_ENDSWITH) != 0 {
540        if patternlen <= 1 {
541            return false;
542        }
543        let lit_len = patternlen - 1;
544        if lit_len > basenamelen {
545            return false;
546        }
547        return fspathncmp(
548            &pattern[1..patternlen],
549            &basename[basenamelen - lit_len..],
550            lit_len,
551            icase,
552        );
553    }
554    wildmatch(&pattern[..patternlen], basename, wm_flags)
555}
556
/// Git `match_pathname` (`dir.c`) for attribute patterns.
///
/// Matches a pattern that contains a `/` against the part of `pathname` that lies below `base`.
///
/// * `pathname` / `pathlen` — path to test; only the first `pathlen` bytes are considered.
/// * `base` / `baselen` — directory the pattern is anchored at; `baselen == 0` means no base.
/// * `pattern` / `patternlen` — pattern bytes; one leading `/` is ignored.
/// * `prefix` — number of leading pattern bytes to compare literally before globbing.
/// * `icase` — compare ASCII case-insensitively.
///
/// Returns `false` whenever the path is not strictly inside `base`.
#[allow(clippy::too_many_arguments)]
fn match_pathname_git(
    pathname: &[u8],
    pathlen: usize,
    base: &[u8],
    baselen: usize,
    mut pattern: &[u8],
    mut prefix: usize,
    mut patternlen: usize,
    icase: bool,
) -> bool {
    // Never look past the caller-supplied length (or the slice itself).
    let pathname = &pathname[..pathlen.min(pathname.len())];

    // A leading `/` only anchors the pattern; drop it and shrink both lengths to match.
    if !pattern.is_empty() && pattern[0] == b'/' {
        pattern = &pattern[1..];
        patternlen -= 1;
        prefix = prefix.saturating_sub(1);
    }

    // The path must be longer than the base ...
    if pathlen < baselen + 1 {
        return false;
    }
    // ... continue with a `/` right after it ...
    if baselen > 0 && pathname[baselen] != b'/' {
        return false;
    }
    // ... and start with the base itself.
    if !fspathncmp(pathname, base, baselen, icase) {
        return false;
    }

    // `name` is the path relative to the base (without the separating `/`).
    let namelen = if baselen == 0 {
        pathlen
    } else {
        pathlen - baselen - 1
    };
    let name = &pathname[pathlen - namelen..];

    if prefix > 0 {
        if prefix > namelen {
            return false;
        }
        // The literal prefix has to match byte for byte before any globbing happens.
        if !fspathncmp(pattern, name, prefix, icase) {
            return false;
        }
        // Pattern and name are both exhausted by the literal prefix: exact match.
        if patternlen == prefix && namelen == prefix {
            return true;
        }
        // Skip all but the last byte of the already-compared prefix on both sides, then let
        // `wildmatch` handle what is left.
        let advance = prefix - 1;
        pattern = &pattern[advance..];
        patternlen -= advance;
        let name = &name[advance..];
        let wm_flags = WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 };
        return wildmatch(&pattern[..patternlen], name, wm_flags);
    }

    // No literal prefix at all: glob the whole relative name.
    let wm_flags = WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 };
    wildmatch(&pattern[..patternlen], name, wm_flags)
}
615
/// Everything in `rel_path` before its last `/`, or `""` when it contains none.
fn path_dir_prefix(rel_path: &str) -> &str {
    rel_path.rsplit_once('/').map_or("", |(dir, _)| dir)
}
623
624/// Whether a rule from `dir/.gitattributes` may apply to `rel_path` (Git `prepare_attr_stack`).
625///
626/// Rules from nested attribute files only affect paths inside that directory tree.
627#[must_use]
628pub fn attr_rule_applies_to_path(attr_base: &str, rel_path: &str, icase: bool) -> bool {
629    if attr_base.is_empty() {
630        return true;
631    }
632    let dir = path_dir_prefix(rel_path);
633    if dir.is_empty() {
634        return false;
635    }
636    let prefix_eq = |d: &str, b: &str| {
637        if icase {
638            d.eq_ignore_ascii_case(b)
639        } else {
640            d == b
641        }
642    };
643    if prefix_eq(dir, attr_base) {
644        return true;
645    }
646    let bl = attr_base.len();
647    if dir.len() > bl && dir.as_bytes()[bl] == b'/' && prefix_eq(&dir[..bl], attr_base) {
648        return true;
649    }
650    false
651}
652
653/// Match one parsed rule against a repo-relative path (Git `path_matches` / `attr.c`).
654#[must_use]
655pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, icase: bool) -> bool {
656    if !attr_rule_applies_to_path(&rule.attr_base, rel_path, icase) {
657        return false;
658    }
659    let pathname = rel_path.as_bytes();
660    let pathlen = pathname.len();
661    let isdir = pathlen > 0 && pathname[pathlen - 1] == b'/';
662
663    if (rule.pattern_flags & PAT_MUSTBEDIR) != 0 && !isdir {
664        return false;
665    }
666
667    let eff_pathlen = if isdir { pathlen - 1 } else { pathlen };
668    let pathname_trim = &pathname[..eff_pathlen];
669
670    let basename_offset = pathname_trim
671        .iter()
672        .rposition(|&b| b == b'/')
673        .map(|i| i + 1)
674        .unwrap_or(0);
675
676    let pat = rule.pattern.as_bytes();
677    let prefix = rule.nowildcardlen.min(pat.len());
678    let patternlen = pat.len();
679
680    if (rule.pattern_flags & PAT_NODIR) != 0 {
681        let bn = &pathname_trim[basename_offset..];
682        return match_basename_git(bn, pat, prefix, patternlen, rule.pattern_flags, icase);
683    }
684
685    let base = rule.attr_base.as_bytes();
686    match_pathname_git(
687        pathname_trim,
688        eff_pathlen,
689        base,
690        base.len(),
691        pat,
692        prefix,
693        patternlen,
694        icase,
695    )
696}
697
698/// Expand macros and `binary` for one rule's assignments into source-order operations.
699///
700/// These must be applied in order to the same map as later rules (not folded into a local map),
701/// so `!attr` / macro clears remove attributes set by earlier rules on the same path.
702fn expand_rule_attrs_flat(rule: &AttrRule, macros: &MacroTable) -> Vec<(String, AttrValue)> {
703    let mut flat: Vec<(String, AttrValue)> = Vec::new();
704    for (name, val) in &rule.attrs {
705        if name == "binary" {
706            flat.push(("text".into(), AttrValue::Unset));
707            flat.push(("diff".into(), AttrValue::Unset));
708            flat.push(("merge".into(), AttrValue::Unset));
709            flat.push(("binary".into(), AttrValue::Set));
710            continue;
711        }
712        if let Some(exp) = macros.defs.get(name) {
713            flat.push((name.clone(), val.clone()));
714            for (n, v) in exp {
715                flat.push((n.clone(), v.clone()));
716            }
717        } else {
718            flat.push((name.clone(), val.clone()));
719        }
720    }
721    flat
722}
723
724/// Merge assignments: later rules override earlier; within one expanded rule, last wins.
725pub fn collect_attrs_for_path(
726    rules: &[AttrRule],
727    macros: &MacroTable,
728    rel_path: &str,
729    icase: bool,
730) -> HashMap<String, AttrValue> {
731    let mut map: HashMap<String, AttrValue> = HashMap::new();
732    for rule in rules {
733        if rule.skip {
734            continue;
735        }
736        if !attr_rule_matches(rule, rel_path, icase) {
737            continue;
738        }
739        let ops = expand_rule_attrs_flat(rule, macros);
740        for (n, v) in ops {
741            match v {
742                AttrValue::Clear => {
743                    map.remove(&n);
744                }
745                _ => {
746                    map.insert(n, v);
747                }
748            }
749        }
750    }
751    map
752}
753
/// Quote a path for `check-attr` output (C-style) when needed.
///
/// A path is returned unchanged unless it contains a control character, `"` or `\`. Otherwise it
/// is wrapped in double quotes and escaped the way Git's C-style quoting does:
///
/// * `"` and `\` are backslash-escaped;
/// * BEL, BS, TAB, LF, VT, FF and CR use their letter escapes (`\a \b \t \n \v \f \r`);
/// * every other control character is written as `\ooo`, one three-digit octal escape per UTF-8
///   byte.
///
/// Fixed-width octal keeps the output unambiguous when a digit follows the escape, and lets
/// `unquote_c_style` read the result back.
#[must_use]
pub fn quote_path_for_check_attr(path: &str) -> String {
    let needs_quoting = path
        .chars()
        .any(|c| c.is_control() || c == '"' || c == '\\');
    if !needs_quoting {
        return path.to_string();
    }

    let mut quoted = String::with_capacity(path.len() + 2);
    quoted.push('"');
    for c in path.chars() {
        match c {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            '\u{07}' => quoted.push_str("\\a"),
            '\u{08}' => quoted.push_str("\\b"),
            '\t' => quoted.push_str("\\t"),
            '\n' => quoted.push_str("\\n"),
            '\u{0b}' => quoted.push_str("\\v"),
            '\u{0c}' => quoted.push_str("\\f"),
            '\r' => quoted.push_str("\\r"),
            _ if c.is_control() => {
                // Controls above U+007F occupy two UTF-8 bytes; escape each byte separately.
                let mut utf8 = [0u8; 4];
                for byte in c.encode_utf8(&mut utf8).bytes() {
                    quoted.push_str(&format!("\\{byte:03o}"));
                }
            }
            _ => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}
776
/// Normalize `.` / `..` segments in a repo-relative path string.
///
/// Components are joined with `/`. A `..` removes the preceding component when there is one and
/// is otherwise dropped; root and prefix components are discarded.
#[must_use]
pub fn normalize_rel_path(path: &str) -> String {
    let mut kept: Vec<String> = Vec::new();
    for component in Path::new(path).components() {
        if let Component::Normal(name) = component {
            kept.push(name.to_string_lossy().into_owned());
        } else if component == Component::ParentDir {
            kept.pop();
        }
    }
    kept.join("/")
}
794
/// Resolve `.` and `..` components of `path` textually, without touching the file system.
///
/// Prefix, root and ordinary components are kept as they are; `.` is dropped and `..` removes the
/// last component collected so far, if there is one to remove.
fn lexical_normalize_path(path: PathBuf) -> PathBuf {
    path.components().fold(PathBuf::new(), |mut acc, part| {
        match part {
            Component::CurDir => {}
            Component::ParentDir => {
                acc.pop();
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                acc.push(part.as_os_str());
            }
        }
        acc
    })
}
810
811/// Resolve a user path to a repo-relative path (forward slashes).
812///
813/// Uses [`std::fs::canonicalize`] when the target exists; otherwise resolves `..` lexically from the
814/// current directory so paths like `../f` work for missing files (Git `prefix_path`, t0003).
815pub fn path_relative_to_worktree(
816    repo: &Repository,
817    path_str: &str,
818) -> std::result::Result<String, String> {
819    let wt = repo
820        .work_tree
821        .as_ref()
822        .ok_or_else(|| "bare repository — no work tree".to_string())?;
823    let cwd = std::env::current_dir().map_err(|e| e.to_string())?;
824    let p = Path::new(path_str);
825    let combined = if p.is_absolute() {
826        p.to_path_buf()
827    } else {
828        cwd.join(p)
829    };
830
831    let wt_canon = wt.canonicalize().map_err(|e| e.to_string())?;
832
833    if let Ok(abs) = combined.canonicalize() {
834        let rel = abs
835            .strip_prefix(&wt_canon)
836            .map_err(|_| format!("path outside repository: {}", path_str))?;
837        return Ok(normalize_rel_path(
838            rel.to_str().ok_or_else(|| "invalid path".to_string())?,
839        ));
840    }
841
842    let abs_lex = lexical_normalize_path(combined);
843    let rel = abs_lex
844        .strip_prefix(&wt_canon)
845        .map_err(|_| format!("path outside repository: {}", path_str))?;
846    Ok(normalize_rel_path(
847        rel.to_str().ok_or_else(|| "invalid path".to_string())?,
848    ))
849}
850
851fn collect_nested_gitattributes_dirs(work_tree: &Path) -> Vec<PathBuf> {
852    let mut dirs: Vec<PathBuf> = Vec::new();
853    walk_dirs(work_tree, work_tree, &mut dirs);
854    dirs.sort_by(|a, b| {
855        let da = a.components().count();
856        let db = b.components().count();
857        da.cmp(&db).then_with(|| a.cmp(b))
858    });
859    dirs
860}
861
/// Recursively collect the sub-directories of `cur` into `dirs`, as paths relative to `root`.
///
/// Directories named `.git` are neither recorded nor entered. Entries whose type cannot be
/// determined, and directories that cannot be listed, are silently skipped.
fn walk_dirs(root: &Path, cur: &Path, dirs: &mut Vec<PathBuf>) {
    let entries = match fs::read_dir(cur) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    for entry in entries.flatten() {
        let is_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);
        if !is_dir {
            continue;
        }
        let full = entry.path();
        if full.file_name() == Some(OsStr::new(".git")) {
            continue;
        }
        dirs.push(full.strip_prefix(root).unwrap_or(&full).to_path_buf());
        walk_dirs(root, &full, dirs);
    }
}
879
880/// Load the full stack of attribute rules for a normal repository (working tree).
881pub fn load_gitattributes_stack(
882    repo: &Repository,
883    work_tree: &Path,
884) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
885    let mut merged = ParsedGitAttributes::default();
886
887    if let Some(g) = global_attributes_path(repo)? {
888        if g.exists() {
889            if let Ok(content) = fs::read_to_string(&g) {
890                if content.len() <= MAX_ATTR_FILE_BYTES {
891                    let mut p =
892                        parse_gitattributes_file_content(&content, g.to_string_lossy().as_ref());
893                    merged.rules.append(&mut p.rules);
894                    merged.macros.defs.extend(p.macros.defs.drain());
895                    merged.warnings.append(&mut p.warnings);
896                } else {
897                    merged.warnings.push(format!(
898                        "warning: ignoring overly large gitattributes file '{}'",
899                        g.display()
900                    ));
901                }
902            }
903        }
904    }
905
906    let root_ga = work_tree.join(".gitattributes");
907    if let Some(content) =
908        read_gitattributes_maybe_symlink(&root_ga, ".gitattributes", &mut merged.warnings)
909    {
910        if content.len() <= MAX_ATTR_FILE_BYTES {
911            let mut p = parse_gitattributes_file_content(&content, ".gitattributes");
912            merged.rules.append(&mut p.rules);
913            merged.macros.defs.extend(p.macros.defs.drain());
914            merged.warnings.append(&mut p.warnings);
915        } else {
916            merged.warnings.push(
917                "warning: ignoring overly large gitattributes file '.gitattributes'".to_string(),
918            );
919        }
920    }
921
922    for rel in collect_nested_gitattributes_dirs(work_tree) {
923        let ga = work_tree.join(&rel).join(".gitattributes");
924        if let Some(content) = read_gitattributes_maybe_symlink(
925            &ga,
926            &format!("{}/.gitattributes", rel.display()),
927            &mut merged.warnings,
928        ) {
929            if content.len() > MAX_ATTR_FILE_BYTES {
930                merged.warnings.push(format!(
931                    "warning: ignoring overly large gitattributes file '{}'",
932                    ga.display()
933                ));
934                continue;
935            }
936            let prefix = rel.to_string_lossy().replace('\\', "/");
937            let mut p = parse_gitattributes_file_content_with_base(
938                &content,
939                &ga.to_string_lossy(),
940                &prefix,
941            );
942            merged.rules.append(&mut p.rules);
943            merged.macros.defs.extend(p.macros.defs.drain());
944            merged.warnings.append(&mut p.warnings);
945        }
946    }
947
948    let info = repo.git_dir.join("info/attributes");
949    if info.exists() {
950        if let Ok(content) = fs::read_to_string(&info) {
951            if content.len() <= MAX_ATTR_FILE_BYTES {
952                let mut p = parse_gitattributes_file_content(&content, "info/attributes");
953                merged.rules.append(&mut p.rules);
954                merged.macros.defs.extend(p.macros.defs.drain());
955                merged.warnings.append(&mut p.warnings);
956            }
957        }
958    }
959
960    Ok(merged)
961}
962
963/// Bare repository: only `info/attributes` from disk (no in-repo `.gitattributes` file).
964pub fn load_gitattributes_bare(
965    repo: &Repository,
966) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
967    let mut merged = ParsedGitAttributes::default();
968    if let Some(g) = global_attributes_path(repo)? {
969        if g.exists() {
970            if let Ok(content) = fs::read_to_string(&g) {
971                if content.len() <= MAX_ATTR_FILE_BYTES {
972                    let mut p =
973                        parse_gitattributes_file_content(&content, g.to_string_lossy().as_ref());
974                    merged.rules.append(&mut p.rules);
975                    merged.macros.defs.extend(p.macros.defs.drain());
976                    merged.warnings.append(&mut p.warnings);
977                }
978            }
979        }
980    }
981    let info = repo.git_dir.join("info/attributes");
982    if info.exists() {
983        if let Ok(content) = fs::read_to_string(&info) {
984            if content.len() <= MAX_ATTR_FILE_BYTES {
985                let mut p = parse_gitattributes_file_content(&content, "info/attributes");
986                merged.rules.append(&mut p.rules);
987                merged.macros.defs.extend(p.macros.defs.drain());
988                merged.warnings.append(&mut p.warnings);
989            }
990        }
991    }
992    // Without a work tree, Git reads tracked `.gitattributes` from the index (Git
993    // `read_attr_from_index`), so e.g. `git -C .git diff-tree --check` still honours a
994    // committed `* -whitespace` attribute. Prepend index rules so work-tree-equivalent
995    // ordering (closer paths win) is preserved relative to info/global.
996    if let Ok(index) = Index::load(&repo.git_dir.join("index")) {
997        if let Ok(mut from_index) = load_gitattributes_from_index(&index, &repo.odb, &repo.git_dir)
998        {
999            // info/global attributes are lower priority than per-tree `.gitattributes`,
1000            // so place the index rules ahead of what we have collected so far.
1001            from_index.rules.append(&mut merged.rules);
1002            merged.rules = from_index.rules;
1003            for (k, v) in from_index.macros.defs.drain() {
1004                merged.macros.defs.entry(k).or_insert(v);
1005            }
1006            merged.warnings.append(&mut from_index.warnings);
1007        }
1008    }
1009    Ok(merged)
1010}
1011
1012/// Read `.gitattributes` blob from a tree object at `tree_oid`, recursively.
1013pub fn load_gitattributes_from_tree(
1014    odb: &Odb,
1015    tree_oid: &ObjectId,
1016) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1017    let mut merged = ParsedGitAttributes::default();
1018    walk_tree_attrs(odb, tree_oid, "", &mut merged)?;
1019    Ok(merged)
1020}
1021
1022fn walk_tree_attrs(
1023    odb: &Odb,
1024    tree_oid: &ObjectId,
1025    prefix: &str,
1026    merged: &mut ParsedGitAttributes,
1027) -> std::result::Result<(), crate::error::Error> {
1028    let obj = odb.read(tree_oid)?;
1029    if obj.kind != ObjectKind::Tree {
1030        return Ok(());
1031    }
1032    let entries = parse_tree(&obj.data)?;
1033    for e in entries {
1034        let name = String::from_utf8_lossy(&e.name).to_string();
1035        let path = if prefix.is_empty() {
1036            name.clone()
1037        } else {
1038            format!("{prefix}/{name}")
1039        };
1040        match e.mode {
1041            0o040000 => {
1042                walk_tree_attrs(odb, &e.oid, &path, merged)?;
1043            }
1044            0o100644 | 0o100755 | 0o120000 if name == ".gitattributes" => {
1045                let oid = e.oid;
1046                {
1047                    let blob = odb.read(&oid)?;
1048                    if blob.kind != ObjectKind::Blob {
1049                        continue;
1050                    }
1051                    if blob.data.len() > MAX_ATTR_FILE_BYTES {
1052                        merged.warnings.push(
1053                            "warning: ignoring overly large gitattributes blob '.gitattributes'"
1054                                .to_string(),
1055                        );
1056                        continue;
1057                    }
1058                    let content = String::from_utf8_lossy(&blob.data).into_owned();
1059                    let display = format!("{path} (tree)");
1060                    let attr_base = Path::new(&path)
1061                        .parent()
1062                        .map(|p| p.to_string_lossy().replace('\\', "/"))
1063                        .unwrap_or_default();
1064                    let mut p =
1065                        parse_gitattributes_content_impl(&content, &display, true, &attr_base);
1066                    merged.rules.append(&mut p.rules);
1067                    merged.macros.defs.extend(p.macros.defs.drain());
1068                    merged.warnings.append(&mut p.warnings);
1069                }
1070            }
1071            _ => {}
1072        }
1073    }
1074    Ok(())
1075}
1076
1077/// Load merged `.gitattributes` rules for diff and merge (respects `GIT_ATTR_SOURCE` / `attr.tree`).
1078///
1079/// Resolution order matches Git's attribute source for diff: optional tree from
1080/// [`resolve_attr_treeish`], then work tree stack (or bare `info/attributes` only).
1081///
1082/// # Errors
1083///
1084/// Returns an error when a tree-ish source is set from the environment or command line and cannot
1085/// be resolved (Git: *"bad --attr-source or GIT_ATTR_SOURCE"*).
1086pub fn load_gitattributes_for_diff(
1087    repo: &Repository,
1088) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1089    let (treeish, ignore_bad_tree) = resolve_attr_treeish(repo, None)?;
1090    if let Some(spec) = treeish.filter(|s| !s.is_empty()) {
1091        match resolve_tree_oid(repo, &spec) {
1092            Ok(oid) => return load_gitattributes_from_tree(&repo.odb, &oid),
1093            Err(_) if ignore_bad_tree => {}
1094            Err(_) => {
1095                return Err(crate::error::Error::InvalidRef(format!(
1096                    "bad --attr-source or GIT_ATTR_SOURCE: {spec}"
1097                )));
1098            }
1099        }
1100    }
1101    if let Some(wt) = repo.work_tree.as_deref() {
1102        return load_gitattributes_stack(repo, wt);
1103    }
1104    load_gitattributes_bare(repo)
1105}
1106
1107/// Resolve `attr.tree`, `GIT_ATTR_SOURCE`, `--source` precedence for check-attr.
1108///
1109/// The second return value is `ignore_bad_resolution`: when true (only for `attr.tree` from
1110/// config), an unresolvable tree-ish falls back to reading `.gitattributes` from the work tree
1111/// or index instead of erroring (matches Git `compute_default_attr_source`).
1112pub fn resolve_attr_treeish(
1113    repo: &Repository,
1114    source_arg: Option<&str>,
1115) -> std::result::Result<(Option<String>, bool), crate::error::Error> {
1116    let env_src = std::env::var("GIT_ATTR_SOURCE")
1117        .ok()
1118        .filter(|s| !s.is_empty());
1119    let config = ConfigSet::load(Some(&repo.git_dir), true)?;
1120    let cfg_tree = config.get("attr.tree");
1121    if let Some(s) = source_arg.map(|s| s.to_string()) {
1122        return Ok((Some(s), false));
1123    }
1124    if let Some(s) = env_src {
1125        return Ok((Some(s), false));
1126    }
1127    if let Some(s) = cfg_tree {
1128        return Ok((Some(s), true));
1129    }
1130    Ok((None, false))
1131}
1132
1133/// Parse a revision to a tree OID for attribute loading.
1134pub fn resolve_tree_oid(repo: &Repository, spec: &str) -> std::result::Result<ObjectId, String> {
1135    let oid = resolve_revision(repo, spec).map_err(|e| e.to_string())?;
1136    let obj = repo.read_replaced(&oid).map_err(|e| e.to_string())?;
1137    match obj.kind {
1138        ObjectKind::Commit => {
1139            let c = crate::objects::parse_commit(&obj.data).map_err(|e| e.to_string())?;
1140            Ok(c.tree)
1141        }
1142        ObjectKind::Tree => Ok(oid),
1143        _ => Err("revision is not a commit or tree".to_string()),
1144    }
1145}
1146
1147/// Load attributes from the index (stage 0) for `.gitattributes` paths only.
1148pub fn load_gitattributes_from_index(
1149    index: &Index,
1150    odb: &Odb,
1151    work_tree: &Path,
1152) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1153    let mut merged = ParsedGitAttributes::default();
1154    let mut paths: Vec<Vec<u8>> = index
1155        .entries
1156        .iter()
1157        .filter(|e| e.stage() == 0 && e.path.ends_with(b".gitattributes"))
1158        .map(|e| e.path.clone())
1159        .collect();
1160    paths.sort();
1161    for path_bytes in paths {
1162        let Ok(rel) = std::str::from_utf8(&path_bytes) else {
1163            continue;
1164        };
1165        let Some(entry) = index.get(&path_bytes, 0) else {
1166            continue;
1167        };
1168        let obj = odb.read(&entry.oid)?;
1169        if obj.data.len() > MAX_ATTR_FILE_BYTES {
1170            merged.warnings.push(format!(
1171                "warning: ignoring overly large gitattributes blob '{}'",
1172                rel
1173            ));
1174            continue;
1175        }
1176        let content = String::from_utf8_lossy(&obj.data);
1177        let attr_base = Path::new(rel)
1178            .parent()
1179            .map(|p| p.to_string_lossy().replace('\\', "/"))
1180            .unwrap_or_default();
1181        let mut p = parse_gitattributes_content_impl(&content, rel, true, &attr_base);
1182        merged.rules.append(&mut p.rules);
1183        merged.macros.defs.extend(p.macros.defs.drain());
1184        merged.warnings.append(&mut p.warnings);
1185    }
1186    let _ = work_tree;
1187    Ok(merged)
1188}
1189
1190/// Return `builtin_objectmode` value for a path (working tree), or `None` if unavailable.
1191///
1192/// Submodule checkout directories (`.git` is a file containing `gitdir:`) report `160000`
1193/// like Git, not `040000`.
1194#[must_use]
1195pub fn builtin_objectmode_worktree(repo: &Repository, rel_path: &str) -> Option<String> {
1196    let wt = repo.work_tree.as_ref()?;
1197    let p = wt.join(rel_path);
1198    let meta = fs::symlink_metadata(&p).ok()?;
1199    let ft = meta.file_type();
1200    if ft.is_symlink() {
1201        return Some("120000".to_string());
1202    }
1203    if ft.is_dir() {
1204        let git = p.join(".git");
1205        if let Ok(git_meta) = fs::symlink_metadata(&git) {
1206            if !git_meta.file_type().is_dir() {
1207                if let Ok(content) = fs::read_to_string(&git) {
1208                    if content.starts_with("gitdir:") {
1209                        return Some("160000".to_string());
1210                    }
1211                }
1212            }
1213        }
1214        return Some("040000".to_string());
1215    }
1216    #[cfg(unix)]
1217    {
1218        use std::os::unix::fs::MetadataExt;
1219        let m = normalize_mode(meta.mode());
1220        Some(format!("{:06o}", m))
1221    }
1222    #[cfg(not(unix))]
1223    {
1224        let _ = repo;
1225        None
1226    }
1227}
1228
1229/// `builtin_objectmode` from the index when `--cached` is used.
1230#[must_use]
1231pub fn builtin_objectmode_index(index: &Index, rel_path: &str) -> Option<String> {
1232    let key = rel_path.as_bytes();
1233    let e = index.get(key, 0)?;
1234    let m = e.mode;
1235    if m == MODE_SYMLINK {
1236        return Some("120000".to_string());
1237    }
1238    if m == MODE_GITLINK {
1239        return Some("160000".to_string());
1240    }
1241    if m == MODE_TREE {
1242        return Some("040000".to_string());
1243    }
1244    if m == MODE_EXECUTABLE {
1245        return Some("100755".to_string());
1246    }
1247    if m == MODE_REGULAR {
1248        return Some("100644".to_string());
1249    }
1250    Some(format!("{:06o}", m))
1251}
1252
#[cfg(test)]
mod tests {
    use super::*;

    /// With the `notest` macro (`!test`) defined in the top-level file, a `d/yes notest` rule
    /// that follows `d/* test=…` in `a/b/.gitattributes` must leave `test` unspecified for
    /// `a/b/d/yes`.
    #[test]
    fn d_yes_rule_clears_test_after_d_star() {
        let top = parse_gitattributes_file_content("[attr]notest !test\n", ".gitattributes");
        let mut nested = parse_gitattributes_file_content_with_base(
            "h test=a/b/h\nd/* test=a/b/d/*\nd/yes notest\n",
            "a/b/.gitattributes",
            "a/b",
        );
        assert_eq!(nested.rules.len(), 3);

        // Merge in the same order a loader would: top-level macros, then the nested file.
        let mut all = ParsedGitAttributes::default();
        all.macros.defs.extend(top.macros.defs);
        all.rules.append(&mut nested.rules);
        all.macros.defs.extend(nested.macros.defs);

        let rule = all
            .rules
            .iter()
            .find(|r| r.pattern == "d/yes")
            .expect("d/yes rule");
        assert!(attr_rule_matches(rule, "a/b/d/yes", false));

        let attrs = collect_attrs_for_path(&all.rules, &all.macros, "a/b/d/yes", false);
        let test_attr = attrs.get("test");
        assert!(
            test_attr.is_none(),
            "expected test cleared by notest macro, got {:?}",
            test_attr
        );
    }
}