Skip to main content

grit_lib/
attributes.rs

1//! Gitattributes parsing and pattern matching for `check-attr` and validation.
2//!
3//! Implements Git-consistent rule ordering, macro expansion (`[attr]`), `binary`
4//! expansion, `**` globbing via [`crate::wildmatch`], and optional case folding
5//! for `core.ignorecase`.
6
7use crate::config::parse_path;
8use crate::config::ConfigSet;
9use crate::index::normalize_mode;
10use crate::index::Index;
11use crate::index::MODE_EXECUTABLE;
12use crate::index::MODE_GITLINK;
13use crate::index::MODE_REGULAR;
14use crate::index::MODE_SYMLINK;
15use crate::index::MODE_TREE;
16use crate::objects::parse_tree;
17use crate::objects::ObjectId;
18use crate::objects::ObjectKind;
19use crate::odb::Odb;
20use crate::repo::Repository;
21use crate::rev_parse::resolve_revision;
22use crate::wildmatch::{wildmatch, WM_CASEFOLD, WM_PATHNAME};
23use std::borrow::Cow;
24use std::collections::HashMap;
25use std::ffi::OsStr;
26use std::fs;
27use std::path::{Component, Path, PathBuf};
28
/// Upper bound (in bytes) for one `.gitattributes` line, mirroring Git's `ATTR_MAX_LINE_LENGTH`.
/// The parser in this module skips, with a warning, any line whose length reaches this value.
pub const MAX_ATTR_LINE_BYTES: usize = 2 * 1024;

/// Upper bound (in bytes) for a whole gitattributes file; the loaders in this module
/// ignore files larger than this.
pub const MAX_ATTR_FILE_BYTES: usize = 100 * 1024 * 1024;
35
/// State of a single attribute after parsing, as reported by `check-attr`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttrValue {
    /// Bare `attr` in a rule; displayed as `set`.
    Set,
    /// Explicit `-attr` in a rule; displayed as `unset`.
    Unset,
    /// `!attr` form: the attribute goes back to *unspecified* (which is not the same as `unset`).
    Clear,
    /// `attr=value` form; displayed as the value text itself.
    Value(String),
}
46
47impl AttrValue {
48    /// Text form as printed by `git check-attr`.
49    #[must_use]
50    pub fn display(&self) -> &str {
51        match self {
52            AttrValue::Set => "set",
53            AttrValue::Unset => "unset",
54            AttrValue::Clear => "unspecified",
55            AttrValue::Value(v) => v.as_str(),
56        }
57    }
58}
59
// Pattern flag bits, modelled on Git's `parse_path_pattern` (`dir.c`).

/// Pattern contains no `/`: it is matched against the basename only.
const PAT_NODIR: u32 = 1 << 0;
/// Pattern ended with `/`: it may only match directory paths.
const PAT_MUSTBEDIR: u32 = 1 << 1;
/// Pattern is `*` followed by a purely literal tail (suffix match).
const PAT_ENDSWITH: u32 = 1 << 2;
64
/// True when `c` is one of the glob metacharacters `*`, `?`, `[` or `\`.
#[inline]
fn is_glob_special_attr(c: u8) -> bool {
    [b'*', b'?', b'[', b'\\'].contains(&c)
}
69
70/// Length of initial literal segment before the first glob special (Git `simple_length`).
71fn simple_length_pat(s: &str) -> usize {
72    let b = s.as_bytes();
73    let mut i = 0;
74    while i < b.len() {
75        if is_glob_special_attr(b[i]) {
76            return i;
77        }
78        i += 1;
79    }
80    i
81}
82
83/// Parse pattern text like Git `parse_path_pattern` (after `!` and unquoting are handled).
84fn parse_attr_pattern_fields(pat: &str) -> (String, u32, usize) {
85    let mut flags = 0u32;
86    let mut len = pat.len();
87    if len > 0 && pat.as_bytes()[len - 1] == b'/' {
88        len -= 1;
89        flags |= PAT_MUSTBEDIR;
90    }
91    let p = &pat[..len];
92    let has_slash = p.as_bytes().contains(&b'/');
93    if !has_slash {
94        flags |= PAT_NODIR;
95    }
96    if let Some(rest) = p.strip_prefix('*') {
97        if !rest.is_empty() && simple_length_pat(rest) == rest.len() {
98            flags |= PAT_ENDSWITH;
99        }
100    }
101    let mut nowild = simple_length_pat(p);
102    if nowild > len {
103        nowild = len;
104    }
105    (p.to_string(), flags, nowild)
106}
107
108/// One line in a gitattributes file.
109#[derive(Debug, Clone)]
110pub struct AttrRule {
111    /// Directory of the `.gitattributes` file that defined this rule (repo-relative, `/`,
112    /// no trailing slash). Empty for the repository root file.
113    pub attr_base: String,
114    /// Pattern body (no leading `!`; trailing `/` stripped; same as Git after `parse_path_pattern` prep).
115    pub pattern: String,
116    /// From `parse_path_pattern`: basename-only match vs full path under `attr_base`.
117    pub pattern_flags: u32,
118    /// Length of leading literal segment before first wildcard (Git `nowildcardlen`).
119    pub nowildcardlen: usize,
120    /// If true, this rule was discarded (negative pattern) after emitting a warning.
121    pub skip: bool,
122    /// 1-based line number in the source file.
123    pub line: usize,
124    /// Attribute assignments in source order (last wins for duplicates on this line).
125    pub attrs: Vec<(String, AttrValue)>,
126}
127
128/// Macro definitions from `[attr]name ...` lines.
129#[derive(Debug, Clone, Default)]
130pub struct MacroTable {
131    /// Maps macro name → list of assignments (e.g. `!test` → unset test).
132    pub defs: HashMap<String, Vec<(String, AttrValue)>>,
133}
134
135/// Result of parsing a gitattributes file.
136#[derive(Debug, Default)]
137pub struct ParsedGitAttributes {
138    pub rules: Vec<AttrRule>,
139    pub macros: MacroTable,
140    pub warnings: Vec<String>,
141}
142
/// Returns true only for the `builtin_*` name this module recognises, i.e.
/// exactly `builtin_objectmode`.
///
/// Any other name — including other `builtin_*` names and names without the
/// prefix — yields false.
#[must_use]
pub fn is_reserved_builtin_name(name: &str) -> bool {
    name.strip_prefix("builtin_") == Some("objectmode")
}
151
152/// Validate user-defined attribute names in parsed rules (for `git add`).
153///
154/// Returns an error string matching Git when a rule uses an invalid `builtin_*` name.
155pub fn validate_rules_for_add(
156    rules: &[AttrRule],
157    display_path: &str,
158) -> std::result::Result<(), String> {
159    for rule in rules {
160        if rule.skip {
161            continue;
162        }
163        for (name, _) in &rule.attrs {
164            if name.starts_with("builtin_") && !is_reserved_builtin_name(name) {
165                return Err(format!(
166                    "{name} is not a valid attribute name: {display_path}:{}",
167                    rule.line
168                ));
169            }
170        }
171    }
172    Ok(())
173}
174
175/// Collect warnings for invalid `builtin_*` assignments (check-attr continues).
176pub fn builtin_warnings_for_rules(rules: &[AttrRule], display_path: &str) -> Vec<String> {
177    let mut w = Vec::new();
178    for rule in rules {
179        if rule.skip {
180            continue;
181        }
182        for (name, _) in &rule.attrs {
183            if name == "builtin_objectmode" {
184                w.push(format!(
185                    "builtin_objectmode is not a valid attribute name: {display_path}:{}",
186                    rule.line
187                ));
188            } else if name.starts_with("builtin_") && !is_reserved_builtin_name(name) {
189                w.push(format!(
190                    "{name} is not a valid attribute name: {display_path}:{}",
191                    rule.line
192                ));
193            }
194        }
195    }
196    w
197}
198
/// Default location of the per-user attributes file when `core.attributesfile` is not set.
///
/// Resolution order:
/// 1. `$XDG_CONFIG_HOME/git/attributes` when `XDG_CONFIG_HOME` is set and non-empty;
/// 2. `$HOME/.config/git/attributes` when `HOME` is set;
/// 3. `None` otherwise.
///
/// `XDG_CONFIG_HOME` is consulted first so that it is honoured even when `HOME`
/// is absent, and the values are read as OS strings so non-UTF-8 directories work.
fn default_global_attributes_path() -> Option<PathBuf> {
    let xdg = std::env::var_os("XDG_CONFIG_HOME").filter(|dir| !dir.is_empty());
    if let Some(dir) = xdg {
        return Some(PathBuf::from(dir).join("git/attributes"));
    }
    let home = std::env::var_os("HOME")?;
    Some(PathBuf::from(home).join(".config/git/attributes"))
}
208
209fn global_attributes_path(
210    repo: &Repository,
211) -> std::result::Result<Option<PathBuf>, crate::error::Error> {
212    let config = ConfigSet::load(Some(&repo.git_dir), true)?;
213    if let Some(path) = config.get("core.attributesfile") {
214        return Ok(Some(PathBuf::from(parse_path(&path))));
215    }
216    Ok(default_global_attributes_path())
217}
218
/// Read the gitattributes file at `path` unless it is a symbolic link.
///
/// * Missing or unreadable file (or content that is not valid UTF-8): `None`, no warning.
/// * Symlink: a warning naming `display` is appended to `warnings` and `None` is returned.
/// * Otherwise: the file content.
fn read_gitattributes_maybe_symlink(
    path: &Path,
    display: &str,
    warnings: &mut Vec<String>,
) -> Option<String> {
    let is_symlink = fs::symlink_metadata(path).ok()?.file_type().is_symlink();
    if !is_symlink {
        return fs::read_to_string(path).ok();
    }
    warnings.push(format!(
        "unable to access '{display}': Too many levels of symbolic links"
    ));
    None
}
234
235/// Parse one gitattributes file from disk (patterns are relative to `attr_base`, the directory
236/// containing the file — use `""` for the repository root file).
237pub fn parse_gitattributes_file_content(content: &str, display_path: &str) -> ParsedGitAttributes {
238    parse_gitattributes_content_impl(content, display_path, false, "")
239}
240
241/// Parse attributes defined in a `.gitattributes` file located in `attr_base` (repo-relative,
242/// `/` separators, no trailing slash; empty string for the repository root).
243pub fn parse_gitattributes_file_content_with_base(
244    content: &str,
245    display_path: &str,
246    attr_base: &str,
247) -> ParsedGitAttributes {
248    parse_gitattributes_content_impl(content, display_path, false, attr_base)
249}
250
/// Turn every literal two-character sequence `\n` (backslash, `n`) in blob text into
/// a real newline. Text without that sequence is returned borrowed, without copying.
fn preprocess_gitattributes_blob_text(content: &str) -> Cow<'_, str> {
    const ESCAPED_NEWLINE: &str = "\\n";
    if content.contains(ESCAPED_NEWLINE) {
        Cow::Owned(content.replace(ESCAPED_NEWLINE, "\n"))
    } else {
        Cow::Borrowed(content)
    }
}
257
258fn parse_gitattributes_content_impl(
259    content: &str,
260    display_path: &str,
261    from_blob: bool,
262    attr_base: &str,
263) -> ParsedGitAttributes {
264    let preprocessed = if from_blob {
265        preprocess_gitattributes_blob_text(content)
266    } else {
267        Cow::Borrowed(content)
268    };
269    let content = preprocessed.as_ref();
270
271    let mut out = ParsedGitAttributes::default();
272    for (idx, raw_line) in content.lines().enumerate() {
273        let line_no = idx + 1;
274        let line_bytes = raw_line.as_bytes();
275        if line_bytes.len() >= MAX_ATTR_LINE_BYTES {
276            out.warnings.push(format!(
277                "warning: ignoring overly long attributes line {line_no}"
278            ));
279            continue;
280        }
281        parse_one_line(
282            raw_line,
283            line_no,
284            display_path,
285            from_blob,
286            attr_base,
287            &mut out,
288        );
289    }
290    out.warnings
291        .extend(builtin_warnings_for_rules(&out.rules, display_path));
292    out
293}
294
/// Drop leading space, tab, CR and LF characters — and nothing else — from `s`
/// (the `blank` set of Git's `attr.c`).
fn skip_ascii_blank(s: &str) -> &str {
    s.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))
}
299
/// Split `s` just before its first space, tab, CR or LF (Git `strcspn` on `blank`).
///
/// Returns `(token, remainder)`; the remainder still starts with the blank, and is
/// empty when `s` contains no blank at all.
fn split_at_first_blank(s: &str) -> (&str, &str) {
    let cut = s
        .find(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))
        .unwrap_or(s.len());
    s.split_at(cut)
}
309
/// Decode a C-style quoted string at the start of `quoted` (cf. Git `unquote_c_style`).
///
/// `quoted` must begin with `"`. On success returns the decoded text and whatever
/// follows the closing quote. Supported escapes: `\a \b \f \n \r \t \v \\ \"` and
/// three-digit octal `\ooo` whose first digit is `0`–`3`.
///
/// # Errors
/// `Err(())` when the opening quote is missing, the string is unterminated, an
/// escape is unknown or incomplete, or the decoded bytes are not valid UTF-8.
fn unquote_c_style(quoted: &str) -> Result<(String, &str), ()> {
    let bytes = quoted.as_bytes();
    if bytes.first() != Some(&b'"') {
        return Err(());
    }

    let mut decoded: Vec<u8> = Vec::new();
    let mut pos = 1;
    while let Some(&byte) = bytes.get(pos) {
        pos += 1;
        match byte {
            // Closing quote: everything after it is handed back untouched.
            b'"' => {
                let tail = std::str::from_utf8(&bytes[pos..]).map_err(|_| ())?;
                let text = String::from_utf8(decoded).map_err(|_| ())?;
                return Ok((text, tail));
            }
            b'\\' => {
                let escape = *bytes.get(pos).ok_or(())?;
                pos += 1;
                let unescaped = match escape {
                    b'a' => 0x07,
                    b'b' => 0x08,
                    b'f' => 0x0c,
                    b'n' => b'\n',
                    b'r' => b'\r',
                    b't' => b'\t',
                    b'v' => 0x0b,
                    b'\\' => b'\\',
                    b'"' => b'"',
                    b'0'..=b'3' => {
                        // Exactly two more octal digits must follow.
                        let (Some(&mid), Some(&low)) = (bytes.get(pos), bytes.get(pos + 1))
                        else {
                            return Err(());
                        };
                        let is_octal = |digit: u8| (b'0'..=b'7').contains(&digit);
                        if !is_octal(mid) || !is_octal(low) {
                            return Err(());
                        }
                        pos += 2;
                        ((escape - b'0') << 6) | ((mid - b'0') << 3) | (low - b'0')
                    }
                    _ => return Err(()),
                };
                decoded.push(unescaped);
            }
            literal => decoded.push(literal),
        }
    }

    // Ran out of input before the closing quote.
    Err(())
}
372
373/// One attribute assignment token (`parse_attr` in Git `attr.c`).
374fn parse_one_attr_token_git(s: &str) -> (&str, Option<&str>, &str) {
375    let bytes = s.as_bytes();
376    let token_end = bytes
377        .iter()
378        .position(|&b| matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
379        .unwrap_or(bytes.len());
380    let eq_pos = s.find('=');
381    let eq_in_token = eq_pos.filter(|&eq| eq < token_end);
382    let (name, val) = if let Some(eq) = eq_in_token {
383        (&s[..eq], Some(&s[eq + 1..token_end]))
384    } else {
385        (&s[..token_end], None)
386    };
387    let rest = skip_ascii_blank(&s[token_end..]);
388    (name, val, rest)
389}
390
391fn accumulate_attr_states(
392    mut states: &str,
393    attrs: &mut Vec<(String, AttrValue)>,
394    macros: &MacroTable,
395    in_macro_def: bool,
396) {
397    loop {
398        states = skip_ascii_blank(states);
399        if states.is_empty() {
400            break;
401        }
402        let (name, val, rest) = parse_one_attr_token_git(states);
403        states = rest;
404        let tok = match val {
405            Some(v) => format!("{name}={v}"),
406            None => name.to_string(),
407        };
408        push_attr_token(&tok, attrs, macros, in_macro_def);
409    }
410}
411
/// Marker that introduces a macro definition line (`[attr]name ...`).
const ATTR_MACRO_PREFIX: &str = "[attr]";
413
414fn parse_one_line(
415    raw_line: &str,
416    line_no: usize,
417    display_path: &str,
418    from_blob: bool,
419    attr_base: &str,
420    out: &mut ParsedGitAttributes,
421) {
422    let _ = display_path;
423    let _ = from_blob;
424    let cp = skip_ascii_blank(raw_line);
425    if cp.is_empty() || cp.starts_with('#') {
426        return;
427    }
428
429    let (pattern_token, states) = if cp.as_bytes().first() == Some(&b'"') {
430        match unquote_c_style(cp) {
431            Ok((pat, rest)) => (pat, rest),
432            Err(()) => {
433                let (a, b) = split_at_first_blank(cp);
434                (a.to_string(), b)
435            }
436        }
437    } else {
438        let (a, b) = split_at_first_blank(cp);
439        (a.to_string(), b)
440    };
441
442    if pattern_token.len() > ATTR_MACRO_PREFIX.len() && pattern_token.starts_with(ATTR_MACRO_PREFIX)
443    {
444        let rest = skip_ascii_blank(&pattern_token[ATTR_MACRO_PREFIX.len()..]);
445        let (macro_name, leftover) = split_at_first_blank(rest);
446        if !leftover.is_empty() || macro_name.is_empty() {
447            return;
448        }
449        let mut attrs = Vec::new();
450        accumulate_attr_states(states, &mut attrs, &out.macros, true);
451        out.macros.defs.insert(macro_name.to_string(), attrs);
452        return;
453    }
454
455    if pattern_token.starts_with('!') && !pattern_token.starts_with("\\!") {
456        out.warnings
457            .push("Negative patterns are ignored".to_string());
458        return;
459    }
460    let pattern_raw = pattern_token.replace("\\!", "!");
461    let (pattern, pattern_flags, nowildcardlen) = parse_attr_pattern_fields(&pattern_raw);
462    let mut attrs = Vec::new();
463    accumulate_attr_states(states, &mut attrs, &out.macros, false);
464    if attrs.is_empty() {
465        return;
466    }
467    out.rules.push(AttrRule {
468        attr_base: attr_base.to_string(),
469        pattern,
470        pattern_flags,
471        nowildcardlen,
472        skip: false,
473        line: line_no,
474        attrs,
475    });
476}
477
478fn push_attr_token(
479    tok: &str,
480    attrs: &mut Vec<(String, AttrValue)>,
481    _macros: &MacroTable,
482    in_macro_def: bool,
483) {
484    if tok == "binary" {
485        attrs.push(("text".into(), AttrValue::Unset));
486        attrs.push(("diff".into(), AttrValue::Unset));
487        attrs.push(("merge".into(), AttrValue::Unset));
488        attrs.push(("binary".into(), AttrValue::Set));
489        return;
490    }
491    if in_macro_def {
492        if let Some(rest) = tok.strip_prefix('!') {
493            attrs.push((rest.to_string(), AttrValue::Clear));
494            return;
495        }
496    }
497    if let Some(rest) = tok.strip_prefix('-') {
498        attrs.push((rest.to_string(), AttrValue::Unset));
499        return;
500    }
501    if let Some((k, v)) = tok.split_once('=') {
502        let v = v.trim_end_matches(|c: char| {
503            matches!(c, ' ' | '\t' | '\r' | '\n') || c == '\u{000b}' || c == '\u{000c}'
504        });
505        attrs.push((k.to_string(), AttrValue::Value(v.to_string())));
506        return;
507    }
508    attrs.push((tok.to_string(), AttrValue::Set));
509}
510
/// Compare the first `count` bytes of `a` and `b`, ASCII-case-insensitively when
/// `icase` is set. Returns `true` on equality and `false` when they differ or
/// when either slice is shorter than `count`.
fn fspathncmp(a: &[u8], b: &[u8], count: usize, icase: bool) -> bool {
    let (Some(lhs), Some(rhs)) = (a.get(..count), b.get(..count)) else {
        return false;
    };
    if icase {
        lhs.eq_ignore_ascii_case(rhs)
    } else {
        lhs == rhs
    }
}
524
525/// Git `match_basename` (`dir.c`) for attribute patterns.
526fn match_basename_git(
527    basename: &[u8],
528    pattern: &[u8],
529    prefix: usize,
530    patternlen: usize,
531    pat_flags: u32,
532    icase: bool,
533) -> bool {
534    let basenamelen = basename.len();
535    let wm_flags = if icase { WM_CASEFOLD } else { 0 };
536    if prefix == patternlen {
537        return patternlen == basenamelen && fspathncmp(pattern, basename, basenamelen, icase);
538    }
539    if (pat_flags & PAT_ENDSWITH) != 0 {
540        if patternlen <= 1 {
541            return false;
542        }
543        let lit_len = patternlen - 1;
544        if lit_len > basenamelen {
545            return false;
546        }
547        return fspathncmp(
548            &pattern[1..patternlen],
549            &basename[basenamelen - lit_len..],
550            lit_len,
551            icase,
552        );
553    }
554    wildmatch(&pattern[..patternlen], basename, wm_flags)
555}
556
557/// Git `match_pathname` (`dir.c`) for attribute patterns.
558#[allow(clippy::too_many_arguments)]
559fn match_pathname_git(
560    pathname: &[u8],
561    pathlen: usize,
562    base: &[u8],
563    baselen: usize,
564    mut pattern: &[u8],
565    mut prefix: usize,
566    mut patternlen: usize,
567    icase: bool,
568) -> bool {
569    let pathname = &pathname[..pathlen.min(pathname.len())];
570
571    if !pattern.is_empty() && pattern[0] == b'/' {
572        pattern = &pattern[1..];
573        patternlen -= 1;
574        prefix = prefix.saturating_sub(1);
575    }
576
577    if pathlen < baselen + 1 {
578        return false;
579    }
580    if baselen > 0 && pathname[baselen] != b'/' {
581        return false;
582    }
583    if !fspathncmp(pathname, base, baselen, icase) {
584        return false;
585    }
586
587    let namelen = if baselen == 0 {
588        pathlen
589    } else {
590        pathlen - baselen - 1
591    };
592    let name = &pathname[pathlen - namelen..];
593
594    if prefix > 0 {
595        if prefix > namelen {
596            return false;
597        }
598        if !fspathncmp(pattern, name, prefix, icase) {
599            return false;
600        }
601        if patternlen == prefix && namelen == prefix {
602            return true;
603        }
604        let advance = prefix - 1;
605        pattern = &pattern[advance..];
606        patternlen -= advance;
607        let name = &name[advance..];
608        let wm_flags = WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 };
609        return wildmatch(&pattern[..patternlen], name, wm_flags);
610    }
611
612    let wm_flags = WM_PATHNAME | if icase { WM_CASEFOLD } else { 0 };
613    wildmatch(&pattern[..patternlen], name, wm_flags)
614}
615
/// Everything before the last `/` of `rel_path` (without that slash), or `""`
/// when the path contains no `/`.
fn path_dir_prefix(rel_path: &str) -> &str {
    rel_path.rsplit_once('/').map_or("", |(dir, _)| dir)
}
623
624/// Whether a rule from `dir/.gitattributes` may apply to `rel_path` (Git `prepare_attr_stack`).
625///
626/// Rules from nested attribute files only affect paths inside that directory tree.
627#[must_use]
628pub fn attr_rule_applies_to_path(attr_base: &str, rel_path: &str, icase: bool) -> bool {
629    if attr_base.is_empty() {
630        return true;
631    }
632    let dir = path_dir_prefix(rel_path);
633    if dir.is_empty() {
634        return false;
635    }
636    let prefix_eq = |d: &str, b: &str| {
637        if icase {
638            d.eq_ignore_ascii_case(b)
639        } else {
640            d == b
641        }
642    };
643    if prefix_eq(dir, attr_base) {
644        return true;
645    }
646    let bl = attr_base.len();
647    if dir.len() > bl && dir.as_bytes()[bl] == b'/' && prefix_eq(&dir[..bl], attr_base) {
648        return true;
649    }
650    false
651}
652
653/// Match one parsed rule against a repo-relative path (Git `path_matches` / `attr.c`).
654#[must_use]
655pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, icase: bool) -> bool {
656    if !attr_rule_applies_to_path(&rule.attr_base, rel_path, icase) {
657        return false;
658    }
659    let pathname = rel_path.as_bytes();
660    let pathlen = pathname.len();
661    let isdir = pathlen > 0 && pathname[pathlen - 1] == b'/';
662
663    if (rule.pattern_flags & PAT_MUSTBEDIR) != 0 && !isdir {
664        return false;
665    }
666
667    let eff_pathlen = if isdir { pathlen - 1 } else { pathlen };
668    let pathname_trim = &pathname[..eff_pathlen];
669
670    let basename_offset = pathname_trim
671        .iter()
672        .rposition(|&b| b == b'/')
673        .map(|i| i + 1)
674        .unwrap_or(0);
675
676    let pat = rule.pattern.as_bytes();
677    let prefix = rule.nowildcardlen.min(pat.len());
678    let patternlen = pat.len();
679
680    if (rule.pattern_flags & PAT_NODIR) != 0 {
681        let bn = &pathname_trim[basename_offset..];
682        return match_basename_git(bn, pat, prefix, patternlen, rule.pattern_flags, icase);
683    }
684
685    let base = rule.attr_base.as_bytes();
686    match_pathname_git(
687        pathname_trim,
688        eff_pathlen,
689        base,
690        base.len(),
691        pat,
692        prefix,
693        patternlen,
694        icase,
695    )
696}
697
698/// Expand macros and `binary` for one rule's assignments into source-order operations.
699///
700/// These must be applied in order to the same map as later rules (not folded into a local map),
701/// so `!attr` / macro clears remove attributes set by earlier rules on the same path.
702fn expand_rule_attrs_flat(rule: &AttrRule, macros: &MacroTable) -> Vec<(String, AttrValue)> {
703    let mut flat: Vec<(String, AttrValue)> = Vec::new();
704    for (name, val) in &rule.attrs {
705        if name == "binary" {
706            flat.push(("text".into(), AttrValue::Unset));
707            flat.push(("diff".into(), AttrValue::Unset));
708            flat.push(("merge".into(), AttrValue::Unset));
709            flat.push(("binary".into(), AttrValue::Set));
710            continue;
711        }
712        if let Some(exp) = macros.defs.get(name) {
713            flat.push((name.clone(), val.clone()));
714            for (n, v) in exp {
715                flat.push((n.clone(), v.clone()));
716            }
717        } else {
718            flat.push((name.clone(), val.clone()));
719        }
720    }
721    flat
722}
723
724/// Merge assignments: later rules override earlier; within one expanded rule, last wins.
725pub fn collect_attrs_for_path(
726    rules: &[AttrRule],
727    macros: &MacroTable,
728    rel_path: &str,
729    icase: bool,
730) -> HashMap<String, AttrValue> {
731    let mut map: HashMap<String, AttrValue> = HashMap::new();
732    for rule in rules {
733        if rule.skip {
734            continue;
735        }
736        if !attr_rule_matches(rule, rel_path, icase) {
737            continue;
738        }
739        let ops = expand_rule_attrs_flat(rule, macros);
740        for (n, v) in ops {
741            match v {
742                AttrValue::Clear => {
743                    map.remove(&n);
744                }
745                _ => {
746                    map.insert(n, v);
747                }
748            }
749        }
750    }
751    map
752}
753
/// Quote a path for `check-attr` output (C-style) when needed.
///
/// Paths without control characters, `"` or `\` are returned unchanged. Otherwise
/// the path is wrapped in double quotes and escaped so that `unquote_c_style`
/// can read it back:
/// * `"` and `\` get a backslash;
/// * BEL, BS, TAB, LF, VT, FF and CR use the named escapes `\a \b \t \n \v \f \r`;
/// * any other control character is written as three-digit octal, one escape per
///   UTF-8 byte.
#[must_use]
pub fn quote_path_for_check_attr(path: &str) -> String {
    let needs_quoting = path
        .chars()
        .any(|c| c.is_control() || c == '"' || c == '\\');
    if !needs_quoting {
        return path.to_string();
    }

    let mut quoted = String::with_capacity(path.len() + 2);
    quoted.push('"');
    for c in path.chars() {
        match c {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            '\u{07}' => quoted.push_str("\\a"),
            '\u{08}' => quoted.push_str("\\b"),
            '\t' => quoted.push_str("\\t"),
            '\n' => quoted.push_str("\\n"),
            '\u{0b}' => quoted.push_str("\\v"),
            '\u{0c}' => quoted.push_str("\\f"),
            '\r' => quoted.push_str("\\r"),
            _ if c.is_control() => {
                // Fixed-width octal keeps the escape unambiguous when digits follow.
                let mut utf8 = [0u8; 4];
                for byte in c.encode_utf8(&mut utf8).bytes() {
                    quoted.push_str(&format!("\\{byte:03o}"));
                }
            }
            _ => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}
776
/// Resolve `.` and `..` segments of a repo-relative path purely lexically.
///
/// Normal segments are kept, `..` removes the previous kept segment (if any),
/// and every other component (`.`, root, prefix) is dropped. The result is
/// joined with `/`.
#[must_use]
pub fn normalize_rel_path(path: &str) -> String {
    let mut segments: Vec<String> = Vec::new();
    for component in Path::new(path).components() {
        if let Component::Normal(segment) = component {
            segments.push(segment.to_string_lossy().into_owned());
        } else if matches!(component, Component::ParentDir) {
            segments.pop();
        }
    }
    segments.join("/")
}
794
/// Remove `.` and resolve `..` components of `path` without touching the file
/// system. Prefix and root components are preserved; `..` pops the last
/// component collected so far.
fn lexical_normalize_path(path: PathBuf) -> PathBuf {
    path.components().fold(PathBuf::new(), |mut acc, part| {
        match part {
            Component::CurDir => {}
            Component::ParentDir => {
                acc.pop();
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                acc.push(part.as_os_str());
            }
        }
        acc
    })
}
810
811/// Resolve a user path to a repo-relative path (forward slashes).
812///
813/// Uses [`std::fs::canonicalize`] when the target exists; otherwise resolves `..` lexically from the
814/// current directory so paths like `../f` work for missing files (Git `prefix_path`, t0003).
815pub fn path_relative_to_worktree(
816    repo: &Repository,
817    path_str: &str,
818) -> std::result::Result<String, String> {
819    let wt = repo
820        .work_tree
821        .as_ref()
822        .ok_or_else(|| "bare repository — no work tree".to_string())?;
823    let cwd = std::env::current_dir().map_err(|e| e.to_string())?;
824    let p = Path::new(path_str);
825    let combined = if p.is_absolute() {
826        p.to_path_buf()
827    } else {
828        cwd.join(p)
829    };
830
831    let wt_canon = wt.canonicalize().map_err(|e| e.to_string())?;
832
833    if let Ok(abs) = combined.canonicalize() {
834        let rel = abs
835            .strip_prefix(&wt_canon)
836            .map_err(|_| format!("path outside repository: {}", path_str))?;
837        return Ok(normalize_rel_path(
838            rel.to_str().ok_or_else(|| "invalid path".to_string())?,
839        ));
840    }
841
842    let abs_lex = lexical_normalize_path(combined);
843    let rel = abs_lex
844        .strip_prefix(&wt_canon)
845        .map_err(|_| format!("path outside repository: {}", path_str))?;
846    Ok(normalize_rel_path(
847        rel.to_str().ok_or_else(|| "invalid path".to_string())?,
848    ))
849}
850
851fn collect_nested_gitattributes_dirs(work_tree: &Path) -> Vec<PathBuf> {
852    let mut dirs: Vec<PathBuf> = Vec::new();
853    walk_dirs(work_tree, work_tree, &mut dirs);
854    dirs.sort_by(|a, b| {
855        let da = a.components().count();
856        let db = b.components().count();
857        da.cmp(&db).then_with(|| a.cmp(b))
858    });
859    dirs
860}
861
/// Recursively append to `dirs` every directory found under `cur`, expressed
/// relative to `root` where possible.
///
/// Directories named `.git` are neither recorded nor entered; unreadable
/// directories and entries are silently skipped. Symlinks are not followed
/// because the entry's own file type is used.
fn walk_dirs(root: &Path, cur: &Path, dirs: &mut Vec<PathBuf>) {
    let entries = match fs::read_dir(cur) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    for entry in entries.flatten() {
        let is_dir = matches!(entry.file_type(), Ok(kind) if kind.is_dir());
        if !is_dir {
            continue;
        }
        let child = entry.path();
        if child.file_name() == Some(OsStr::new(".git")) {
            continue;
        }
        let relative = child.strip_prefix(root).unwrap_or(&child).to_path_buf();
        dirs.push(relative);
        walk_dirs(root, &child, dirs);
    }
}
879
880/// Load the full stack of attribute rules for a normal repository (working tree).
881pub fn load_gitattributes_stack(
882    repo: &Repository,
883    work_tree: &Path,
884) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
885    let mut merged = ParsedGitAttributes::default();
886
887    if let Some(g) = global_attributes_path(repo)? {
888        if g.exists() {
889            if let Ok(content) = fs::read_to_string(&g) {
890                if content.len() <= MAX_ATTR_FILE_BYTES {
891                    let mut p =
892                        parse_gitattributes_file_content(&content, g.to_string_lossy().as_ref());
893                    merged.rules.append(&mut p.rules);
894                    merged.macros.defs.extend(p.macros.defs.drain());
895                    merged.warnings.append(&mut p.warnings);
896                } else {
897                    merged.warnings.push(format!(
898                        "warning: ignoring overly large gitattributes file '{}'",
899                        g.display()
900                    ));
901                }
902            }
903        }
904    }
905
906    let root_ga = work_tree.join(".gitattributes");
907    if let Some(content) =
908        read_gitattributes_maybe_symlink(&root_ga, ".gitattributes", &mut merged.warnings)
909    {
910        if content.len() <= MAX_ATTR_FILE_BYTES {
911            let mut p = parse_gitattributes_file_content(&content, ".gitattributes");
912            merged.rules.append(&mut p.rules);
913            merged.macros.defs.extend(p.macros.defs.drain());
914            merged.warnings.append(&mut p.warnings);
915        } else {
916            merged.warnings.push(
917                "warning: ignoring overly large gitattributes file '.gitattributes'".to_string(),
918            );
919        }
920    }
921
922    for rel in collect_nested_gitattributes_dirs(work_tree) {
923        let ga = work_tree.join(&rel).join(".gitattributes");
924        if let Some(content) = read_gitattributes_maybe_symlink(
925            &ga,
926            &format!("{}/.gitattributes", rel.display()),
927            &mut merged.warnings,
928        ) {
929            if content.len() > MAX_ATTR_FILE_BYTES {
930                merged.warnings.push(format!(
931                    "warning: ignoring overly large gitattributes file '{}'",
932                    ga.display()
933                ));
934                continue;
935            }
936            let prefix = rel.to_string_lossy().replace('\\', "/");
937            let mut p = parse_gitattributes_file_content_with_base(
938                &content,
939                &ga.to_string_lossy(),
940                &prefix,
941            );
942            merged.rules.append(&mut p.rules);
943            merged.macros.defs.extend(p.macros.defs.drain());
944            merged.warnings.append(&mut p.warnings);
945        }
946    }
947
948    let info = repo.git_dir.join("info/attributes");
949    if info.exists() {
950        if let Ok(content) = fs::read_to_string(&info) {
951            if content.len() <= MAX_ATTR_FILE_BYTES {
952                let mut p = parse_gitattributes_file_content(&content, "info/attributes");
953                merged.rules.append(&mut p.rules);
954                merged.macros.defs.extend(p.macros.defs.drain());
955                merged.warnings.append(&mut p.warnings);
956            }
957        }
958    }
959
960    Ok(merged)
961}
962
963/// Bare repository: only `info/attributes` from disk (no in-repo `.gitattributes` file).
964pub fn load_gitattributes_bare(
965    repo: &Repository,
966) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
967    let mut merged = ParsedGitAttributes::default();
968    if let Some(g) = global_attributes_path(repo)? {
969        if g.exists() {
970            if let Ok(content) = fs::read_to_string(&g) {
971                if content.len() <= MAX_ATTR_FILE_BYTES {
972                    let mut p =
973                        parse_gitattributes_file_content(&content, g.to_string_lossy().as_ref());
974                    merged.rules.append(&mut p.rules);
975                    merged.macros.defs.extend(p.macros.defs.drain());
976                    merged.warnings.append(&mut p.warnings);
977                }
978            }
979        }
980    }
981    let info = repo.git_dir.join("info/attributes");
982    if info.exists() {
983        if let Ok(content) = fs::read_to_string(&info) {
984            if content.len() <= MAX_ATTR_FILE_BYTES {
985                let mut p = parse_gitattributes_file_content(&content, "info/attributes");
986                merged.rules.append(&mut p.rules);
987                merged.macros.defs.extend(p.macros.defs.drain());
988                merged.warnings.append(&mut p.warnings);
989            }
990        }
991    }
992    Ok(merged)
993}
994
995/// Read `.gitattributes` blob from a tree object at `tree_oid`, recursively.
996pub fn load_gitattributes_from_tree(
997    odb: &Odb,
998    tree_oid: &ObjectId,
999) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1000    let mut merged = ParsedGitAttributes::default();
1001    walk_tree_attrs(odb, tree_oid, "", &mut merged)?;
1002    Ok(merged)
1003}
1004
1005fn walk_tree_attrs(
1006    odb: &Odb,
1007    tree_oid: &ObjectId,
1008    prefix: &str,
1009    merged: &mut ParsedGitAttributes,
1010) -> std::result::Result<(), crate::error::Error> {
1011    let obj = odb.read(tree_oid)?;
1012    if obj.kind != ObjectKind::Tree {
1013        return Ok(());
1014    }
1015    let entries = parse_tree(&obj.data)?;
1016    for e in entries {
1017        let name = String::from_utf8_lossy(&e.name).to_string();
1018        let path = if prefix.is_empty() {
1019            name.clone()
1020        } else {
1021            format!("{prefix}/{name}")
1022        };
1023        match e.mode {
1024            0o040000 => {
1025                walk_tree_attrs(odb, &e.oid, &path, merged)?;
1026            }
1027            0o100644 | 0o100755 | 0o120000 => {
1028                if name == ".gitattributes" {
1029                    let oid = e.oid;
1030                    {
1031                        let blob = odb.read(&oid)?;
1032                        if blob.kind != ObjectKind::Blob {
1033                            continue;
1034                        }
1035                        if blob.data.len() > MAX_ATTR_FILE_BYTES {
1036                            merged.warnings.push("warning: ignoring overly large gitattributes blob '.gitattributes'".to_string());
1037                            continue;
1038                        }
1039                        let content = String::from_utf8_lossy(&blob.data).into_owned();
1040                        let display = format!("{path} (tree)");
1041                        let attr_base = Path::new(&path)
1042                            .parent()
1043                            .map(|p| p.to_string_lossy().replace('\\', "/"))
1044                            .unwrap_or_default();
1045                        let mut p =
1046                            parse_gitattributes_content_impl(&content, &display, true, &attr_base);
1047                        merged.rules.append(&mut p.rules);
1048                        merged.macros.defs.extend(p.macros.defs.drain());
1049                        merged.warnings.append(&mut p.warnings);
1050                    }
1051                }
1052            }
1053            _ => {}
1054        }
1055    }
1056    Ok(())
1057}
1058
1059/// Load merged `.gitattributes` rules for diff and merge (respects `GIT_ATTR_SOURCE` / `attr.tree`).
1060///
1061/// Resolution order matches Git's attribute source for diff: optional tree from
1062/// [`resolve_attr_treeish`], then work tree stack (or bare `info/attributes` only).
1063///
1064/// # Errors
1065///
1066/// Returns an error when a tree-ish source is set from the environment or command line and cannot
1067/// be resolved (Git: *"bad --attr-source or GIT_ATTR_SOURCE"*).
1068pub fn load_gitattributes_for_diff(
1069    repo: &Repository,
1070) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1071    let (treeish, ignore_bad_tree) = resolve_attr_treeish(repo, None)?;
1072    if let Some(spec) = treeish.filter(|s| !s.is_empty()) {
1073        match resolve_tree_oid(repo, &spec) {
1074            Ok(oid) => return load_gitattributes_from_tree(&repo.odb, &oid),
1075            Err(_) if ignore_bad_tree => {}
1076            Err(_) => {
1077                return Err(crate::error::Error::InvalidRef(format!(
1078                    "bad --attr-source or GIT_ATTR_SOURCE: {spec}"
1079                )));
1080            }
1081        }
1082    }
1083    if let Some(wt) = repo.work_tree.as_deref() {
1084        return load_gitattributes_stack(repo, wt);
1085    }
1086    load_gitattributes_bare(repo)
1087}
1088
1089/// Resolve `attr.tree`, `GIT_ATTR_SOURCE`, `--source` precedence for check-attr.
1090///
1091/// The second return value is `ignore_bad_resolution`: when true (only for `attr.tree` from
1092/// config), an unresolvable tree-ish falls back to reading `.gitattributes` from the work tree
1093/// or index instead of erroring (matches Git `compute_default_attr_source`).
1094pub fn resolve_attr_treeish(
1095    repo: &Repository,
1096    source_arg: Option<&str>,
1097) -> std::result::Result<(Option<String>, bool), crate::error::Error> {
1098    let env_src = std::env::var("GIT_ATTR_SOURCE")
1099        .ok()
1100        .filter(|s| !s.is_empty());
1101    let config = ConfigSet::load(Some(&repo.git_dir), true)?;
1102    let cfg_tree = config.get("attr.tree");
1103    if let Some(s) = source_arg.map(|s| s.to_string()) {
1104        return Ok((Some(s), false));
1105    }
1106    if let Some(s) = env_src {
1107        return Ok((Some(s), false));
1108    }
1109    if let Some(s) = cfg_tree {
1110        return Ok((Some(s), true));
1111    }
1112    Ok((None, false))
1113}
1114
1115/// Parse a revision to a tree OID for attribute loading.
1116pub fn resolve_tree_oid(repo: &Repository, spec: &str) -> std::result::Result<ObjectId, String> {
1117    let oid = resolve_revision(repo, spec).map_err(|e| e.to_string())?;
1118    let obj = repo.read_replaced(&oid).map_err(|e| e.to_string())?;
1119    match obj.kind {
1120        ObjectKind::Commit => {
1121            let c = crate::objects::parse_commit(&obj.data).map_err(|e| e.to_string())?;
1122            Ok(c.tree)
1123        }
1124        ObjectKind::Tree => Ok(oid),
1125        _ => Err("revision is not a commit or tree".to_string()),
1126    }
1127}
1128
1129/// Load attributes from the index (stage 0) for `.gitattributes` paths only.
1130pub fn load_gitattributes_from_index(
1131    index: &Index,
1132    odb: &Odb,
1133    work_tree: &Path,
1134) -> std::result::Result<ParsedGitAttributes, crate::error::Error> {
1135    let mut merged = ParsedGitAttributes::default();
1136    let mut paths: Vec<Vec<u8>> = index
1137        .entries
1138        .iter()
1139        .filter(|e| e.stage() == 0 && e.path.ends_with(b".gitattributes"))
1140        .map(|e| e.path.clone())
1141        .collect();
1142    paths.sort();
1143    for path_bytes in paths {
1144        let Ok(rel) = std::str::from_utf8(&path_bytes) else {
1145            continue;
1146        };
1147        let Some(entry) = index.get(&path_bytes, 0) else {
1148            continue;
1149        };
1150        let obj = odb.read(&entry.oid)?;
1151        if obj.data.len() > MAX_ATTR_FILE_BYTES {
1152            merged.warnings.push(format!(
1153                "warning: ignoring overly large gitattributes blob '{}'",
1154                rel
1155            ));
1156            continue;
1157        }
1158        let content = String::from_utf8_lossy(&obj.data);
1159        let attr_base = Path::new(rel)
1160            .parent()
1161            .map(|p| p.to_string_lossy().replace('\\', "/"))
1162            .unwrap_or_default();
1163        let mut p = parse_gitattributes_content_impl(&content, rel, true, &attr_base);
1164        merged.rules.append(&mut p.rules);
1165        merged.macros.defs.extend(p.macros.defs.drain());
1166        merged.warnings.append(&mut p.warnings);
1167    }
1168    let _ = work_tree;
1169    Ok(merged)
1170}
1171
1172/// Return `builtin_objectmode` value for a path (working tree), or `None` if unavailable.
1173///
1174/// Submodule checkout directories (`.git` is a file containing `gitdir:`) report `160000`
1175/// like Git, not `040000`.
1176#[must_use]
1177pub fn builtin_objectmode_worktree(repo: &Repository, rel_path: &str) -> Option<String> {
1178    let wt = repo.work_tree.as_ref()?;
1179    let p = wt.join(rel_path);
1180    let meta = fs::symlink_metadata(&p).ok()?;
1181    let ft = meta.file_type();
1182    if ft.is_symlink() {
1183        return Some("120000".to_string());
1184    }
1185    if ft.is_dir() {
1186        let git = p.join(".git");
1187        if let Ok(git_meta) = fs::symlink_metadata(&git) {
1188            if !git_meta.file_type().is_dir() {
1189                if let Ok(content) = fs::read_to_string(&git) {
1190                    if content.starts_with("gitdir:") {
1191                        return Some("160000".to_string());
1192                    }
1193                }
1194            }
1195        }
1196        return Some("040000".to_string());
1197    }
1198    #[cfg(unix)]
1199    {
1200        use std::os::unix::fs::MetadataExt;
1201        let m = normalize_mode(meta.mode());
1202        Some(format!("{:06o}", m))
1203    }
1204    #[cfg(not(unix))]
1205    {
1206        let _ = repo;
1207        None
1208    }
1209}
1210
1211/// `builtin_objectmode` from the index when `--cached` is used.
1212#[must_use]
1213pub fn builtin_objectmode_index(index: &Index, rel_path: &str) -> Option<String> {
1214    let key = rel_path.as_bytes();
1215    let e = index.get(key, 0)?;
1216    let m = e.mode;
1217    if m == MODE_SYMLINK {
1218        return Some("120000".to_string());
1219    }
1220    if m == MODE_GITLINK {
1221        return Some("160000".to_string());
1222    }
1223    if m == MODE_TREE {
1224        return Some("040000".to_string());
1225    }
1226    if m == MODE_EXECUTABLE {
1227        return Some("100755".to_string());
1228    }
1229    if m == MODE_REGULAR {
1230        return Some("100644".to_string());
1231    }
1232    Some(format!("{:06o}", m))
1233}
1234
#[cfg(test)]
mod tests {
    use super::*;

    /// A root-level `notest` macro (`!test`) applied by the later `d/yes` rule must leave
    /// `test` unspecified even though the earlier `d/*` rule assigned it a value.
    #[test]
    fn d_yes_rule_clears_test_after_d_star() {
        let root = parse_gitattributes_file_content("[attr]notest !test\n", ".gitattributes");
        let mut nested = parse_gitattributes_file_content_with_base(
            "h test=a/b/h\nd/* test=a/b/d/*\nd/yes notest\n",
            "a/b/.gitattributes",
            "a/b",
        );
        assert_eq!(nested.rules.len(), 3);

        // Merge in the same order a loader would: root macros first, then the nested file.
        let mut merged = ParsedGitAttributes::default();
        merged.macros.defs.extend(root.macros.defs);
        merged.rules.append(&mut nested.rules);
        merged.macros.defs.extend(nested.macros.defs);

        let yes_rule = merged
            .rules
            .iter()
            .find(|rule| rule.pattern == "d/yes")
            .expect("d/yes rule");
        assert!(attr_rule_matches(yes_rule, "a/b/d/yes", false));

        let attrs = collect_attrs_for_path(&merged.rules, &merged.macros, "a/b/d/yes", false);
        let test_attr = attrs.get("test");
        assert!(
            test_attr.is_none(),
            "expected test cleared by notest macro, got {:?}",
            test_attr
        );
    }
}