Skip to main content

git_lfs_git/
attr.rs

1//! `.gitattributes` parsing and matching.
2//!
3//! Backed by `gix-attributes` + `gix-glob`, which together implement Git's
4//! wildmatch + macro + per-directory precedence semantics. The "shell out
5//! to git, not gix" rule in CLAUDE.md applies to runtime git operations
6//! (rev-list, cat-file, etc.), not to using gix-* crates as parsing libs.
7//!
8//! Two construction modes:
9//!
10//! - [`AttrSet::from_workdir`] — discover and load every `.gitattributes`
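//!   in the working tree, plus `.git/info/attributes`. Pattern lists that
//!   are loaded later take precedence over earlier ones, so among
//!   per-directory files deeper directories win over shallower (Git's
//!   standard "more specific wins"). Git itself ranks `info/attributes`
//!   above every per-directory file (see gitattributes(5)); the load order
//!   used here is documented on [`AttrSet::from_workdir`].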
14//! - [`AttrSet::from_buffer`] — load from a single in-memory buffer. For
15//!   tests and one-shot matching that doesn't need a workdir.
16//!
17//! Once built, query with [`AttrSet::value`] / [`AttrSet::is_set`], plus
18//! the LFS-specific helpers [`AttrSet::is_lfs_tracked`] /
19//! [`AttrSet::is_lockable`].
20
21use std::ffi::OsStr;
22use std::fs;
23use std::io;
24use std::path::{Path, PathBuf};
25
26use bstr::ByteSlice;
27use gix_attributes::{
28    Search, StateRef,
29    search::{MetadataCollection, Outcome},
30};
31use gix_glob::pattern::Case;
32
33/// A queryable set of `.gitattributes` patterns.
34pub struct AttrSet {
35    search: Search,
36    collection: MetadataCollection,
37}
38
39impl AttrSet {
40    /// Empty set, seeded only with Git's built-in `[attr]binary` macro
41    /// (so patterns referencing `binary` resolve correctly).
42    pub fn empty() -> Self {
43        let mut collection = MetadataCollection::default();
44        let mut search = Search::default();
45        search.add_patterns_buffer(
46            b"[attr]binary -diff -merge -text",
47            "[builtin]".into(),
48            None,
49            &mut collection,
50            true,
51        );
52        Self { search, collection }
53    }
54
55    /// Build from a single `.gitattributes`-format buffer.
56    pub fn from_buffer(bytes: &[u8]) -> Self {
57        let mut me = Self::empty();
58        me.search
59            .add_patterns_buffer(bytes, "<memory>".into(), None, &mut me.collection, true);
60        me
61    }
62
63    /// Add a `.gitattributes` buffer that should match paths under
64    /// `dir` (forward-slash separated, no trailing slash, `""` for the
65    /// repo root). For per-commit evaluation during streaming
66    /// rewrites where the on-disk working tree isn't authoritative.
67    /// Order of calls matters — gix-attributes iterates lists in
68    /// reverse, so deeper directories should be added *after*
69    /// shallower ones to win precedence (matching Git's "more
70    /// specific path overrides shallower" semantics).
71    pub fn add_buffer_at(&mut self, bytes: &[u8], dir: &str) {
72        let virtual_root = std::path::PathBuf::from("/__lfs_virt");
73        let source = if dir.is_empty() {
74            virtual_root.join(".gitattributes")
75        } else {
76            virtual_root.join(dir).join(".gitattributes")
77        };
78        self.search.add_patterns_buffer(
79            bytes,
80            source,
81            Some(&virtual_root),
82            &mut self.collection,
83            true,
84        );
85    }
86
87    /// Discover every `.gitattributes` reachable from `repo_root` (skipping
88    /// the `.git/` directory) and load them along with `.git/info/attributes`
89    /// if it exists.
90    pub fn from_workdir(repo_root: &Path) -> io::Result<Self> {
91        let mut me = Self::empty();
92        let mut buf = Vec::new();
93
94        let info = repo_root.join(".git").join("info").join("attributes");
95        if info.exists() {
96            me.search
97                .add_patterns_file(info, true, None, &mut buf, &mut me.collection, true)?;
98        }
99
100        let mut found = Vec::new();
101        walk_for_gitattributes(repo_root, &mut found)?;
102        // Shallow → deep: gix-attributes iterates pattern lists in reverse
103        // when matching, so the last-added (deepest) wins — matching Git's
104        // "more specific path overrides shallower" semantics.
105        found.sort_by_key(|p| p.components().count());
106        for path in found {
107            // `root` is always the repo root. gix-glob computes each file's
108            // relative `base` by stripping the repo-root prefix from
109            // `source.parent()` — so root.gitattributes ends up with no base
110            // (matches paths directly) while sub/.gitattributes ends up with
111            // base `sub/` (strips `sub/` before matching).
112            me.search.add_patterns_file(
113                path,
114                true,
115                Some(repo_root),
116                &mut buf,
117                &mut me.collection,
118                true,
119            )?;
120        }
121        Ok(me)
122    }
123
124    /// Return the resolved value of `attr` for `path` (relative to the
125    /// repo root, with `/` separators). `None` for unspecified or unset.
126    /// `Set`/`Value(v)` map to `Some("true")` / `Some(v)`.
127    pub fn value(&self, path: &str, attr: &str) -> Option<String> {
128        let mut out = Outcome::default();
129        out.initialize_with_selection(&self.collection, [attr]);
130        self.search
131            .pattern_matching_relative_path(path.into(), Case::Sensitive, None, &mut out);
132        for m in out.iter_selected() {
133            if m.assignment.name.as_str() != attr {
134                continue;
135            }
136            return match m.assignment.state {
137                StateRef::Set => Some("true".into()),
138                StateRef::Value(v) => Some(v.as_bstr().to_str_lossy().into_owned()),
139                StateRef::Unset | StateRef::Unspecified => None,
140            };
141        }
142        None
143    }
144
145    /// True iff `attr` is set for `path` — that is, `attr` or `attr=<v>`
146    /// where `v` is anything other than the literal `"false"`.
147    pub fn is_set(&self, path: &str, attr: &str) -> bool {
148        matches!(self.value(path, attr).as_deref(), Some(v) if v != "false")
149    }
150
151    /// True iff `path` matches a `filter=lfs` line.
152    pub fn is_lfs_tracked(&self, path: &str) -> bool {
153        self.value(path, "filter").as_deref() == Some("lfs")
154    }
155
156    /// True iff `path` matches a `lockable` line.
157    pub fn is_lockable(&self, path: &str) -> bool {
158        self.is_set(path, "lockable")
159    }
160}
161
/// One LFS-related line found in an attributes file while listing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatternEntry {
    /// The pattern as recorded by the scanner: the text from the file,
    /// intended to be free of any surrounding `"..."` quotes.
    pub pattern: String,
    /// Which `.gitattributes` (or `.git/info/attributes`) file the line
    /// came from, relative to the repo root and using `/` separators.
    pub source: String,
    /// `true` when the line turns LFS tracking on (`filter=lfs`); `false`
    /// when it explicitly removes or unspecifies the filter (`-filter`,
    /// `!filter`, `-filter=...`).
    pub tracked: bool,
    /// `true` when the same line also carries `lockable` in its `set`
    /// form (`lockable=false` does not count as lockable).
    pub lockable: bool,
}

/// Every LFS-related pattern visible in a workdir, in the order the
/// files were scanned (`.git/info/attributes` first, then
/// `.gitattributes` from shallow to deep).
#[derive(Debug, Default, PartialEq, Eq)]
pub struct PatternListing {
    pub patterns: Vec<PatternEntry>,
}

impl PatternListing {
    /// Entries whose line establishes LFS tracking (`filter=lfs`).
    pub fn tracked(&self) -> impl Iterator<Item = &PatternEntry> {
        self.with_tracked(true)
    }

    /// Entries whose line explicitly removes / unspecifies the LFS filter.
    pub fn excluded(&self) -> impl Iterator<Item = &PatternEntry> {
        self.with_tracked(false)
    }

    /// Entries whose `tracked` flag equals `wanted`, in listing order.
    fn with_tracked(&self, wanted: bool) -> impl Iterator<Item = &PatternEntry> {
        self.patterns
            .iter()
            .filter(move |entry| entry.tracked == wanted)
    }
}
200
201/// Walk `.gitattributes` across the workdir plus `.git/info/attributes`
202/// and the user's `core.attributesfile` (if configured), extracting
203/// LFS-related pattern lines for `git lfs track`'s listing mode.
204///
205/// Pattern matching is *not* needed here — we're just enumerating the raw
206/// pattern text per source file — so this uses a simple line tokenizer
207/// rather than [`AttrSet`]'s full wildmatch machinery.
208pub fn list_lfs_patterns(repo_root: &Path) -> io::Result<PatternListing> {
209    let mut listing = PatternListing::default();
210
211    // The user-level attributes file (`core.attributesfile`, default
212    // `~/.config/git/attributes`). Looked up before `.git/info/attributes`
213    // and the per-tree files so it shows up first in the listing —
214    // upstream lists global → repo-local → per-dir.
215    if let Ok(Some(path)) = crate::config::get_effective(repo_root, "core.attributesfile") {
216        let expanded = expand_tilde(&path);
217        if let Ok(bytes) = fs::read(&expanded) {
218            scan_attr_lines(&bytes, &path, &mut listing);
219        }
220    }
221
222    let info = repo_root.join(".git").join("info").join("attributes");
223    if info.exists() {
224        let bytes = fs::read(&info)?;
225        scan_attr_lines(&bytes, ".git/info/attributes", &mut listing);
226    }
227
228    let mut found = Vec::new();
229    walk_for_gitattributes(repo_root, &mut found)?;
230    found.sort_by_key(|p| p.components().count());
231    for path in found {
232        let bytes = fs::read(&path)?;
233        let rel = path
234            .strip_prefix(repo_root)
235            .unwrap_or(&path)
236            .to_string_lossy()
237            .replace('\\', "/");
238        scan_attr_lines(&bytes, &rel, &mut listing);
239    }
240    Ok(listing)
241}
242
/// Expand a leading `~` or `~/` using the `HOME` environment variable.
///
/// Git's `core.attributesfile` accepts both spellings, and Rust's `Path`
/// performs no such expansion. Every other input — and any input when
/// `HOME` is not set — is returned unchanged.
fn expand_tilde(path: &str) -> PathBuf {
    let bare_tilde = path == "~";
    let below_home = path.strip_prefix("~/");
    if !bare_tilde && below_home.is_none() {
        // Nothing to expand (this includes `~user/...` forms).
        return PathBuf::from(path);
    }

    match std::env::var_os("HOME") {
        Some(home) => {
            let home = PathBuf::from(home);
            match below_home {
                Some(rest) => home.join(rest),
                // A bare `~` is the home directory itself.
                None => home,
            }
        }
        None => PathBuf::from(path),
    }
}
258
259fn scan_attr_lines(bytes: &[u8], source: &str, listing: &mut PatternListing) {
260    for raw in bytes.split(|&b| b == b'\n') {
261        let line = String::from_utf8_lossy(raw);
262        // Per `gitattributes(5)`, `#` only starts a comment when it's
263        // the first non-whitespace character on the line — patterns like
264        // `\#` are valid and must not be cropped here.
265        let body = line.trim();
266        if body.is_empty() || body.starts_with('#') || body.starts_with("[attr]") {
267            continue;
268        }
269        let mut tokens = body.split_whitespace();
270        let Some(pattern) = tokens.next() else {
271            continue;
272        };
273        let mut filter: Option<bool> = None;
274        let mut lockable = false;
275        for tok in tokens {
276            if tok == "filter=lfs" {
277                filter = Some(true);
278            } else if tok == "-filter" || tok == "!filter" || tok.starts_with("-filter=") {
279                filter = Some(false);
280            } else if tok == "lockable" {
281                lockable = true;
282            }
283        }
284        if let Some(tracked) = filter {
285            listing.patterns.push(PatternEntry {
286                pattern: pattern.to_owned(),
287                source: source.to_owned(),
288                tracked,
289                lockable,
290            });
291        }
292    }
293}
294
/// Recursively collect the path of every regular file named
/// `.gitattributes` below `dir` into `out`.
///
/// Any entry named `.git` is skipped without descending into it. The
/// first I/O error from listing a directory or querying an entry's type
/// aborts the walk and is returned.
fn walk_for_gitattributes(dir: &Path, out: &mut Vec<PathBuf>) -> io::Result<()> {
    let git_dir_name = OsStr::new(".git");
    let attributes_name = OsStr::new(".gitattributes");

    for item in fs::read_dir(dir)? {
        let item = item?;
        let kind = item.file_type()?;
        let file_name = item.file_name();
        if file_name == git_dir_name {
            continue;
        }
        if kind.is_dir() {
            walk_for_gitattributes(&item.path(), out)?;
            continue;
        }
        if kind.is_file() && file_name == attributes_name {
            out.push(item.path());
        }
    }
    Ok(())
}
312
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fresh temporary directory standing in for a repository root.
    fn scratch_repo() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Create directory `rel` (and any missing parents) below the repo root.
    fn mkdirs(repo: &TempDir, rel: &str) {
        std::fs::create_dir_all(repo.path().join(rel)).unwrap();
    }

    /// Write `contents` to `rel` below the repo root; the parent directory
    /// must already exist.
    fn put(repo: &TempDir, rel: &str, contents: &str) {
        std::fs::write(repo.path().join(rel), contents).unwrap();
    }

    /// `(pattern, source)` pairs for the given entries, in iteration order.
    fn pairs<'a>(entries: impl Iterator<Item = &'a PatternEntry>) -> Vec<(&'a str, &'a str)> {
        entries
            .map(|entry| (entry.pattern.as_str(), entry.source.as_str()))
            .collect()
    }

    #[test]
    fn empty_set_has_no_matches() {
        let attrs = AttrSet::empty();
        assert_eq!(attrs.value("foo.txt", "filter"), None);
        assert!(!attrs.is_lfs_tracked("foo.txt"));
        assert!(!attrs.is_lockable("foo.txt"));
    }

    #[test]
    fn buffer_basename_match() {
        let attrs = AttrSet::from_buffer(b"*.bin filter=lfs diff=lfs merge=lfs -text\n");
        assert!(attrs.is_lfs_tracked("foo.bin"));
        assert!(attrs.is_lfs_tracked("nested/dir/foo.bin"));
        assert!(!attrs.is_lfs_tracked("foo.txt"));
    }

    #[test]
    fn value_returns_raw_string() {
        let attrs = AttrSet::from_buffer(b"*.txt eol=lf\n");
        assert_eq!(attrs.value("a.txt", "eol").as_deref(), Some("lf"));
    }

    #[test]
    fn unset_attribute_via_dash_prefix() {
        let attrs = AttrSet::from_buffer(b"*.txt filter=lfs\nspecial.txt -filter\n");
        assert!(attrs.is_lfs_tracked("a.txt"));
        // `special.txt -filter` removes the filter attribute → value is None.
        assert_eq!(attrs.value("special.txt", "filter"), None);
        assert!(!attrs.is_lfs_tracked("special.txt"));
    }

    #[test]
    fn lockable_set_form() {
        let attrs = AttrSet::from_buffer(b"*.psd lockable\n");
        assert!(attrs.is_lockable("art/cover.psd"));
        assert!(!attrs.is_lockable("readme.txt"));
    }

    #[test]
    fn is_set_treats_false_value_as_unset() {
        let attrs = AttrSet::from_buffer(b"truthy lockable\nfalsy  lockable=false\n");
        assert!(attrs.is_set("truthy", "lockable"));
        assert!(!attrs.is_set("falsy", "lockable"));
    }

    #[test]
    fn rooted_pattern_only_matches_top_level() {
        let attrs = AttrSet::from_buffer(b"/top.bin filter=lfs\n");
        assert!(attrs.is_lfs_tracked("top.bin"));
        assert!(!attrs.is_lfs_tracked("nested/top.bin"));
    }

    #[test]
    fn workdir_loads_root_gitattributes() {
        let repo = scratch_repo();
        mkdirs(&repo, ".git/info");
        put(
            &repo,
            ".gitattributes",
            "*.bin filter=lfs diff=lfs merge=lfs -text\n",
        );

        let attrs = AttrSet::from_workdir(repo.path()).unwrap();
        assert!(attrs.is_lfs_tracked("a.bin"));
        assert!(attrs.is_lfs_tracked("sub/a.bin"));
    }

    #[test]
    fn deeper_gitattributes_overrides_root() {
        let repo = scratch_repo();
        mkdirs(&repo, "sub/.git_placeholder");
        put(&repo, ".gitattributes", "*.bin filter=lfs\n");
        put(&repo, "sub/.gitattributes", "*.bin -filter\n");

        let attrs = AttrSet::from_workdir(repo.path()).unwrap();
        assert!(attrs.is_lfs_tracked("a.bin"));
        // Deeper -filter wins for paths within sub/.
        assert!(!attrs.is_lfs_tracked("sub/a.bin"));
    }

    #[test]
    fn info_attributes_loaded_from_dotgit() {
        let repo = scratch_repo();
        mkdirs(&repo, ".git/info");
        put(&repo, ".git/info/attributes", "*.bin filter=lfs\n");

        let attrs = AttrSet::from_workdir(repo.path()).unwrap();
        assert!(attrs.is_lfs_tracked("a.bin"));
    }

    #[test]
    fn list_lfs_patterns_recursive() {
        // Mirror upstream t-track.sh's "track" test fixture: root
        // .gitattributes + .git/info/attributes + nested per-directory
        // files, with one nested dir adding `-filter` exclusions.
        let repo = scratch_repo();
        mkdirs(&repo, ".git/info");
        mkdirs(&repo, "a/b");
        put(
            &repo,
            ".gitattributes",
            "* text=auto\n*.jpg filter=lfs diff=lfs merge=lfs -text\n",
        );
        put(&repo, ".git/info/attributes", "*.mov filter=lfs -text\n");
        put(&repo, "a/.gitattributes", "*.gif filter=lfs -text\n");
        put(
            &repo,
            "a/b/.gitattributes",
            "*.png filter=lfs -text\n*.gif -filter -text\n*.mov -filter=lfs -text\n",
        );

        let listing = list_lfs_patterns(repo.path()).unwrap();

        // info/attributes is loaded first, then root → deepest .gitattributes.
        assert_eq!(
            pairs(listing.tracked()),
            vec![
                ("*.mov", ".git/info/attributes"),
                ("*.jpg", ".gitattributes"),
                ("*.gif", "a/.gitattributes"),
                ("*.png", "a/b/.gitattributes"),
            ]
        );
        assert_eq!(
            pairs(listing.excluded()),
            vec![
                ("*.gif", "a/b/.gitattributes"),
                ("*.mov", "a/b/.gitattributes"),
            ]
        );
    }

    #[test]
    fn list_lfs_patterns_skips_macros_and_comments() {
        let repo = scratch_repo();
        put(
            &repo,
            ".gitattributes",
            "[attr]binary -diff -merge -text\n# *.jpg filter=lfs\n*.bin filter=lfs -text\n",
        );

        let listing = list_lfs_patterns(repo.path()).unwrap();
        let tracked: Vec<&PatternEntry> = listing.tracked().collect();
        assert_eq!(tracked.len(), 1);
        assert_eq!(tracked[0].pattern, "*.bin");
    }

    #[test]
    fn list_picks_up_lockable_attribute() {
        let repo = scratch_repo();
        put(
            &repo,
            ".gitattributes",
            "*.psd filter=lfs diff=lfs merge=lfs lockable\n*.bin filter=lfs diff=lfs merge=lfs\n",
        );

        let listing = list_lfs_patterns(repo.path()).unwrap();
        assert_eq!(listing.patterns.len(), 2);

        let (psd, bin) = (&listing.patterns[0], &listing.patterns[1]);
        assert_eq!(psd.pattern, "*.psd");
        assert!(psd.lockable);
        assert_eq!(bin.pattern, "*.bin");
        assert!(!bin.lockable);
    }

    #[test]
    fn bang_filter_treated_as_excluded() {
        let repo = scratch_repo();
        put(&repo, ".gitattributes", "*.dat filter=lfs\na.dat !filter\n");

        let listing = list_lfs_patterns(repo.path()).unwrap();
        assert_eq!(listing.patterns.len(), 2);
        assert!(listing.patterns[0].tracked);
        assert_eq!(listing.patterns[1].pattern, "a.dat");
        assert!(!listing.patterns[1].tracked);
    }

    #[test]
    fn workdir_skips_dotgit_directory() {
        // A .gitattributes inside .git/ must NOT be picked up — only
        // .git/info/attributes is, and that one is loaded explicitly by
        // `from_workdir`.
        let repo = scratch_repo();
        mkdirs(&repo, ".git");
        put(&repo, ".git/.gitattributes", "*.bin filter=lfs\n");

        let attrs = AttrSet::from_workdir(repo.path()).unwrap();
        assert!(!attrs.is_lfs_tracked("a.bin"));
    }
}