Skip to main content

git_lfs_git/
attr.rs

1//! `.gitattributes` parsing and matching.
2//!
3//! Backed by `gix-attributes` + `gix-glob`, which together implement Git's
4//! wildmatch + macro + per-directory precedence semantics. The "shell out
5//! to git, not gix" rule in CLAUDE.md applies to runtime git operations
6//! (rev-list, cat-file, etc.), not to using gix-* crates as parsing libs.
7//!
8//! Two construction modes:
9//!
10//! - [`AttrSet::from_workdir`] — discover and load every `.gitattributes`
11//!   in the working tree, plus `.git/info/attributes`. Per-directory files
12//!   take precedence over `info/attributes`; deeper directories win over
13//!   shallower (Git's standard "more specific wins").
14//! - [`AttrSet::from_buffer`] — load from a single in-memory buffer. For
15//!   tests and one-shot matching that doesn't need a workdir.
16//!
17//! Once built, query with [`AttrSet::value`] / [`AttrSet::is_set`], plus
18//! the LFS-specific helpers [`AttrSet::is_lfs_tracked`] /
19//! [`AttrSet::is_lockable`].
20
21use std::ffi::OsStr;
22use std::fs;
23use std::io;
24use std::path::{Path, PathBuf};
25
26use bstr::ByteSlice;
27use gix_attributes::{
28    Search, StateRef,
29    search::{MetadataCollection, Outcome},
30};
31use gix_glob::pattern::Case;
32
/// A queryable set of `.gitattributes` patterns.
///
/// Build one with [`AttrSet::empty`], [`AttrSet::from_buffer`] or
/// [`AttrSet::from_workdir`], then query it with [`AttrSet::value`] and
/// the helpers layered on top of it.
pub struct AttrSet {
    // Pattern lists added so far; every lookup is run against this.
    search: Search,
    // Metadata collection handed to `search` whenever patterns are added,
    // and used again to initialise the outcome of each lookup.
    collection: MetadataCollection,
}
38
39impl AttrSet {
40    /// Empty set, seeded only with Git's built-in `[attr]binary` macro
41    /// (so patterns referencing `binary` resolve correctly).
42    pub fn empty() -> Self {
43        let mut collection = MetadataCollection::default();
44        let mut search = Search::default();
45        search.add_patterns_buffer(
46            b"[attr]binary -diff -merge -text",
47            "[builtin]".into(),
48            None,
49            &mut collection,
50            true,
51        );
52        Self { search, collection }
53    }
54
55    /// Build from a single `.gitattributes`-format buffer.
56    pub fn from_buffer(bytes: &[u8]) -> Self {
57        let mut me = Self::empty();
58        me.search.add_patterns_buffer(
59            bytes,
60            "<memory>".into(),
61            None,
62            &mut me.collection,
63            true,
64        );
65        me
66    }
67
68    /// Discover every `.gitattributes` reachable from `repo_root` (skipping
69    /// the `.git/` directory) and load them along with `.git/info/attributes`
70    /// if it exists.
71    pub fn from_workdir(repo_root: &Path) -> io::Result<Self> {
72        let mut me = Self::empty();
73        let mut buf = Vec::new();
74
75        let info = repo_root.join(".git").join("info").join("attributes");
76        if info.exists() {
77            me.search.add_patterns_file(
78                info,
79                true,
80                None,
81                &mut buf,
82                &mut me.collection,
83                true,
84            )?;
85        }
86
87        let mut found = Vec::new();
88        walk_for_gitattributes(repo_root, &mut found)?;
89        // Shallow → deep: gix-attributes iterates pattern lists in reverse
90        // when matching, so the last-added (deepest) wins — matching Git's
91        // "more specific path overrides shallower" semantics.
92        found.sort_by_key(|p| p.components().count());
93        for path in found {
94            // `root` is always the repo root. gix-glob computes each file's
95            // relative `base` by stripping the repo-root prefix from
96            // `source.parent()` — so root.gitattributes ends up with no base
97            // (matches paths directly) while sub/.gitattributes ends up with
98            // base `sub/` (strips `sub/` before matching).
99            me.search.add_patterns_file(
100                path,
101                true,
102                Some(repo_root),
103                &mut buf,
104                &mut me.collection,
105                true,
106            )?;
107        }
108        Ok(me)
109    }
110
111    /// Return the resolved value of `attr` for `path` (relative to the
112    /// repo root, with `/` separators). `None` for unspecified or unset.
113    /// `Set`/`Value(v)` map to `Some("true")` / `Some(v)`.
114    pub fn value(&self, path: &str, attr: &str) -> Option<String> {
115        let mut out = Outcome::default();
116        out.initialize_with_selection(&self.collection, [attr]);
117        self.search.pattern_matching_relative_path(
118            path.into(),
119            Case::Sensitive,
120            None,
121            &mut out,
122        );
123        for m in out.iter_selected() {
124            if m.assignment.name.as_str() != attr {
125                continue;
126            }
127            return match m.assignment.state {
128                StateRef::Set => Some("true".into()),
129                StateRef::Value(v) => Some(v.as_bstr().to_str_lossy().into_owned()),
130                StateRef::Unset | StateRef::Unspecified => None,
131            };
132        }
133        None
134    }
135
136    /// True iff `attr` is set for `path` — that is, `attr` or `attr=<v>`
137    /// where `v` is anything other than the literal `"false"`.
138    pub fn is_set(&self, path: &str, attr: &str) -> bool {
139        matches!(self.value(path, attr).as_deref(), Some(v) if v != "false")
140    }
141
142    /// True iff `path` matches a `filter=lfs` line.
143    pub fn is_lfs_tracked(&self, path: &str) -> bool {
144        self.value(path, "filter").as_deref() == Some("lfs")
145    }
146
147    /// True iff `path` matches a `lockable` line.
148    pub fn is_lockable(&self, path: &str) -> bool {
149        self.is_set(path, "lockable")
150    }
151}
152
/// A single LFS-related pattern line discovered while listing.
///
/// One entry is produced per attributes line that mentions the `filter`
/// attribute in a form the listing recognises.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatternEntry {
    /// The pattern text as it appears in the file. Quoted patterns
    /// (`"..."`) are meant to be stored without their surrounding quotes.
    // NOTE(review): confirm the line tokenizer actually unquotes patterns.
    pub pattern: String,
    /// Path of the `.gitattributes` (or `.git/info/attributes`) file the
    /// pattern was found in, relative to the repo root and with `/`
    /// separators.
    pub source: String,
    /// True if the line establishes LFS tracking (`filter=lfs`); false if
    /// it explicitly removes / unspecifies the filter (`-filter`,
    /// `!filter`, `-filter=...`).
    pub tracked: bool,
    /// True if the same line carries the `lockable` attribute in its bare
    /// `set` form; `lockable=false` is treated as not lockable.
    pub lockable: bool,
}
171
172/// All LFS-related patterns visible in a workdir, in load order
173/// (`.git/info/attributes` first, then `.gitattributes` from shallow to
174/// deep).
175#[derive(Debug, Default, PartialEq, Eq)]
176pub struct PatternListing {
177    pub patterns: Vec<PatternEntry>,
178}
179
180impl PatternListing {
181    /// Lines that establish LFS tracking (`filter=lfs`).
182    pub fn tracked(&self) -> impl Iterator<Item = &PatternEntry> {
183        self.patterns.iter().filter(|p| p.tracked)
184    }
185
186    /// Lines that explicitly remove / unspecify the LFS filter.
187    pub fn excluded(&self) -> impl Iterator<Item = &PatternEntry> {
188        self.patterns.iter().filter(|p| !p.tracked)
189    }
190}
191
192/// Walk `.gitattributes` across the workdir plus `.git/info/attributes`,
193/// extracting LFS-related pattern lines for `git lfs track`'s listing mode.
194///
195/// Pattern matching is *not* needed here — we're just enumerating the raw
196/// pattern text per source file — so this uses a simple line tokenizer
197/// rather than [`AttrSet`]'s full wildmatch machinery.
198pub fn list_lfs_patterns(repo_root: &Path) -> io::Result<PatternListing> {
199    let mut listing = PatternListing::default();
200
201    let info = repo_root.join(".git").join("info").join("attributes");
202    if info.exists() {
203        let bytes = fs::read(&info)?;
204        scan_attr_lines(&bytes, ".git/info/attributes", &mut listing);
205    }
206
207    let mut found = Vec::new();
208    walk_for_gitattributes(repo_root, &mut found)?;
209    found.sort_by_key(|p| p.components().count());
210    for path in found {
211        let bytes = fs::read(&path)?;
212        let rel = path
213            .strip_prefix(repo_root)
214            .unwrap_or(&path)
215            .to_string_lossy()
216            .replace('\\', "/");
217        scan_attr_lines(&bytes, &rel, &mut listing);
218    }
219    Ok(listing)
220}
221
222fn scan_attr_lines(bytes: &[u8], source: &str, listing: &mut PatternListing) {
223    for raw in bytes.split(|&b| b == b'\n') {
224        let line = String::from_utf8_lossy(raw);
225        // Per `gitattributes(5)`, `#` only starts a comment when it's
226        // the first non-whitespace character on the line — patterns like
227        // `\#` are valid and must not be cropped here.
228        let body = line.trim();
229        if body.is_empty() || body.starts_with('#') || body.starts_with("[attr]") {
230            continue;
231        }
232        let mut tokens = body.split_whitespace();
233        let Some(pattern) = tokens.next() else {
234            continue;
235        };
236        let mut filter: Option<bool> = None;
237        let mut lockable = false;
238        for tok in tokens {
239            if tok == "filter=lfs" {
240                filter = Some(true);
241            } else if tok == "-filter"
242                || tok == "!filter"
243                || tok.starts_with("-filter=")
244            {
245                filter = Some(false);
246            } else if tok == "lockable" {
247                lockable = true;
248            }
249        }
250        if let Some(tracked) = filter {
251            listing.patterns.push(PatternEntry {
252                pattern: pattern.to_owned(),
253                source: source.to_owned(),
254                tracked,
255                lockable,
256            });
257        }
258    }
259}
260
/// Recursively collect every regular file named `.gitattributes` below
/// `dir` into `out`.
///
/// Any entry named `.git` is skipped, whether it is a directory or a
/// file. Each directory's entries are visited in name order, because
/// `read_dir` yields them in a platform- and filesystem-dependent order
/// and callers only re-sort by depth; without this, same-depth files
/// would come back in an unpredictable order.
///
/// # Errors
///
/// Returns the first I/O error raised while reading a directory or
/// querying an entry's file type.
fn walk_for_gitattributes(dir: &Path, out: &mut Vec<PathBuf>) -> io::Result<()> {
    let mut entries = fs::read_dir(dir)?.collect::<io::Result<Vec<_>>>()?;
    entries.sort_by_cached_key(fs::DirEntry::file_name);
    for entry in entries {
        let ft = entry.file_type()?;
        let name = entry.file_name();
        if name == OsStr::new(".git") {
            continue;
        }
        let path = entry.path();
        if ft.is_dir() {
            walk_for_gitattributes(&path, out)?;
        } else if ft.is_file() && name == OsStr::new(".gitattributes") {
            out.push(path);
        }
    }
    Ok(())
}
278
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create directory `rel` (and any missing parents) beneath `root`.
    fn mkdir(root: &std::path::Path, rel: &str) {
        std::fs::create_dir_all(root.join(rel)).unwrap();
    }

    /// Write `contents` to the file `rel` beneath `root`.
    fn put(root: &std::path::Path, rel: &str, contents: &str) {
        std::fs::write(root.join(rel), contents).unwrap();
    }

    #[test]
    fn empty_set_has_no_matches() {
        let set = AttrSet::empty();
        assert!(set.value("foo.txt", "filter").is_none());
        assert!(!set.is_lfs_tracked("foo.txt"));
        assert!(!set.is_lockable("foo.txt"));
    }

    #[test]
    fn buffer_basename_match() {
        let set = AttrSet::from_buffer(b"*.bin filter=lfs diff=lfs merge=lfs -text\n");
        assert!(set.is_lfs_tracked("foo.bin"));
        assert!(set.is_lfs_tracked("nested/dir/foo.bin"));
        assert!(!set.is_lfs_tracked("foo.txt"));
    }

    #[test]
    fn value_returns_raw_string() {
        let set = AttrSet::from_buffer(b"*.txt eol=lf\n");
        assert_eq!(set.value("a.txt", "eol"), Some("lf".to_owned()));
    }

    #[test]
    fn unset_attribute_via_dash_prefix() {
        let set = AttrSet::from_buffer(b"*.txt filter=lfs\nspecial.txt -filter\n");
        assert!(set.is_lfs_tracked("a.txt"));
        // The `-filter` line unsets the attribute, so no value is reported.
        assert!(set.value("special.txt", "filter").is_none());
        assert!(!set.is_lfs_tracked("special.txt"));
    }

    #[test]
    fn lockable_set_form() {
        let set = AttrSet::from_buffer(b"*.psd lockable\n");
        assert!(set.is_lockable("art/cover.psd"));
        assert!(!set.is_lockable("readme.txt"));
    }

    #[test]
    fn is_set_treats_false_value_as_unset() {
        let set = AttrSet::from_buffer(b"truthy lockable\nfalsy  lockable=false\n");
        assert!(set.is_set("truthy", "lockable"));
        assert!(!set.is_set("falsy", "lockable"));
    }

    #[test]
    fn rooted_pattern_only_matches_top_level() {
        let set = AttrSet::from_buffer(b"/top.bin filter=lfs\n");
        assert!(set.is_lfs_tracked("top.bin"));
        assert!(!set.is_lfs_tracked("nested/top.bin"));
    }

    #[test]
    fn workdir_loads_root_gitattributes() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        mkdir(root, ".git/info");
        put(
            root,
            ".gitattributes",
            "*.bin filter=lfs diff=lfs merge=lfs -text\n",
        );

        let set = AttrSet::from_workdir(root).unwrap();
        assert!(set.is_lfs_tracked("a.bin"));
        assert!(set.is_lfs_tracked("sub/a.bin"));
    }

    #[test]
    fn deeper_gitattributes_overrides_root() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        mkdir(root, "sub/.git_placeholder");
        put(root, ".gitattributes", "*.bin filter=lfs\n");
        put(root, "sub/.gitattributes", "*.bin -filter\n");

        let set = AttrSet::from_workdir(root).unwrap();
        assert!(set.is_lfs_tracked("a.bin"));
        // Inside sub/ the nested `-filter` takes precedence.
        assert!(!set.is_lfs_tracked("sub/a.bin"));
    }

    #[test]
    fn info_attributes_loaded_from_dotgit() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        mkdir(root, ".git/info");
        put(root, ".git/info/attributes", "*.bin filter=lfs\n");

        let set = AttrSet::from_workdir(root).unwrap();
        assert!(set.is_lfs_tracked("a.bin"));
    }

    #[test]
    fn list_lfs_patterns_recursive() {
        // Same layout as upstream t-track.sh's "track" fixture: a root
        // .gitattributes, .git/info/attributes, and nested per-directory
        // files, one of which adds `-filter` exclusions.
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        mkdir(root, ".git/info");
        mkdir(root, "a/b");
        put(
            root,
            ".gitattributes",
            concat!(
                "* text=auto\n",
                "*.jpg filter=lfs diff=lfs merge=lfs -text\n",
            ),
        );
        put(root, ".git/info/attributes", "*.mov filter=lfs -text\n");
        put(root, "a/.gitattributes", "*.gif filter=lfs -text\n");
        put(
            root,
            "a/b/.gitattributes",
            concat!(
                "*.png filter=lfs -text\n",
                "*.gif -filter -text\n",
                "*.mov -filter=lfs -text\n",
            ),
        );

        let listing = list_lfs_patterns(root).unwrap();
        let pair = |entry: &PatternEntry| (entry.pattern.clone(), entry.source.clone());
        let tracked: Vec<(String, String)> = listing.tracked().map(pair).collect();
        let excluded: Vec<(String, String)> = listing.excluded().map(pair).collect();
        let owned = |pairs: &[(&str, &str)]| -> Vec<(String, String)> {
            pairs
                .iter()
                .map(|(pattern, source)| (pattern.to_string(), source.to_string()))
                .collect()
        };

        // info/attributes comes first, then root → deepest .gitattributes.
        assert_eq!(
            tracked,
            owned(&[
                ("*.mov", ".git/info/attributes"),
                ("*.jpg", ".gitattributes"),
                ("*.gif", "a/.gitattributes"),
                ("*.png", "a/b/.gitattributes"),
            ])
        );
        assert_eq!(
            excluded,
            owned(&[
                ("*.gif", "a/b/.gitattributes"),
                ("*.mov", "a/b/.gitattributes"),
            ])
        );
    }

    #[test]
    fn list_lfs_patterns_skips_macros_and_comments() {
        let tmp = TempDir::new().unwrap();
        put(
            tmp.path(),
            ".gitattributes",
            concat!(
                "[attr]binary -diff -merge -text\n",
                "# *.jpg filter=lfs\n",
                "*.bin filter=lfs -text\n",
            ),
        );
        let listing = list_lfs_patterns(tmp.path()).unwrap();
        let tracked: Vec<&PatternEntry> = listing.tracked().collect();
        assert_eq!(tracked.len(), 1);
        assert_eq!(tracked[0].pattern, "*.bin");
    }

    #[test]
    fn list_picks_up_lockable_attribute() {
        let tmp = TempDir::new().unwrap();
        put(
            tmp.path(),
            ".gitattributes",
            concat!(
                "*.psd filter=lfs diff=lfs merge=lfs lockable\n",
                "*.bin filter=lfs diff=lfs merge=lfs\n",
            ),
        );
        let listing = list_lfs_patterns(tmp.path()).unwrap();
        assert_eq!(listing.patterns.len(), 2);
        let (psd, bin) = (&listing.patterns[0], &listing.patterns[1]);
        assert_eq!(psd.pattern, "*.psd");
        assert!(psd.lockable);
        assert_eq!(bin.pattern, "*.bin");
        assert!(!bin.lockable);
    }

    #[test]
    fn bang_filter_treated_as_excluded() {
        let tmp = TempDir::new().unwrap();
        put(
            tmp.path(),
            ".gitattributes",
            concat!("*.dat filter=lfs\n", "a.dat !filter\n"),
        );
        let listing = list_lfs_patterns(tmp.path()).unwrap();
        assert_eq!(listing.patterns.len(), 2);
        assert!(listing.patterns[0].tracked);
        assert_eq!(listing.patterns[1].pattern, "a.dat");
        assert!(!listing.patterns[1].tracked);
    }

    #[test]
    fn workdir_skips_dotgit_directory() {
        // A .gitattributes placed inside .git/ must be ignored; the only
        // file read from there is .git/info/attributes, which
        // `from_workdir` loads explicitly.
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        mkdir(root, ".git");
        put(root, ".git/.gitattributes", "*.bin filter=lfs\n");

        let set = AttrSet::from_workdir(root).unwrap();
        assert!(!set.is_lfs_tracked("a.bin"));
    }
}