use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use bstr::ByteSlice;
use gix_attributes::{
Search, StateRef,
search::{MetadataCollection, Outcome},
};
use gix_glob::pattern::Case;
/// Attribute lookup table assembled from one or more gitattributes buffers.
///
/// Every buffer is first passed through `intake_buffer` (which records macro
/// definitions and rewrites `!macro` tokens) and then handed to the
/// `gix_attributes` search.
pub struct AttrSet {
// Pattern matcher that every buffer is added to and that `value` queries.
search: Search,
// Metadata collection passed to every `add_patterns_buffer` call and used to
// initialise the lookup outcome in `value`.
collection: MetadataCollection,
// Macro name -> names of the attributes that macro assigns. Only the names are
// kept: leading `-`/`!` and any `=value` suffix are dropped when recording.
macros: HashMap<String, Vec<String>>,
}
impl AttrSet {
/// Builds a set that holds nothing but the built-in `binary` macro.
///
/// The macro is registered twice on purpose: once with the pattern search
/// (under the `[builtin]` source label) and once in the local macro table
/// that `intake_buffer` consults when it rewrites `!macro` tokens.
pub fn empty() -> Self {
    let mut search = Search::default();
    let mut collection = MetadataCollection::default();
    search.add_patterns_buffer(
        b"[attr]binary -diff -merge -text",
        "[builtin]".into(),
        None,
        &mut collection,
        true,
    );

    // Attribute names only; the `-` prefixes are irrelevant for expansion.
    let binary_members: Vec<String> = ["diff", "merge", "text"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    let macros = HashMap::from([("binary".to_string(), binary_members)]);

    Self {
        search,
        collection,
        macros,
    }
}
pub fn from_buffer(bytes: &[u8]) -> Self {
let mut me = Self::empty();
let rewritten = me.intake_buffer(bytes);
me.search.add_patterns_buffer(
&rewritten,
"<memory>".into(),
None,
&mut me.collection,
true,
);
me
}
pub fn add_buffer_at(&mut self, bytes: &[u8], dir: &str) {
let virtual_root = std::path::PathBuf::from("/__lfs_virt");
let source = if dir.is_empty() {
virtual_root.join(".gitattributes")
} else {
virtual_root.join(dir).join(".gitattributes")
};
let rewritten = self.intake_buffer(bytes);
self.search.add_patterns_buffer(
&rewritten,
source,
Some(&virtual_root),
&mut self.collection,
true,
);
}
/// Builds a set from the attribute files found in a working tree.
///
/// Sources are added in this order:
/// 1. `<repo_root>/.git/info/attributes`, if it exists (added without a root);
/// 2. every `.gitattributes` found by `walk_for_gitattributes`, ordered by
///    the number of path components so shallower files are added first.
///
/// # Errors
/// Returns any I/O error raised while walking the tree or reading a file.
pub fn from_workdir(repo_root: &Path) -> io::Result<Self> {
    let mut set = Self::empty();

    let mut info_path = repo_root.to_path_buf();
    info_path.extend([".git", "info", "attributes"]);
    if info_path.exists() {
        let raw = fs::read(&info_path)?;
        let expanded = set.intake_buffer(&raw);
        set.search
            .add_patterns_buffer(&expanded, info_path, None, &mut set.collection, true);
    }

    let mut attr_files = Vec::new();
    walk_for_gitattributes(repo_root, &mut attr_files)?;
    // Stable sort: files at the same depth keep their discovery order.
    attr_files.sort_by_key(|file| file.components().count());

    for file in attr_files {
        let raw = fs::read(&file)?;
        let expanded = set.intake_buffer(&raw);
        set.search.add_patterns_buffer(
            &expanded,
            file,
            Some(repo_root),
            &mut set.collection,
            true,
        );
    }

    Ok(set)
}
fn intake_buffer(&mut self, bytes: &[u8]) -> Vec<u8> {
let Ok(s) = std::str::from_utf8(bytes) else {
return bytes.to_vec();
};
let mut out = Vec::with_capacity(bytes.len());
for line in s.split('\n') {
let trimmed = line.trim_start();
if let Some(rest) = trimmed.strip_prefix("[attr]") {
let mut tokens = rest.split_whitespace();
if let Some(name) = tokens.next() {
let attrs: Vec<String> = tokens
.map(|t| {
let key = t.trim_start_matches(['-', '!']);
key.split_once('=')
.map(|(k, _)| k)
.unwrap_or(key)
.to_string()
})
.filter(|k| !k.is_empty())
.collect();
if !attrs.is_empty() {
self.macros.insert(name.to_string(), attrs);
}
}
out.extend_from_slice(line.as_bytes());
out.push(b'\n');
continue;
}
if trimmed.is_empty() || trimmed.starts_with('#') {
out.extend_from_slice(line.as_bytes());
out.push(b'\n');
continue;
}
let leading_ws_len = line.len() - trimmed.len();
out.extend_from_slice(&line.as_bytes()[..leading_ws_len]);
let mut tokens = trimmed.split_whitespace();
if let Some(pattern) = tokens.next() {
out.extend_from_slice(pattern.as_bytes());
for tok in tokens {
if let Some(name) = tok.strip_prefix('!')
&& let Some(macro_attrs) = self.macros.get(name)
{
for k in macro_attrs {
out.push(b' ');
out.push(b'!');
out.extend_from_slice(k.as_bytes());
}
continue;
}
out.push(b' ');
out.extend_from_slice(tok.as_bytes());
}
}
out.push(b'\n');
}
out
}
/// Looks up attribute `attr` for the repository-relative `path`.
///
/// Matching is case-sensitive. Returns:
/// * `Some("true")` when the attribute is set without a value;
/// * `Some(value)` when it is assigned a value (lossily decoded to UTF-8);
/// * `None` when it is unset, unspecified, or no selected match carries
///   that attribute name.
pub fn value(&self, path: &str, attr: &str) -> Option<String> {
    let mut outcome = Outcome::default();
    outcome.initialize_with_selection(&self.collection, [attr]);
    self.search
        .pattern_matching_relative_path(path.into(), Case::Sensitive, None, &mut outcome);

    // Only the first selected match with the requested name is considered.
    let hit = outcome
        .iter_selected()
        .find(|m| m.assignment.name.as_str() == attr)?;

    match hit.assignment.state {
        StateRef::Set => Some("true".into()),
        StateRef::Value(v) => Some(v.as_bstr().to_str_lossy().into_owned()),
        StateRef::Unset | StateRef::Unspecified => None,
    }
}
/// Reports whether `attr` is in effect for `path`.
///
/// True when [`AttrSet::value`] yields any value other than the literal
/// string `false`; an absent value counts as not set.
pub fn is_set(&self, path: &str, attr: &str) -> bool {
    self.value(path, attr).is_some_and(|v| v != "false")
}
/// Reports whether `path` resolves to `filter=lfs`.
pub fn is_lfs_tracked(&self, path: &str) -> bool {
    matches!(self.value(path, "filter").as_deref(), Some("lfs"))
}
/// Reports whether the `lockable` attribute is in effect for `path`,
/// using the same rules as `is_set` (a value of `false` counts as not set).
pub fn is_lockable(&self, path: &str) -> bool {
self.is_set(path, "lockable")
}
}
/// One pattern line of an attributes file that says something about the
/// `filter` attribute.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatternEntry {
    /// The pattern text (first whitespace-separated token of the line).
    pub pattern: String,
    /// Label of the file the line came from.
    pub source: String,
    /// `true` when the line enables the LFS filter, `false` when it clears it.
    pub tracked: bool,
    /// `true` when the line carries a bare `lockable` token.
    pub lockable: bool,
}

/// Ordered collection of [`PatternEntry`] values.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct PatternListing {
    /// Entries in the order they were recorded.
    pub patterns: Vec<PatternEntry>,
}

impl PatternListing {
    /// Iterates over the entries whose `tracked` flag is `true`.
    pub fn tracked(&self) -> impl Iterator<Item = &PatternEntry> {
        self.with_tracked_flag(true)
    }

    /// Iterates over the entries whose `tracked` flag is `false`.
    pub fn excluded(&self) -> impl Iterator<Item = &PatternEntry> {
        self.with_tracked_flag(false)
    }

    /// Shared filter behind `tracked` and `excluded`; preserves entry order.
    fn with_tracked_flag(&self, wanted: bool) -> impl Iterator<Item = &PatternEntry> {
        self.patterns
            .iter()
            .filter(move |entry| entry.tracked == wanted)
    }
}
/// Tracks which attribute macros currently turn the LFS filter on.
#[derive(Default)]
struct MacroState {
    /// Names of macros whose definition ends up enabling `filter=lfs`.
    enables_lfs: std::collections::HashSet<String>,
}

impl MacroState {
    /// Records an `[attr]name …` definition.
    ///
    /// Lines that are not macro definitions, or that have no macro name, are
    /// ignored. The tokens after the name are classified in order and the last
    /// filter-affecting token decides whether the macro enables LFS. A macro
    /// that does not enable LFS is removed from the set, so a redefinition
    /// overrides an earlier one.
    fn ingest(&mut self, line: &str) {
        let mut tokens = match line.trim_start().strip_prefix("[attr]") {
            Some(definition) => definition.split_whitespace(),
            None => return,
        };
        let name = match tokens.next() {
            Some(name) => name,
            None => return,
        };

        let enables = tokens.fold(false, |current, tok| match self.classify(tok) {
            FilterEffect::SetLfs => true,
            FilterEffect::Clear => false,
            FilterEffect::None => current,
        });

        if enables {
            self.enables_lfs.insert(name.to_owned());
        } else {
            self.enables_lfs.remove(name);
        }
    }

    /// Classifies a single attribute token by its effect on the LFS filter.
    ///
    /// * `filter=lfs`, or the bare name of an LFS-enabling macro → `SetLfs`
    /// * `-filter`, `!filter`, `-filter=…`, or a `-`/`!`-prefixed
    ///   LFS-enabling macro → `Clear`
    /// * anything else → `None`
    fn classify(&self, tok: &str) -> FilterEffect {
        match tok {
            "filter=lfs" => return FilterEffect::SetLfs,
            "-filter" | "!filter" => return FilterEffect::Clear,
            _ if tok.starts_with("-filter=") => return FilterEffect::Clear,
            _ => {}
        }

        // A single leading `-` or `!` marks a negated attribute or macro.
        let negated = tok.strip_prefix(|c: char| c == '-' || c == '!');
        let known_macro = self.enables_lfs.contains(negated.unwrap_or(tok));
        match (negated.is_some(), known_macro) {
            (true, true) => FilterEffect::Clear,
            (false, true) => FilterEffect::SetLfs,
            (_, false) => FilterEffect::None,
        }
    }
}

/// Effect of one attribute token on the `filter` attribute.
enum FilterEffect {
    /// The token turns the LFS filter on.
    SetLfs,
    /// The token removes or unsets the filter.
    Clear,
    /// The token does not touch the filter.
    None,
}
/// Lists every pattern line that enables or clears the LFS filter.
///
/// Sources are scanned in this order, sharing one macro table:
/// 1. the global attributes file, when `read_global_attributes` finds one;
/// 2. `<repo_root>/.git/info/attributes`, if it exists;
/// 3. every `.gitattributes` in the working tree, shallowest first.
///
/// Macro definitions are honoured in the first two sources and in a
/// `.gitattributes` located directly in `repo_root`; deeper files may use
/// macros but cannot define them.
///
/// # Errors
/// Returns any I/O error raised while walking the tree or reading a
/// repository-local file. An unreadable global file is silently skipped.
pub fn list_lfs_patterns(repo_root: &Path) -> io::Result<PatternListing> {
    let mut listing = PatternListing::default();
    let mut macros = MacroState::default();

    if let Some((label, contents)) = read_global_attributes(repo_root) {
        scan_attr_lines(&contents, &label, &mut listing, &mut macros, true);
    }

    let mut info_path = repo_root.to_path_buf();
    info_path.extend([".git", "info", "attributes"]);
    if info_path.exists() {
        let contents = fs::read(&info_path)?;
        scan_attr_lines(
            &contents,
            ".git/info/attributes",
            &mut listing,
            &mut macros,
            true,
        );
    }

    let mut attr_files = Vec::new();
    walk_for_gitattributes(repo_root, &mut attr_files)?;
    // Stable sort: files at the same depth keep their discovery order.
    attr_files.sort_by_key(|file| file.components().count());

    for file in attr_files {
        let contents = fs::read(&file)?;
        let relative = match file.strip_prefix(repo_root) {
            Ok(relative) => relative,
            Err(_) => file.as_path(),
        };
        // Always report sources with forward slashes.
        let label = relative.to_string_lossy().replace('\\', "/");
        // No separator left means the file sits directly in the repo root.
        let at_top_level = !label.contains('/');
        scan_attr_lines(&contents, &label, &mut listing, &mut macros, at_top_level);
    }

    Ok(listing)
}
/// Reads the user-level ("global") attributes file, if there is one.
///
/// Resolution follows Git's rule for `core.attributesFile`:
/// * when the setting has a non-empty value, that path (with a leading `~`
///   expanded) is the only candidate. If it cannot be read the result is
///   `None`; the XDG default is NOT consulted, because that default applies
///   only when the setting is absent;
/// * otherwise the default `$XDG_CONFIG_HOME/git/attributes` is used, with
///   `$HOME/.config` standing in when `XDG_CONFIG_HOME` is unset or empty.
///
/// A failed config lookup or an empty configured value is treated as
/// "not configured".
///
/// Returns the source label (the configured value as written, or the default
/// path) together with the file contents.
fn read_global_attributes(repo_root: &Path) -> Option<(String, Vec<u8>)> {
    // NOTE(review): assumes `get_effective` yields the effective config value
    // as a `String` — confirm against `crate::config`.
    if let Ok(Some(configured)) = crate::config::get_effective(repo_root, "core.attributesfile") {
        if !configured.is_empty() {
            // An explicit setting replaces the default location outright.
            let bytes = fs::read(expand_tilde(&configured)).ok()?;
            return Some((configured, bytes));
        }
    }

    let config_home = std::env::var_os("XDG_CONFIG_HOME")
        .filter(|v| !v.is_empty())
        .map(PathBuf::from)
        .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".config")))?;
    let path = config_home.join("git").join("attributes");
    let bytes = fs::read(&path).ok()?;
    Some((path.to_string_lossy().into_owned(), bytes))
}
/// Expands a leading `~` using the `HOME` environment variable.
///
/// Only the bare `~` and the `~/…` forms are expanded; `~user/…` and every
/// other path are returned as given. When `HOME` is not set the path is also
/// returned unchanged.
fn expand_tilde(path: &str) -> PathBuf {
    let below_home = path.strip_prefix("~/");
    if below_home.is_none() && path != "~" {
        return PathBuf::from(path);
    }

    match std::env::var_os("HOME") {
        None => PathBuf::from(path),
        Some(home) => {
            let home = PathBuf::from(home);
            match below_home {
                Some(rest) => home.join(rest),
                None => home,
            }
        }
    }
}
/// Scans one attributes file and appends its filter-related pattern lines
/// to `listing`.
///
/// * `bytes` – raw file contents; each line is decoded lossily as UTF-8.
/// * `source` – label stored in every entry produced from this file.
/// * `macros` – shared macro table; consulted for every token and, when
///   `allow_macros` is true, updated by `[attr]` lines.
/// * `allow_macros` – whether `[attr]` definitions in this file are recorded
///   (they are skipped as pattern lines either way).
///
/// A pattern line produces an entry only if at least one of its tokens sets
/// or clears the LFS filter; the last such token decides `tracked`.
fn scan_attr_lines(
    bytes: &[u8],
    source: &str,
    listing: &mut PatternListing,
    macros: &mut MacroState,
    allow_macros: bool,
) {
    for raw in bytes.split(|&b| b == b'\n') {
        let text = String::from_utf8_lossy(raw);
        let entry = text.trim();

        if entry.is_empty() || entry.starts_with('#') {
            continue;
        }
        if entry.starts_with("[attr]") {
            if allow_macros {
                macros.ingest(entry);
            }
            continue;
        }

        let mut fields = entry.split_whitespace();
        let pattern = match fields.next() {
            Some(pattern) => pattern,
            None => continue,
        };

        // Walk the attribute tokens once, keeping the latest filter verdict
        // and remembering whether a bare `lockable` token was seen.
        let (verdict, lockable) =
            fields.fold((None, false), |(verdict, lockable), tok| {
                let verdict = match macros.classify(tok) {
                    FilterEffect::SetLfs => Some(true),
                    FilterEffect::Clear => Some(false),
                    FilterEffect::None => verdict,
                };
                (verdict, lockable || tok == "lockable")
            });

        listing.patterns.extend(verdict.map(|tracked| PatternEntry {
            pattern: pattern.to_owned(),
            source: source.to_owned(),
            tracked,
            lockable,
        }));
    }
}
/// Recursively collects every `.gitattributes` file below `dir` into `out`.
///
/// Entries named `.git` (directory or file) are skipped at every level, so
/// nothing inside a repository's metadata directory is reported. Symbolic
/// links are not followed: the entry's own file type is used.
///
/// Entries of each directory are visited in file-name order. `fs::read_dir`
/// makes no ordering promise, so without this the result, and any listing
/// built from it, could differ between platforms and file systems.
///
/// # Errors
/// Returns the first I/O error hit while reading a directory or querying an
/// entry's file type.
fn walk_for_gitattributes(dir: &Path, out: &mut Vec<PathBuf>) -> io::Result<()> {
    let mut entries = fs::read_dir(dir)?.collect::<io::Result<Vec<_>>>()?;
    // Deterministic traversal; the key is cached so each name is built once.
    entries.sort_by_cached_key(|entry| entry.file_name());

    for entry in entries {
        let ft = entry.file_type()?;
        let name = entry.file_name();
        if name == OsStr::new(".git") {
            continue;
        }
        let path = entry.path();
        if ft.is_dir() {
            walk_for_gitattributes(&path, out)?;
        } else if ft.is_file() && name == OsStr::new(".gitattributes") {
            out.push(path);
        }
    }
    Ok(())
}
// Unit tests for `AttrSet` lookups and for `list_lfs_patterns`.
// File-based tests build a throwaway working tree with `tempfile::TempDir`.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
// `!macro` on a pattern line must unset every attribute the macro assigns.
#[test]
fn negated_macro_unsets_constituent_attrs() {
let s = AttrSet::from_buffer(
b"[attr]lfs filter=lfs diff=lfs merge=lfs -text\n\
*.dat lfs\n\
b.dat !lfs\n",
);
assert_eq!(s.value("a.dat", "filter").as_deref(), Some("lfs"));
assert_eq!(s.value("b.dat", "filter"), None);
assert!(s.is_lfs_tracked("a.dat"));
assert!(!s.is_lfs_tracked("b.dat"));
}
// A set with only the built-in macro matches nothing.
#[test]
fn empty_set_has_no_matches() {
let s = AttrSet::empty();
assert_eq!(s.value("foo.txt", "filter"), None);
assert!(!s.is_lfs_tracked("foo.txt"));
assert!(!s.is_lockable("foo.txt"));
}
// A pattern without a slash matches the basename at any depth.
#[test]
fn buffer_basename_match() {
let s = AttrSet::from_buffer(b"*.bin filter=lfs diff=lfs merge=lfs -text\n");
assert!(s.is_lfs_tracked("foo.bin"));
assert!(s.is_lfs_tracked("nested/dir/foo.bin"));
assert!(!s.is_lfs_tracked("foo.txt"));
}
// `attr=value` assignments are returned as the raw value string.
#[test]
fn value_returns_raw_string() {
let s = AttrSet::from_buffer(b"*.txt eol=lf\n");
assert_eq!(s.value("a.txt", "eol").as_deref(), Some("lf"));
}
// A later `-attr` line overrides an earlier assignment for matching paths.
#[test]
fn unset_attribute_via_dash_prefix() {
let s = AttrSet::from_buffer(
b"*.txt filter=lfs\n\
special.txt -filter\n",
);
assert!(s.is_lfs_tracked("a.txt"));
assert_eq!(s.value("special.txt", "filter"), None);
assert!(!s.is_lfs_tracked("special.txt"));
}
// A bare `lockable` token marks matching paths as lockable.
#[test]
fn lockable_set_form() {
let s = AttrSet::from_buffer(b"*.psd lockable\n");
assert!(s.is_lockable("art/cover.psd"));
assert!(!s.is_lockable("readme.txt"));
}
// `attr=false` is reported as not set by `is_set`.
#[test]
fn is_set_treats_false_value_as_unset() {
let s = AttrSet::from_buffer(
b"truthy lockable\n\
falsy lockable=false\n",
);
assert!(s.is_set("truthy", "lockable"));
assert!(!s.is_set("falsy", "lockable"));
}
// A pattern with a leading slash is anchored to the top level.
#[test]
fn rooted_pattern_only_matches_top_level() {
let s = AttrSet::from_buffer(b"/top.bin filter=lfs\n");
assert!(s.is_lfs_tracked("top.bin"));
assert!(!s.is_lfs_tracked("nested/top.bin"));
}
// `from_workdir` picks up the `.gitattributes` in the repository root.
#[test]
fn workdir_loads_root_gitattributes() {
let tmp = TempDir::new().unwrap();
std::fs::create_dir_all(tmp.path().join(".git/info")).unwrap();
std::fs::write(
tmp.path().join(".gitattributes"),
"*.bin filter=lfs diff=lfs merge=lfs -text\n",
)
.unwrap();
let s = AttrSet::from_workdir(tmp.path()).unwrap();
assert!(s.is_lfs_tracked("a.bin"));
assert!(s.is_lfs_tracked("sub/a.bin"));
}
// A `.gitattributes` in a subdirectory overrides the root file for paths below it.
#[test]
fn deeper_gitattributes_overrides_root() {
let tmp = TempDir::new().unwrap();
std::fs::create_dir_all(tmp.path().join("sub/.git_placeholder")).unwrap();
std::fs::write(tmp.path().join(".gitattributes"), "*.bin filter=lfs\n").unwrap();
std::fs::write(tmp.path().join("sub/.gitattributes"), "*.bin -filter\n").unwrap();
let s = AttrSet::from_workdir(tmp.path()).unwrap();
assert!(s.is_lfs_tracked("a.bin"));
assert!(!s.is_lfs_tracked("sub/a.bin"));
}
// `.git/info/attributes` is loaded by `from_workdir`.
#[test]
fn info_attributes_loaded_from_dotgit() {
let tmp = TempDir::new().unwrap();
std::fs::create_dir_all(tmp.path().join(".git/info")).unwrap();
std::fs::write(
tmp.path().join(".git/info/attributes"),
"*.bin filter=lfs\n",
)
.unwrap();
let s = AttrSet::from_workdir(tmp.path()).unwrap();
assert!(s.is_lfs_tracked("a.bin"));
}
// Listing covers info/attributes plus nested `.gitattributes`, shallowest first,
// and reports lines that clear the filter as excluded.
#[test]
fn list_lfs_patterns_recursive() {
let tmp = TempDir::new().unwrap();
std::fs::create_dir_all(tmp.path().join(".git/info")).unwrap();
std::fs::create_dir_all(tmp.path().join("a/b")).unwrap();
std::fs::write(
tmp.path().join(".gitattributes"),
"* text=auto\n\
*.jpg filter=lfs diff=lfs merge=lfs -text\n",
)
.unwrap();
std::fs::write(
tmp.path().join(".git/info/attributes"),
"*.mov filter=lfs -text\n",
)
.unwrap();
std::fs::write(
tmp.path().join("a/.gitattributes"),
"*.gif filter=lfs -text\n",
)
.unwrap();
std::fs::write(
tmp.path().join("a/b/.gitattributes"),
"*.png filter=lfs -text\n\
*.gif -filter -text\n\
*.mov -filter=lfs -text\n",
)
.unwrap();
let listing = list_lfs_patterns(tmp.path()).unwrap();
let tracked: Vec<(&str, &str)> = listing
.tracked()
.map(|p| (p.pattern.as_str(), p.source.as_str()))
.collect();
let excluded: Vec<(&str, &str)> = listing
.excluded()
.map(|p| (p.pattern.as_str(), p.source.as_str()))
.collect();
assert_eq!(
tracked,
vec![
("*.mov", ".git/info/attributes"),
("*.jpg", ".gitattributes"),
("*.gif", "a/.gitattributes"),
("*.png", "a/b/.gitattributes"),
]
);
assert_eq!(
excluded,
vec![
("*.gif", "a/b/.gitattributes"),
("*.mov", "a/b/.gitattributes"),
]
);
}
// Macro definitions and comment lines never show up as patterns.
#[test]
fn list_lfs_patterns_skips_macros_and_comments() {
let tmp = TempDir::new().unwrap();
std::fs::write(
tmp.path().join(".gitattributes"),
"[attr]binary -diff -merge -text\n\
# *.jpg filter=lfs\n\
*.bin filter=lfs -text\n",
)
.unwrap();
let listing = list_lfs_patterns(tmp.path()).unwrap();
let tracked: Vec<&PatternEntry> = listing.tracked().collect();
assert_eq!(tracked.len(), 1);
assert_eq!(tracked[0].pattern, "*.bin");
}
// The `lockable` flag is recorded per pattern line.
#[test]
fn list_picks_up_lockable_attribute() {
let tmp = TempDir::new().unwrap();
std::fs::write(
tmp.path().join(".gitattributes"),
"*.psd filter=lfs diff=lfs merge=lfs lockable\n\
*.bin filter=lfs diff=lfs merge=lfs\n",
)
.unwrap();
let listing = list_lfs_patterns(tmp.path()).unwrap();
assert_eq!(listing.patterns.len(), 2);
assert_eq!(listing.patterns[0].pattern, "*.psd");
assert!(listing.patterns[0].lockable);
assert_eq!(listing.patterns[1].pattern, "*.bin");
assert!(!listing.patterns[1].lockable);
}
// A pattern that uses an LFS-enabling macro is listed as tracked.
#[test]
fn list_expands_macro_to_lfs() {
let tmp = TempDir::new().unwrap();
std::fs::write(
tmp.path().join(".gitattributes"),
"[attr]lfs filter=lfs diff=lfs merge=lfs -text\n\
*.dat lfs\n",
)
.unwrap();
let listing = list_lfs_patterns(tmp.path()).unwrap();
let tracked: Vec<&str> = listing.tracked().map(|p| p.pattern.as_str()).collect();
assert_eq!(tracked, vec!["*.dat"]);
}
// Macros defined in info/attributes are visible to the root `.gitattributes`.
#[test]
fn list_expands_macro_defined_in_earlier_file() {
let tmp = TempDir::new().unwrap();
std::fs::create_dir_all(tmp.path().join(".git/info")).unwrap();
std::fs::write(
tmp.path().join(".git/info/attributes"),
"[attr]lfs filter=lfs diff=lfs merge=lfs -text\n",
)
.unwrap();
std::fs::write(tmp.path().join(".gitattributes"), "*.dat lfs\n").unwrap();
let listing = list_lfs_patterns(tmp.path()).unwrap();
let tracked: Vec<&str> = listing.tracked().map(|p| p.pattern.as_str()).collect();
assert_eq!(tracked, vec!["*.dat"]);
}
// `!macro` for an LFS-enabling macro lists the pattern as excluded.
#[test]
fn list_negated_macro_marks_excluded() {
let tmp = TempDir::new().unwrap();
std::fs::write(
tmp.path().join(".gitattributes"),
"[attr]lfs filter=lfs diff=lfs merge=lfs -text\n\
**/*.dat lfs\n\
other.dat !lfs\n",
)
.unwrap();
let listing = list_lfs_patterns(tmp.path()).unwrap();
let tracked: Vec<&str> = listing.tracked().map(|p| p.pattern.as_str()).collect();
let excluded: Vec<&str> = listing.excluded().map(|p| p.pattern.as_str()).collect();
assert_eq!(tracked, vec!["**/*.dat"]);
assert_eq!(excluded, vec!["other.dat"]);
}
// `!filter` is listed as excluded, like `-filter`.
#[test]
fn bang_filter_treated_as_excluded() {
let tmp = TempDir::new().unwrap();
std::fs::write(
tmp.path().join(".gitattributes"),
"*.dat filter=lfs\n\
a.dat !filter\n",
)
.unwrap();
let listing = list_lfs_patterns(tmp.path()).unwrap();
assert_eq!(listing.patterns.len(), 2);
assert!(listing.patterns[0].tracked);
assert_eq!(listing.patterns[1].pattern, "a.dat");
assert!(!listing.patterns[1].tracked);
}
// A `.gitattributes` inside `.git` must be ignored by the tree walk.
#[test]
fn workdir_skips_dotgit_directory() {
let tmp = TempDir::new().unwrap();
std::fs::create_dir_all(tmp.path().join(".git")).unwrap();
std::fs::write(tmp.path().join(".git/.gitattributes"), "*.bin filter=lfs\n").unwrap();
let s = AttrSet::from_workdir(tmp.path()).unwrap();
assert!(!s.is_lfs_tracked("a.bin"));
}
}