Skip to main content

git_filter_tree/
lib.rs

1//! Filter Git tree objects by glob patterns, gitattributes, or a custom predicate.
2//!
3//! This crate exposes the [`FilterTree`] trait, implemented on
4//! [`git2::Repository`], which produces a new tree containing only the entries
5//! that match either a set of **glob patterns**, a set of **gitattributes**,
6//! or an arbitrary **predicate function**.
7//! Trees are walked recursively; patterns are matched against full paths from
8//! the tree root.
9//!
10//! It is the plumbing library behind the `git filter-tree` command and the
11//! [`git-rewrite`](https://docs.rs/git-rewrite) porcelain.
12//!
13//! # Filter by Pattern
14//!
15//! ```no_run
16//! use git_filter_tree::FilterTree as _;
17//!
18//! let repo = git2::Repository::open_from_env()?;
19//! let tree = repo.head()?.peel_to_tree()?;
20//!
21//! // Produce a new tree that contains only Rust source files.
22//! let filtered = repo.filter_by_patterns(&tree, &["**/*.rs"])?;
23//! println!("tree sha: {}", filtered.id());
24//! # Ok::<(), Box<dyn std::error::Error>>(())
25//! ```
26//!
27//! A trailing `/` is expanded to `dir/**`, so `"src/"` keeps all files under
28//! `src/`. Multiple patterns are OR-ed together.
29//!
30//! # Filter by Attributes
31//!
32//! ```no_run
33//! use git_filter_tree::FilterTree as _;
34//!
35//! let repo = git2::Repository::open_from_env()?;
36//! let tree = repo.head()?.peel_to_tree()?;
37//!
38//! // Keep only entries that have the `export` attribute set in .gitattributes.
39//! let filtered = repo.filter_by_attributes(&tree, &["export"])?;
40//! println!("tree sha: {}", filtered.id());
41//! # Ok::<(), Box<dyn std::error::Error>>(())
42//! ```
43//!
44//! All listed attributes must be set (AND semantics). Entries with an
45//! attribute explicitly unset (`-export`) or unspecified are excluded.
46//!
47//! # Filter by Predicate
48//!
49//! ```no_run
50//! use git_filter_tree::FilterTree as _;
51//! use std::path::Path;
52//!
53//! let repo = git2::Repository::open_from_env()?;
54//! let tree = repo.head()?.peel_to_tree()?;
55//!
56//! // Keep only files whose path contains "generated".
57//! let filtered = repo.filter_by_predicate(&tree, |_repo, path| {
58//!     path.to_str().is_some_and(|s| s.contains("generated"))
59//! })?;
60//! println!("tree sha: {}", filtered.id());
61//! # Ok::<(), Box<dyn std::error::Error>>(())
62//! ```
63//!
64//! The predicate receives the repository and the full path of each blob entry
65//! relative to the tree root. Subtrees are included as long as at least one
66//! descendant matches.
67pub mod exe;
68use std::path::Path;
69
70pub use git2::{Error, Repository};
71use globset::GlobSetBuilder;
72
73pub trait FilterTree {
74    /// Filters tree entries by gitattributes-style patterns and returns a new tree with contents
75    /// filtered through the provided patterns. Recursively walks the tree and matches patterns
76    /// against full paths from the tree root.
77    ///
78    /// The `patterns` type is an array of string slices and not a glob type because Git has
79    /// specific glob syntax that differs from standard shell syntax.
80    fn filter_by_patterns<'a>(
81        &'a self,
82        tree: &'a git2::Tree<'a>,
83        patterns: &[&str], // TODO create a `git-glob` crate to handle patterns more gracefully
84    ) -> Result<git2::Tree<'a>, Error>;
85
86    /// Filters tree entries by gitattributes and returns a new tree with contents filtered.
87    /// Recursively walks the tree and matches attributes against full paths from the tree root.
88    ///
89    /// The `attributes` type is an array of string slices. For attributes which have values,
90    /// not simply set or unset, use typical `.gitattributes` syntax.
91    fn filter_by_attributes<'a>(
92        &'a self,
93        tree: &'a git2::Tree<'a>,
94        attributes: &[&str],
95    ) -> Result<git2::Tree<'a>, Error>;
96
97    /// Filters tree entries using an arbitrary predicate and returns a new tree.
98    /// Recursively walks the tree; the predicate is called for each blob entry
99    /// with the repository and the entry's full path relative to the tree root.
100    /// Subtrees are retained as long as at least one descendant matches.
101    fn filter_by_predicate<'a, F>(
102        &'a self,
103        tree: &'a git2::Tree<'a>,
104        predicate: F,
105    ) -> Result<git2::Tree<'a>, Error>
106    where
107        F: Fn(&git2::Repository, &Path) -> bool;
108}
109
110impl FilterTree for git2::Repository {
111    fn filter_by_patterns<'a>(
112        &'a self,
113        tree: &'a git2::Tree<'a>,
114        patterns: &[&str],
115    ) -> Result<git2::Tree<'a>, Error> {
116        if patterns.is_empty() {
117            return Err(Error::from_str("At least one pattern is required"));
118        }
119
120        // Build GlobSet matcher
121        let mut glob_builder = GlobSetBuilder::new();
122        for pattern in patterns {
123            // A trailing `/` means "this directory" in gitattributes/gitignore
124            // semantics.  Normalize to `dir/**` so globset matches all files
125            // under the directory recursively.
126            let normalized: String;
127            let pat = if pattern.ends_with('/') {
128                normalized = format!("{}**", pattern);
129                normalized.as_str()
130            } else {
131                pattern
132            };
133            let glob = globset::Glob::new(pat)
134                .map_err(|e| Error::from_str(&format!("Invalid pattern '{}': {}", pattern, e)))?;
135            glob_builder.add(glob);
136        }
137
138        let matcher = glob_builder
139            .build()
140            .map_err(|e| Error::from_str(&e.to_string()))?;
141
142        // Recursively filter the tree
143        filter_tree_recursive(self, tree, None, &|_repo, path| matcher.is_match(path))
144    }
145
146    fn filter_by_predicate<'a, F>(
147        &'a self,
148        tree: &'a git2::Tree<'a>,
149        predicate: F,
150    ) -> Result<git2::Tree<'a>, Error>
151    where
152        F: Fn(&git2::Repository, &Path) -> bool,
153    {
154        filter_tree_recursive(self, tree, None, &predicate)
155    }
156
157    fn filter_by_attributes<'a>(
158        &'a self,
159        tree: &'a git2::Tree<'a>,
160        attributes: &[&str],
161    ) -> Result<git2::Tree<'a>, Error> {
162        if attributes.is_empty() {
163            return Err(git2::Error::from_str("at least one attribute is required"));
164        }
165
166        filter_tree_recursive(self, tree, None, &|repo, path| {
167            for attribute in attributes {
168                match repo.get_attr(path, attribute, git2::AttrCheckFlags::FILE_THEN_INDEX) {
169                    Ok(Some(value)) => {
170                        let value = git2::AttrValue::from_string(Some(value));
171                        match value {
172                            git2::AttrValue::Unspecified => return false,
173                            git2::AttrValue::False => return false,
174                            _ => {}
175                        }
176                    }
177                    Ok(None) => return false,
178                    Err(_) => return false,
179                }
180            }
181
182            true
183        })
184    }
185}
186
187/// Recursively filters a tree, matching patterns against full paths.
188/// Returns a new tree containing only entries that match or have matching descendants.
189fn filter_tree_recursive<'a, F>(
190    repo: &'a Repository,
191    tree: &'a git2::Tree<'a>,
192    prefix: Option<&str>,
193    predicate: &F,
194) -> Result<git2::Tree<'a>, Error>
195where
196    F: Fn(&Repository, &Path) -> bool,
197{
198    let mut builder = repo.treebuilder(None)?;
199
200    for entry in tree.iter() {
201        let Some(name) = entry.name() else {
202            return Err(Error::from_str("name has invalid UTF-8"));
203        };
204
205        let git_path = match prefix {
206            Some(dir) => format!("{}/{}", dir, name),
207            None => name.to_string(),
208        };
209        let full_path = Path::new(&git_path);
210
211        match entry.kind() {
212            Some(git2::ObjectType::Blob) => {
213                if predicate(repo, &full_path) {
214                    builder.insert(name, entry.id(), entry.filemode())?;
215                }
216            }
217            Some(git2::ObjectType::Tree) => {
218                let subtree = entry.to_object(repo)?.peel_to_tree()?;
219                let filtered_subtree =
220                    filter_tree_recursive(repo, &subtree, Some(&git_path), predicate)?;
221                if !filtered_subtree.is_empty() {
222                    builder.insert(name, filtered_subtree.id(), entry.filemode())?;
223                }
224            }
225            // Skip submodule commit pointers, tags, and any other unexpected
226            // object types that can appear as tree entries.
227            _ => continue,
228        }
229    }
230
231    let tree_oid = builder.write()?;
232    repo.find_tree(tree_oid)
233}
234
235#[cfg(test)]
236mod tests {
237    use super::*;
238    use std::fs;
239    use std::path::PathBuf;
240
241    fn setup_test_repo() -> (Repository, PathBuf) {
242        let thread_id = std::thread::current().id();
243        let temp_path = std::env::temp_dir().join(format!("git-filter-tree-test-{:?}", thread_id));
244        let _ = fs::remove_dir_all(&temp_path);
245        fs::create_dir_all(&temp_path).unwrap();
246        let repo = Repository::init_bare(&temp_path).unwrap();
247        (repo, temp_path)
248    }
249
250    fn cleanup_test_repo(path: PathBuf) {
251        let _ = fs::remove_dir_all(path);
252    }
253
254    fn create_test_tree<'a>(repo: &'a Repository) -> Result<git2::Tree<'a>, Error> {
255        let mut tree_builder = repo.treebuilder(None)?;
256
257        // Create some blob entries
258        let blob1 = repo.blob(b"content1")?;
259        let blob2 = repo.blob(b"content2")?;
260        let blob3 = repo.blob(b"content3")?;
261
262        tree_builder.insert("file1.txt", blob1, 0o100644)?;
263        tree_builder.insert("file2.rs", blob2, 0o100644)?;
264        tree_builder.insert("test.md", blob3, 0o100644)?;
265
266        let tree_oid = tree_builder.write()?;
267        repo.find_tree(tree_oid)
268    }
269
270    #[test]
271    fn test_filter_single_pattern() -> Result<(), Error> {
272        let (repo, temp_path) = setup_test_repo();
273
274        let tree = create_test_tree(&repo)?;
275        assert_eq!(tree.len(), 3);
276
277        // Filter for .txt files only
278        let filtered = repo.filter_by_patterns(&tree, &["*.txt"])?;
279        assert_eq!(filtered.len(), 1);
280        assert!(filtered.get_name("file1.txt").is_some());
281        assert!(filtered.get_name("file2.rs").is_none());
282        assert!(filtered.get_name("test.md").is_none());
283
284        cleanup_test_repo(temp_path);
285        Ok(())
286    }
287
288    #[test]
289    fn test_filter_multiple_patterns() -> Result<(), Error> {
290        let (repo, temp_path) = setup_test_repo();
291
292        let tree = create_test_tree(&repo)?;
293
294        // Filter for .txt and .rs files
295        let filtered = repo.filter_by_patterns(&tree, &["*.txt", "*.rs"])?;
296        assert_eq!(filtered.len(), 2);
297        assert!(filtered.get_name("file1.txt").is_some());
298        assert!(filtered.get_name("file2.rs").is_some());
299        assert!(filtered.get_name("test.md").is_none());
300
301        cleanup_test_repo(temp_path);
302        Ok(())
303    }
304
305    #[test]
306    fn test_filter_exact_match() -> Result<(), Error> {
307        let (repo, temp_path) = setup_test_repo();
308
309        let tree = create_test_tree(&repo)?;
310
311        // Filter for exact filename
312        let filtered = repo.filter_by_patterns(&tree, &["file1.txt"])?;
313        assert_eq!(filtered.len(), 1);
314        assert!(filtered.get_name("file1.txt").is_some());
315
316        cleanup_test_repo(temp_path);
317        Ok(())
318    }
319
320    #[test]
321    fn test_filter_wildcard_patterns() -> Result<(), Error> {
322        let (repo, temp_path) = setup_test_repo();
323
324        let tree = create_test_tree(&repo)?;
325
326        // Filter with wildcard pattern
327        let filtered = repo.filter_by_patterns(&tree, &["file*"])?;
328        assert_eq!(filtered.len(), 2);
329        assert!(filtered.get_name("file1.txt").is_some());
330        assert!(filtered.get_name("file2.rs").is_some());
331        assert!(filtered.get_name("test.md").is_none());
332
333        cleanup_test_repo(temp_path);
334        Ok(())
335    }
336
337    #[test]
338    fn test_filter_no_matches() -> Result<(), Error> {
339        let (repo, temp_path) = setup_test_repo();
340
341        let tree = create_test_tree(&repo)?;
342
343        // Filter with pattern that matches nothing
344        let filtered = repo.filter_by_patterns(&tree, &["*.nonexistent"])?;
345        assert_eq!(filtered.len(), 0);
346
347        cleanup_test_repo(temp_path);
348        Ok(())
349    }
350
351    #[test]
352    fn test_filter_all_matches() -> Result<(), Error> {
353        let (repo, temp_path) = setup_test_repo();
354
355        let tree = create_test_tree(&repo)?;
356
357        // Filter with pattern that matches everything
358        let filtered = repo.filter_by_patterns(&tree, &["*"])?;
359        assert_eq!(filtered.len(), 3);
360
361        cleanup_test_repo(temp_path);
362        Ok(())
363    }
364
365    #[test]
366    fn test_filter_empty_patterns_error() {
367        let (repo, temp_path) = setup_test_repo();
368
369        let tree = create_test_tree(&repo).unwrap();
370
371        // Empty patterns should return an error
372        let result = repo.filter_by_patterns(&tree, &[]);
373        assert!(result.is_err());
374        assert_eq!(
375            result.unwrap_err().message(),
376            "At least one pattern is required"
377        );
378
379        cleanup_test_repo(temp_path);
380    }
381
382    #[test]
383    fn test_filter_invalid_pattern_error() {
384        let (repo, temp_path) = setup_test_repo();
385
386        let tree = create_test_tree(&repo).unwrap();
387
388        // Invalid glob pattern should return an error
389        let result = repo.filter_by_patterns(&tree, &["[invalid"]);
390        assert!(result.is_err());
391
392        cleanup_test_repo(temp_path);
393    }
394
395    #[test]
396    fn test_filter_with_nested_tree() -> Result<(), Error> {
397        let (repo, temp_path) = setup_test_repo();
398
399        let mut tree_builder = repo.treebuilder(None)?;
400
401        // Create a nested tree
402        let mut subtree_builder = repo.treebuilder(None)?;
403        let blob = repo.blob(b"nested content")?;
404        subtree_builder.insert("nested.txt", blob, 0o100644)?;
405        let subtree_oid = subtree_builder.write()?;
406
407        // Add files and subtree to main tree
408        let blob1 = repo.blob(b"content1")?;
409        tree_builder.insert("file1.txt", blob1, 0o100644)?;
410        tree_builder.insert("subdir", subtree_oid, 0o040000)?;
411
412        let tree_oid = tree_builder.write()?;
413        let tree = repo.find_tree(tree_oid)?;
414
415        // Filter - should keep both file and directory
416        let filtered = repo.filter_by_patterns(&tree, &["*"])?;
417        assert_eq!(filtered.len(), 2);
418
419        cleanup_test_repo(temp_path);
420        Ok(())
421    }
422
423    #[test]
424    fn test_filter_preserves_empty_tree() -> Result<(), Error> {
425        let (repo, temp_path) = setup_test_repo();
426
427        // Create an empty tree
428        let tree_builder = repo.treebuilder(None)?;
429        let tree_oid = tree_builder.write()?;
430        let tree = repo.find_tree(tree_oid)?;
431
432        assert_eq!(tree.len(), 0);
433
434        // Filter empty tree
435        let filtered = repo.filter_by_patterns(&tree, &["*"])?;
436        assert_eq!(filtered.len(), 0);
437
438        cleanup_test_repo(temp_path);
439        Ok(())
440    }
441
442    #[test]
443    fn test_filter_case_sensitive() -> Result<(), Error> {
444        let (repo, temp_path) = setup_test_repo();
445
446        let mut tree_builder = repo.treebuilder(None)?;
447        let blob1 = repo.blob(b"content1")?;
448        let blob2 = repo.blob(b"content2")?;
449
450        tree_builder.insert("File.txt", blob1, 0o100644)?;
451        tree_builder.insert("file.txt", blob2, 0o100644)?;
452
453        let tree_oid = tree_builder.write()?;
454        let tree = repo.find_tree(tree_oid)?;
455
456        // Filter with exact case match
457        let filtered = repo.filter_by_patterns(&tree, &["file.txt"])?;
458        assert_eq!(filtered.len(), 1);
459        assert!(filtered.get_name("file.txt").is_some());
460
461        cleanup_test_repo(temp_path);
462        Ok(())
463    }
464
465    #[test]
466    fn test_filter_complex_patterns() -> Result<(), Error> {
467        let (repo, temp_path) = setup_test_repo();
468
469        let mut tree_builder = repo.treebuilder(None)?;
470        let blob = repo.blob(b"content")?;
471
472        tree_builder.insert("test1.txt", blob, 0o100644)?;
473        tree_builder.insert("test2.rs", blob, 0o100644)?;
474        tree_builder.insert("data.json", blob, 0o100644)?;
475        tree_builder.insert("README.md", blob, 0o100644)?;
476
477        let tree_oid = tree_builder.write()?;
478        let tree = repo.find_tree(tree_oid)?;
479
480        // Multiple patterns with different wildcards
481        let filtered = repo.filter_by_patterns(&tree, &["test*", "*.md"])?;
482        assert_eq!(filtered.len(), 3);
483        assert!(filtered.get_name("test1.txt").is_some());
484        assert!(filtered.get_name("test2.rs").is_some());
485        assert!(filtered.get_name("README.md").is_some());
486        assert!(filtered.get_name("data.json").is_none());
487
488        cleanup_test_repo(temp_path);
489        Ok(())
490    }
491
492    #[test]
493    fn test_filter_trailing_slash_matches_directory_contents() -> Result<(), Error> {
494        let (repo, temp_path) = setup_test_repo();
495
496        // Build a tree with a subdirectory: pyo3/Cargo.toml, pyo3/src/lib.rs,
497        // and a top-level file that should NOT match.
498        let blob = repo.blob(b"content")?;
499
500        let mut src_builder = repo.treebuilder(None)?;
501        src_builder.insert("lib.rs", blob, 0o100644)?;
502        let src_oid = src_builder.write()?;
503
504        let mut pyo3_builder = repo.treebuilder(None)?;
505        pyo3_builder.insert("Cargo.toml", blob, 0o100644)?;
506        pyo3_builder.insert("src", src_oid, 0o040000)?;
507        let pyo3_oid = pyo3_builder.write()?;
508
509        let mut root_builder = repo.treebuilder(None)?;
510        root_builder.insert("pyo3", pyo3_oid, 0o040000)?;
511        root_builder.insert("README.md", blob, 0o100644)?;
512        let root_oid = root_builder.write()?;
513        let tree = repo.find_tree(root_oid)?;
514
515        // "pyo3/" (trailing slash) must match all files under pyo3/.
516        let filtered = repo.filter_by_patterns(&tree, &["pyo3/"])?;
517        assert_eq!(filtered.len(), 1, "only the pyo3 dir should remain");
518        assert!(filtered.get_name("pyo3").is_some());
519        assert!(filtered.get_name("README.md").is_none());
520
521        // The pyo3 subtree itself must retain both entries.
522        let pyo3_entry = filtered.get_name("pyo3").unwrap();
523        let pyo3_tree = repo.find_tree(pyo3_entry.id())?;
524        assert!(pyo3_tree.get_name("Cargo.toml").is_some());
525        assert!(pyo3_tree.get_name("src").is_some());
526
527        cleanup_test_repo(temp_path);
528        Ok(())
529    }
530
531    // -----------------------------------------------------------------------
532    // Helpers and tests for filter_by_attributes
533    // -----------------------------------------------------------------------
534
535    /// Initializes a non-bare repository so that `.gitattributes` written to
536    /// its working directory are picked up by `repo.get_attr(…)`.
537    fn setup_attr_test_repo() -> (Repository, PathBuf) {
538        let thread_id = std::thread::current().id();
539        let temp_path = std::env::temp_dir().join(format!("git-filter-attr-test-{:?}", thread_id));
540        let _ = fs::remove_dir_all(&temp_path);
541        fs::create_dir_all(&temp_path).unwrap();
542        let repo = Repository::init(&temp_path).unwrap();
543        (repo, temp_path)
544    }
545
546    fn write_gitattributes(repo_path: &Path, content: &str) {
547        fs::write(repo_path.join(".gitattributes"), content).unwrap();
548    }
549
550    // --- filter_by_attributes: error cases ---------------------------------
551
552    #[test]
553    fn test_filter_by_attributes_empty_returns_error() {
554        let (repo, temp_path) = setup_attr_test_repo();
555        write_gitattributes(&temp_path, "");
556
557        let tree = create_test_tree(&repo).unwrap();
558        let result = repo.filter_by_attributes(&tree, &[]);
559        assert!(result.is_err());
560        assert_eq!(
561            result.unwrap_err().message(),
562            "at least one attribute is required"
563        );
564
565        cleanup_test_repo(temp_path);
566    }
567
568    // --- filter_by_attributes: single attribute ----------------------------
569
570    #[test]
571    fn test_filter_by_attributes_set_attribute_includes_matching_files() -> Result<(), Error> {
572        let (repo, temp_path) = setup_attr_test_repo();
573        // Only .txt files carry the export-ignore attribute.
574        write_gitattributes(&temp_path, "*.txt export-ignore\n");
575
576        let blob = repo.blob(b"content")?;
577        let mut builder = repo.treebuilder(None)?;
578        builder.insert("readme.txt", blob, 0o100644)?;
579        builder.insert("main.rs", blob, 0o100644)?;
580        builder.insert("data.json", blob, 0o100644)?;
581        let tree = repo.find_tree(builder.write()?)?;
582
583        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;
584        assert_eq!(filtered.len(), 1);
585        assert!(filtered.get_name("readme.txt").is_some());
586        assert!(filtered.get_name("main.rs").is_none());
587        assert!(filtered.get_name("data.json").is_none());
588
589        cleanup_test_repo(temp_path);
590        Ok(())
591    }
592
593    #[test]
594    fn test_filter_by_attributes_explicitly_unset_attribute_excluded() -> Result<(), Error> {
595        let (repo, temp_path) = setup_attr_test_repo();
596        // .txt gets the attribute; .md explicitly has it unset with `-`.
597        write_gitattributes(&temp_path, "*.txt custom-attr\n*.md -custom-attr\n");
598
599        let blob = repo.blob(b"content")?;
600        let mut builder = repo.treebuilder(None)?;
601        builder.insert("readme.txt", blob, 0o100644)?;
602        builder.insert("notes.md", blob, 0o100644)?;
603        builder.insert("main.rs", blob, 0o100644)?;
604        let tree = repo.find_tree(builder.write()?)?;
605
606        let filtered = repo.filter_by_attributes(&tree, &["custom-attr"])?;
607        // .txt is set, .md is explicitly unset, .rs is unspecified
608        assert_eq!(filtered.len(), 1);
609        assert!(filtered.get_name("readme.txt").is_some());
610        assert!(filtered.get_name("notes.md").is_none());
611        assert!(filtered.get_name("main.rs").is_none());
612
613        cleanup_test_repo(temp_path);
614        Ok(())
615    }
616
617    #[test]
618    fn test_filter_by_attributes_no_attributes_set_returns_empty_tree() -> Result<(), Error> {
619        let (repo, temp_path) = setup_attr_test_repo();
620        // Empty .gitattributes — nothing is attributed.
621        write_gitattributes(&temp_path, "");
622
623        let blob = repo.blob(b"content")?;
624        let mut builder = repo.treebuilder(None)?;
625        builder.insert("file.txt", blob, 0o100644)?;
626        builder.insert("file.rs", blob, 0o100644)?;
627        let tree = repo.find_tree(builder.write()?)?;
628
629        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;
630        assert_eq!(filtered.len(), 0);
631
632        cleanup_test_repo(temp_path);
633        Ok(())
634    }
635
636    #[test]
637    fn test_filter_by_attributes_multiple_attributes_all_required() -> Result<(), Error> {
638        let (repo, temp_path) = setup_attr_test_repo();
639        // .txt has both attributes; .rs has only one.
640        write_gitattributes(&temp_path, "*.txt attr-a attr-b\n*.rs attr-a\n");
641
642        let blob = repo.blob(b"content")?;
643        let mut builder = repo.treebuilder(None)?;
644        builder.insert("file.txt", blob, 0o100644)?;
645        builder.insert("file.rs", blob, 0o100644)?;
646        builder.insert("file.md", blob, 0o100644)?;
647        let tree = repo.find_tree(builder.write()?)?;
648
649        // Both attributes must be present for a file to be included.
650        let filtered = repo.filter_by_attributes(&tree, &["attr-a", "attr-b"])?;
651        assert_eq!(filtered.len(), 1);
652        assert!(filtered.get_name("file.txt").is_some());
653        assert!(filtered.get_name("file.rs").is_none());
654        assert!(filtered.get_name("file.md").is_none());
655
656        cleanup_test_repo(temp_path);
657        Ok(())
658    }
659
660    #[test]
661    fn test_filter_by_attributes_attribute_with_value() -> Result<(), Error> {
662        let (repo, temp_path) = setup_attr_test_repo();
663        // linguist-language is set to a string value on .rs files.
664        write_gitattributes(&temp_path, "*.rs linguist-language=Rust\n");
665
666        let blob = repo.blob(b"content")?;
667        let mut builder = repo.treebuilder(None)?;
668        builder.insert("main.rs", blob, 0o100644)?;
669        builder.insert("main.py", blob, 0o100644)?;
670        let tree = repo.find_tree(builder.write()?)?;
671
672        // An attribute with any value (including a string) counts as "set".
673        let filtered = repo.filter_by_attributes(&tree, &["linguist-language"])?;
674        assert_eq!(filtered.len(), 1);
675        assert!(filtered.get_name("main.rs").is_some());
676        assert!(filtered.get_name("main.py").is_none());
677
678        cleanup_test_repo(temp_path);
679        Ok(())
680    }
681
682    #[test]
683    fn test_filter_by_attributes_all_files_match() -> Result<(), Error> {
684        let (repo, temp_path) = setup_attr_test_repo();
685        // Wildcard rule sets the attribute on every file.
686        write_gitattributes(&temp_path, "* generated\n");
687
688        let blob = repo.blob(b"content")?;
689        let mut builder = repo.treebuilder(None)?;
690        builder.insert("a.txt", blob, 0o100644)?;
691        builder.insert("b.rs", blob, 0o100644)?;
692        builder.insert("c.md", blob, 0o100644)?;
693        let tree = repo.find_tree(builder.write()?)?;
694
695        let filtered = repo.filter_by_attributes(&tree, &["generated"])?;
696        assert_eq!(filtered.len(), 3);
697
698        cleanup_test_repo(temp_path);
699        Ok(())
700    }
701
702    #[test]
703    fn test_filter_by_attributes_nested_tree_filters_recursively() -> Result<(), Error> {
704        let (repo, temp_path) = setup_attr_test_repo();
705        // Only .proto files carry the attribute.
706        write_gitattributes(&temp_path, "*.proto linguist-generated\n");
707
708        let blob = repo.blob(b"content")?;
709
710        // src/api.proto and src/main.rs
711        let mut src_builder = repo.treebuilder(None)?;
712        src_builder.insert("api.proto", blob, 0o100644)?;
713        src_builder.insert("main.rs", blob, 0o100644)?;
714        let src_oid = src_builder.write()?;
715
716        let mut root_builder = repo.treebuilder(None)?;
717        root_builder.insert("src", src_oid, 0o040000)?;
718        root_builder.insert("README.md", blob, 0o100644)?;
719        let tree = repo.find_tree(root_builder.write()?)?;
720
721        let filtered = repo.filter_by_attributes(&tree, &["linguist-generated"])?;
722
723        // Top-level README.md must be gone; src/ must survive because it has
724        // at least one matching descendant.
725        assert_eq!(filtered.len(), 1);
726        assert!(filtered.get_name("src").is_some());
727        assert!(filtered.get_name("README.md").is_none());
728
729        let src_entry = filtered.get_name("src").unwrap();
730        let src_tree = repo.find_tree(src_entry.id())?;
731        assert_eq!(src_tree.len(), 1);
732        assert!(src_tree.get_name("api.proto").is_some());
733        assert!(src_tree.get_name("main.rs").is_none());
734
735        cleanup_test_repo(temp_path);
736        Ok(())
737    }
738
739    #[test]
740    fn test_filter_by_attributes_empty_tree_stays_empty() -> Result<(), Error> {
741        let (repo, temp_path) = setup_attr_test_repo();
742        write_gitattributes(&temp_path, "* export-ignore\n");
743
744        let tree = repo.find_tree(repo.treebuilder(None)?.write()?)?;
745        assert_eq!(tree.len(), 0);
746
747        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;
748        assert_eq!(filtered.len(), 0);
749
750        cleanup_test_repo(temp_path);
751        Ok(())
752    }
753
754    #[test]
755    fn test_filter_by_attributes_subdirectory_excluded_when_all_children_unmatched()
756    -> Result<(), Error> {
757        let (repo, temp_path) = setup_attr_test_repo();
758        // Only .txt files match; the `docs/` sub-tree contains only .md files.
759        write_gitattributes(&temp_path, "*.txt export-ignore\n");
760
761        let blob = repo.blob(b"content")?;
762
763        let mut docs_builder = repo.treebuilder(None)?;
764        docs_builder.insert("guide.md", blob, 0o100644)?;
765        docs_builder.insert("api.md", blob, 0o100644)?;
766        let docs_oid = docs_builder.write()?;
767
768        let mut root_builder = repo.treebuilder(None)?;
769        root_builder.insert("docs", docs_oid, 0o040000)?;
770        root_builder.insert("notes.txt", blob, 0o100644)?;
771        let tree = repo.find_tree(root_builder.write()?)?;
772
773        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;
774
775        // `docs/` should be pruned entirely because none of its children matched.
776        assert_eq!(filtered.len(), 1);
777        assert!(filtered.get_name("notes.txt").is_some());
778        assert!(filtered.get_name("docs").is_none());
779
780        cleanup_test_repo(temp_path);
781        Ok(())
782    }
783
784    #[test]
785    fn test_filter_by_predicate_always_false_returns_empty_tree() -> Result<(), Error> {
786        let (repo, temp_path) = setup_test_repo();
787        let tree = create_test_tree(&repo)?;
788
789        let filtered = repo.filter_by_predicate(&tree, |_repo, _path| false)?;
790        assert_eq!(filtered.len(), 0);
791
792        cleanup_test_repo(temp_path);
793        Ok(())
794    }
795
796    #[test]
797    fn test_filter_by_predicate_always_true_returns_full_tree() -> Result<(), Error> {
798        let (repo, temp_path) = setup_test_repo();
799        let tree = create_test_tree(&repo)?;
800
801        let filtered = repo.filter_by_predicate(&tree, |_repo, _path| true)?;
802        assert_eq!(filtered.len(), tree.len());
803
804        cleanup_test_repo(temp_path);
805        Ok(())
806    }
807
808    #[test]
809    fn test_filter_by_predicate_matches_on_path() -> Result<(), Error> {
810        let (repo, temp_path) = setup_test_repo();
811        let tree = create_test_tree(&repo)?;
812
813        // Keep only entries whose path contains "file"
814        let filtered = repo.filter_by_predicate(&tree, |_repo, path| {
815            path.to_str().is_some_and(|s| s.contains("file"))
816        })?;
817
818        assert_eq!(filtered.len(), 2);
819        assert!(filtered.get_name("file1.txt").is_some());
820        assert!(filtered.get_name("file2.rs").is_some());
821        assert!(filtered.get_name("test.md").is_none());
822
823        cleanup_test_repo(temp_path);
824        Ok(())
825    }
826
827    #[test]
828    fn test_filter_by_predicate_receives_full_nested_path() -> Result<(), Error> {
829        let (repo, temp_path) = setup_test_repo();
830
831        let blob = repo.blob(b"content")?;
832
833        let mut sub_builder = repo.treebuilder(None)?;
834        sub_builder.insert("deep.rs", blob, 0o100644)?;
835        sub_builder.insert("deep.txt", blob, 0o100644)?;
836        let sub_oid = sub_builder.write()?;
837
838        let mut root_builder = repo.treebuilder(None)?;
839        root_builder.insert("top.rs", blob, 0o100644)?;
840        root_builder.insert("src", sub_oid, 0o040000)?;
841        let tree = repo.find_tree(root_builder.write()?)?;
842
843        let seen_paths = std::cell::RefCell::new(Vec::new());
844        let _ = repo.filter_by_predicate(&tree, |_repo, path| {
845            seen_paths
846                .borrow_mut()
847                .push(path.to_str().unwrap().to_string());
848            true
849        });
850        let seen_paths = seen_paths.into_inner();
851
852        assert!(seen_paths.contains(&"top.rs".to_string()));
853
854        assert!(seen_paths.contains(&"src/deep.rs".to_string()));
855        assert!(seen_paths.contains(&"src/deep.txt".to_string()));
856
857        cleanup_test_repo(temp_path);
858        Ok(())
859    }
860
861    #[test]
862    fn test_filter_by_predicate_prunes_subtree_when_no_descendants_match() -> Result<(), Error> {
863        let (repo, temp_path) = setup_test_repo();
864
865        let blob = repo.blob(b"content")?;
866
867        let mut sub_builder = repo.treebuilder(None)?;
868        sub_builder.insert("a.txt", blob, 0o100644)?;
869        sub_builder.insert("b.txt", blob, 0o100644)?;
870        let sub_oid = sub_builder.write()?;
871
872        let mut root_builder = repo.treebuilder(None)?;
873        root_builder.insert("keep.rs", blob, 0o100644)?;
874        root_builder.insert("docs", sub_oid, 0o040000)?;
875        let tree = repo.find_tree(root_builder.write()?)?;
876
877        // Only keep .rs files — docs/ subtree should be pruned entirely
878        let filtered = repo.filter_by_predicate(&tree, |_repo, path| {
879            path.extension().is_some_and(|e| e == "rs")
880        })?;
881
882        assert_eq!(filtered.len(), 1);
883        assert!(filtered.get_name("keep.rs").is_some());
884        assert!(filtered.get_name("docs").is_none());
885
886        cleanup_test_repo(temp_path);
887        Ok(())
888    }
889
890    #[test]
891    fn test_filter_by_predicate_empty_tree_stays_empty() -> Result<(), Error> {
892        let (repo, temp_path) = setup_test_repo();
893
894        let tree = repo.find_tree(repo.treebuilder(None)?.write()?)?;
895        assert_eq!(tree.len(), 0);
896
897        let filtered = repo.filter_by_predicate(&tree, |_repo, _path| true)?;
898        assert_eq!(filtered.len(), 0);
899
900        cleanup_test_repo(temp_path);
901        Ok(())
902    }
903}