Skip to main content

git_filter_tree/
lib.rs

1//! Filter Git tree objects by glob patterns, gitattributes, or a custom predicate.
2//!
3//! This crate exposes the [`FilterTree`] trait, implemented on
4//! [`git2::Repository`], which produces a new tree containing only the entries
5//! that match either a set of **glob patterns**, a set of **gitattributes**,
6//! or an arbitrary **predicate function**.
7//! Trees are walked recursively; patterns are matched against full paths from
8//! the tree root.
9//!
10//! It is the plumbing library behind the `git filter-tree` command and the
11//! [`git-rewrite`](https://docs.rs/git-rewrite) porcelain.
12//!
13//! # Filter by Pattern
14//!
15//! ```no_run
16//! use git_filter_tree::FilterTree as _;
17//!
18//! let repo = git2::Repository::open_from_env()?;
19//! let tree = repo.head()?.peel_to_tree()?;
20//!
21//! // Produce a new tree that contains only Rust source files.
22//! let filtered = repo.filter_by_patterns(&tree, &["**/*.rs"])?;
23//! println!("tree sha: {}", filtered.id());
24//! # Ok::<(), Box<dyn std::error::Error>>(())
25//! ```
26//!
27//! A trailing `/` is expanded to `dir/**`, so `"src/"` keeps all files under
28//! `src/`. Multiple patterns are OR-ed together.
29//!
30//! # Filter by Attributes
31//!
32//! ```no_run
33//! use git_filter_tree::FilterTree as _;
34//!
35//! let repo = git2::Repository::open_from_env()?;
36//! let tree = repo.head()?.peel_to_tree()?;
37//!
38//! // Keep only entries that have the `export` attribute set in .gitattributes.
39//! let filtered = repo.filter_by_attributes(&tree, &["export"])?;
40//! println!("tree sha: {}", filtered.id());
41//! # Ok::<(), Box<dyn std::error::Error>>(())
42//! ```
43//!
44//! All listed attributes must be set (AND semantics). Entries with an
45//! attribute explicitly unset (`-export`) or unspecified are excluded.
46//!
47//! # Filter by Predicate
48//!
49//! ```no_run
50//! use git_filter_tree::FilterTree as _;
51//! use std::path::Path;
52//!
53//! let repo = git2::Repository::open_from_env()?;
54//! let tree = repo.head()?.peel_to_tree()?;
55//!
56//! // Keep only files whose path contains "generated".
57//! let filtered = repo.filter_by_predicate(&tree, |_repo, path| {
58//!     path.to_str().is_some_and(|s| s.contains("generated"))
59//! })?;
60//! println!("tree sha: {}", filtered.id());
61//! # Ok::<(), Box<dyn std::error::Error>>(())
62//! ```
63//!
64//! The predicate receives the repository and the full path of each blob entry
65//! relative to the tree root. Subtrees are included as long as at least one
66//! descendant matches.
67pub mod exe;
68use std::path::{Path, PathBuf};
69
70pub use git2::{Error, Repository};
71use globset::GlobSetBuilder;
72
73pub trait FilterTree {
74    /// Filters tree entries by gitattributes-style patterns and returns a new tree with contents
75    /// filtered through the provided patterns. Recursively walks the tree and matches patterns
76    /// against full paths from the tree root.
77    ///
78    /// The `patterns` type is an array of string slices and not a glob type because Git has
79    /// specific glob syntax that differs from standard shell syntax.
80    fn filter_by_patterns<'a>(
81        &'a self,
82        tree: &'a git2::Tree<'a>,
83        patterns: &[&str], // TODO create a `git-glob` crate to handle patterns more gracefully
84    ) -> Result<git2::Tree<'a>, Error>;
85
86    /// Filters tree entries by gitattributes and returns a new tree with contents filtered.
87    /// Recursively walks the tree and matches attributes against full paths from the tree root.
88    ///
89    /// The `attributes` type is an array of string slices. For attributes which have values,
90    /// not simply set or unset, use typical `.gitattributes` syntax.
91    fn filter_by_attributes<'a>(
92        &'a self,
93        tree: &'a git2::Tree<'a>,
94        attributes: &[&str],
95    ) -> Result<git2::Tree<'a>, Error>;
96
97    /// Filters tree entries using an arbitrary predicate and returns a new tree.
98    /// Recursively walks the tree; the predicate is called for each blob entry
99    /// with the repository and the entry's full path relative to the tree root.
100    /// Subtrees are retained as long as at least one descendant matches.
101    fn filter_by_predicate<'a, F>(
102        &'a self,
103        tree: &'a git2::Tree<'a>,
104        predicate: F,
105    ) -> Result<git2::Tree<'a>, Error>
106    where
107        F: Fn(&git2::Repository, &Path) -> bool;
108}
109
110impl FilterTree for git2::Repository {
111    fn filter_by_patterns<'a>(
112        &'a self,
113        tree: &'a git2::Tree<'a>,
114        patterns: &[&str],
115    ) -> Result<git2::Tree<'a>, Error> {
116        if patterns.is_empty() {
117            return Err(Error::from_str("At least one pattern is required"));
118        }
119
120        // Build GlobSet matcher
121        let mut glob_builder = GlobSetBuilder::new();
122        for pattern in patterns {
123            // A trailing `/` means "this directory" in gitattributes/gitignore
124            // semantics.  Normalize to `dir/**` so globset matches all files
125            // under the directory recursively.
126            let normalized: String;
127            let pat = if pattern.ends_with('/') {
128                normalized = format!("{}**", pattern);
129                normalized.as_str()
130            } else {
131                pattern
132            };
133            let glob = globset::Glob::new(pat)
134                .map_err(|e| Error::from_str(&format!("Invalid pattern '{}': {}", pattern, e)))?;
135            glob_builder.add(glob);
136        }
137
138        let matcher = glob_builder
139            .build()
140            .map_err(|e| Error::from_str(&e.to_string()))?;
141
142        // Recursively filter the tree
143        filter_tree_recursive(self, tree, None, &|_repo, path| matcher.is_match(path))
144    }
145
146    fn filter_by_predicate<'a, F>(
147        &'a self,
148        tree: &'a git2::Tree<'a>,
149        predicate: F,
150    ) -> Result<git2::Tree<'a>, Error>
151    where
152        F: Fn(&git2::Repository, &Path) -> bool,
153    {
154        filter_tree_recursive(self, tree, None, &predicate)
155    }
156
157    fn filter_by_attributes<'a>(
158        &'a self,
159        tree: &'a git2::Tree<'a>,
160        attributes: &[&str],
161    ) -> Result<git2::Tree<'a>, Error> {
162        if attributes.is_empty() {
163            return Err(git2::Error::from_str("at least one attribute is required"));
164        }
165
166        filter_tree_recursive(self, tree, None, &|repo, path| {
167            for attribute in attributes {
168                match repo.get_attr(path, attribute, git2::AttrCheckFlags::FILE_THEN_INDEX) {
169                    Ok(Some(value)) => {
170                        let value = git2::AttrValue::from_string(Some(value));
171                        match value {
172                            git2::AttrValue::Unspecified => return false,
173                            git2::AttrValue::False => return false,
174                            _ => {}
175                        }
176                    }
177                    Ok(None) => return false,
178                    Err(_) => return false,
179                }
180            }
181
182            true
183        })
184    }
185}
186
187/// Recursively filters a tree, matching patterns against full paths.
188/// Returns a new tree containing only entries that match or have matching descendants.
189fn filter_tree_recursive<'a, F>(
190    repo: &'a Repository,
191    tree: &'a git2::Tree<'a>,
192    prefix: Option<&Path>,
193    predicate: &F,
194) -> Result<git2::Tree<'a>, Error>
195where
196    F: Fn(&Repository, &Path) -> bool,
197{
198    let mut builder = repo.treebuilder(None)?;
199
200    for entry in tree.iter() {
201        let Some(name) = entry.name() else {
202            return Err(Error::from_str("name has invalid UTF-8"));
203        };
204
205        let full_path = match prefix {
206            Some(subdir) => subdir.join(name),
207            None => PathBuf::from(name.to_string()),
208        };
209
210        match entry.kind() {
211            Some(git2::ObjectType::Blob) => {
212                if predicate(repo, &full_path) {
213                    builder.insert(name, entry.id(), entry.filemode())?;
214                }
215            }
216            Some(git2::ObjectType::Tree) => {
217                let subtree = entry.to_object(repo)?.peel_to_tree()?;
218                let filtered_subtree =
219                    filter_tree_recursive(repo, &subtree, Some(&full_path), predicate)?;
220                if !filtered_subtree.is_empty() {
221                    builder.insert(name, filtered_subtree.id(), entry.filemode())?;
222                }
223            }
224            // Skip submodule commit pointers, tags, and any other unexpected
225            // object types that can appear as tree entries.
226            _ => continue,
227        }
228    }
229
230    let tree_oid = builder.write()?;
231    repo.find_tree(tree_oid)
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;

    /// Builds a scratch directory path that is unique per process *and* per
    /// thread, so that parallel tests and concurrently running test binaries
    /// never share (or delete) each other's repositories.
    fn unique_temp_path(prefix: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "{}-{}-{:?}",
            prefix,
            std::process::id(),
            std::thread::current().id()
        ))
    }

    /// Creates a fresh bare repository in a scratch directory and returns it
    /// together with the directory path (needed for cleanup).
    fn setup_test_repo() -> (Repository, PathBuf) {
        let temp_path = unique_temp_path("git-filter-tree-test");
        // Remove leftovers from an earlier run that did not reach its cleanup.
        let _ = fs::remove_dir_all(&temp_path);
        fs::create_dir_all(&temp_path).unwrap();
        let repo = Repository::init_bare(&temp_path).unwrap();
        (repo, temp_path)
    }

    /// Best-effort removal of a scratch repository directory.
    fn cleanup_test_repo(path: PathBuf) {
        let _ = fs::remove_dir_all(path);
    }

    /// Writes a flat tree with three blobs: `file1.txt`, `file2.rs`, `test.md`.
    fn create_test_tree<'a>(repo: &'a Repository) -> Result<git2::Tree<'a>, Error> {
        let mut tree_builder = repo.treebuilder(None)?;

        // Create some blob entries
        let blob1 = repo.blob(b"content1")?;
        let blob2 = repo.blob(b"content2")?;
        let blob3 = repo.blob(b"content3")?;

        tree_builder.insert("file1.txt", blob1, 0o100644)?;
        tree_builder.insert("file2.rs", blob2, 0o100644)?;
        tree_builder.insert("test.md", blob3, 0o100644)?;

        let tree_oid = tree_builder.write()?;
        repo.find_tree(tree_oid)
    }

    #[test]
    fn test_filter_single_pattern() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo)?;
        assert_eq!(tree.len(), 3);

        // Filter for .txt files only
        let filtered = repo.filter_by_patterns(&tree, &["*.txt"])?;
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("file1.txt").is_some());
        assert!(filtered.get_name("file2.rs").is_none());
        assert!(filtered.get_name("test.md").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_multiple_patterns() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo)?;

        // Filter for .txt and .rs files
        let filtered = repo.filter_by_patterns(&tree, &["*.txt", "*.rs"])?;
        assert_eq!(filtered.len(), 2);
        assert!(filtered.get_name("file1.txt").is_some());
        assert!(filtered.get_name("file2.rs").is_some());
        assert!(filtered.get_name("test.md").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_exact_match() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo)?;

        // Filter for exact filename
        let filtered = repo.filter_by_patterns(&tree, &["file1.txt"])?;
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("file1.txt").is_some());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_wildcard_patterns() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo)?;

        // Filter with wildcard pattern
        let filtered = repo.filter_by_patterns(&tree, &["file*"])?;
        assert_eq!(filtered.len(), 2);
        assert!(filtered.get_name("file1.txt").is_some());
        assert!(filtered.get_name("file2.rs").is_some());
        assert!(filtered.get_name("test.md").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_no_matches() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo)?;

        // Filter with pattern that matches nothing
        let filtered = repo.filter_by_patterns(&tree, &["*.nonexistent"])?;
        assert_eq!(filtered.len(), 0);

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_all_matches() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo)?;

        // Filter with pattern that matches everything
        let filtered = repo.filter_by_patterns(&tree, &["*"])?;
        assert_eq!(filtered.len(), 3);

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_empty_patterns_error() {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo).unwrap();

        // Empty patterns should return an error
        let result = repo.filter_by_patterns(&tree, &[]);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err().message(),
            "At least one pattern is required"
        );

        cleanup_test_repo(temp_path);
    }

    #[test]
    fn test_filter_invalid_pattern_error() {
        let (repo, temp_path) = setup_test_repo();

        let tree = create_test_tree(&repo).unwrap();

        // Invalid glob pattern should return an error
        let result = repo.filter_by_patterns(&tree, &["[invalid"]);
        assert!(result.is_err());

        cleanup_test_repo(temp_path);
    }

    #[test]
    fn test_filter_with_nested_tree() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let mut tree_builder = repo.treebuilder(None)?;

        // Create a nested tree
        let mut subtree_builder = repo.treebuilder(None)?;
        let blob = repo.blob(b"nested content")?;
        subtree_builder.insert("nested.txt", blob, 0o100644)?;
        let subtree_oid = subtree_builder.write()?;

        // Add files and subtree to main tree
        let blob1 = repo.blob(b"content1")?;
        tree_builder.insert("file1.txt", blob1, 0o100644)?;
        tree_builder.insert("subdir", subtree_oid, 0o040000)?;

        let tree_oid = tree_builder.write()?;
        let tree = repo.find_tree(tree_oid)?;

        // Filter - should keep both file and directory
        let filtered = repo.filter_by_patterns(&tree, &["*"])?;
        assert_eq!(filtered.len(), 2);
        assert!(filtered.get_name("file1.txt").is_some());
        assert!(filtered.get_name("subdir").is_some());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_preserves_empty_tree() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        // Create an empty tree
        let tree_builder = repo.treebuilder(None)?;
        let tree_oid = tree_builder.write()?;
        let tree = repo.find_tree(tree_oid)?;

        assert_eq!(tree.len(), 0);

        // Filter empty tree
        let filtered = repo.filter_by_patterns(&tree, &["*"])?;
        assert_eq!(filtered.len(), 0);

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_case_sensitive() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let mut tree_builder = repo.treebuilder(None)?;
        let blob1 = repo.blob(b"content1")?;
        let blob2 = repo.blob(b"content2")?;

        tree_builder.insert("File.txt", blob1, 0o100644)?;
        tree_builder.insert("file.txt", blob2, 0o100644)?;

        let tree_oid = tree_builder.write()?;
        let tree = repo.find_tree(tree_oid)?;

        // Filter with exact case match
        let filtered = repo.filter_by_patterns(&tree, &["file.txt"])?;
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("file.txt").is_some());
        // The differently-cased sibling must not slip through.
        assert!(filtered.get_name("File.txt").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_complex_patterns() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let mut tree_builder = repo.treebuilder(None)?;
        let blob = repo.blob(b"content")?;

        tree_builder.insert("test1.txt", blob, 0o100644)?;
        tree_builder.insert("test2.rs", blob, 0o100644)?;
        tree_builder.insert("data.json", blob, 0o100644)?;
        tree_builder.insert("README.md", blob, 0o100644)?;

        let tree_oid = tree_builder.write()?;
        let tree = repo.find_tree(tree_oid)?;

        // Multiple patterns with different wildcards
        let filtered = repo.filter_by_patterns(&tree, &["test*", "*.md"])?;
        assert_eq!(filtered.len(), 3);
        assert!(filtered.get_name("test1.txt").is_some());
        assert!(filtered.get_name("test2.rs").is_some());
        assert!(filtered.get_name("README.md").is_some());
        assert!(filtered.get_name("data.json").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_trailing_slash_matches_directory_contents() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        // Build a tree with a subdirectory: pyo3/Cargo.toml, pyo3/src/lib.rs,
        // and a top-level file that should NOT match.
        let blob = repo.blob(b"content")?;

        let mut src_builder = repo.treebuilder(None)?;
        src_builder.insert("lib.rs", blob, 0o100644)?;
        let src_oid = src_builder.write()?;

        let mut pyo3_builder = repo.treebuilder(None)?;
        pyo3_builder.insert("Cargo.toml", blob, 0o100644)?;
        pyo3_builder.insert("src", src_oid, 0o040000)?;
        let pyo3_oid = pyo3_builder.write()?;

        let mut root_builder = repo.treebuilder(None)?;
        root_builder.insert("pyo3", pyo3_oid, 0o040000)?;
        root_builder.insert("README.md", blob, 0o100644)?;
        let root_oid = root_builder.write()?;
        let tree = repo.find_tree(root_oid)?;

        // "pyo3/" (trailing slash) must match all files under pyo3/.
        let filtered = repo.filter_by_patterns(&tree, &["pyo3/"])?;
        assert_eq!(filtered.len(), 1, "only the pyo3 dir should remain");
        assert!(filtered.get_name("pyo3").is_some());
        assert!(filtered.get_name("README.md").is_none());

        // The pyo3 subtree itself must retain both entries.
        let pyo3_entry = filtered.get_name("pyo3").unwrap();
        let pyo3_tree = repo.find_tree(pyo3_entry.id())?;
        assert!(pyo3_tree.get_name("Cargo.toml").is_some());
        assert!(pyo3_tree.get_name("src").is_some());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    // -----------------------------------------------------------------------
    // Helpers and tests for filter_by_attributes
    // -----------------------------------------------------------------------

    /// Initializes a non-bare repository so that `.gitattributes` written to
    /// its working directory are picked up by `repo.get_attr(…)`.
    fn setup_attr_test_repo() -> (Repository, PathBuf) {
        let temp_path = unique_temp_path("git-filter-attr-test");
        // Remove leftovers from an earlier run that did not reach its cleanup.
        let _ = fs::remove_dir_all(&temp_path);
        fs::create_dir_all(&temp_path).unwrap();
        let repo = Repository::init(&temp_path).unwrap();
        (repo, temp_path)
    }

    /// Writes `content` as the `.gitattributes` file at the root of `repo_path`.
    fn write_gitattributes(repo_path: &Path, content: &str) {
        fs::write(repo_path.join(".gitattributes"), content).unwrap();
    }

    // --- filter_by_attributes: error cases ---------------------------------

    #[test]
    fn test_filter_by_attributes_empty_returns_error() {
        let (repo, temp_path) = setup_attr_test_repo();
        write_gitattributes(&temp_path, "");

        let tree = create_test_tree(&repo).unwrap();
        let result = repo.filter_by_attributes(&tree, &[]);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err().message(),
            "at least one attribute is required"
        );

        cleanup_test_repo(temp_path);
    }

    // --- filter_by_attributes: single attribute ----------------------------

    #[test]
    fn test_filter_by_attributes_set_attribute_includes_matching_files() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // Only .txt files carry the export-ignore attribute.
        write_gitattributes(&temp_path, "*.txt export-ignore\n");

        let blob = repo.blob(b"content")?;
        let mut builder = repo.treebuilder(None)?;
        builder.insert("readme.txt", blob, 0o100644)?;
        builder.insert("main.rs", blob, 0o100644)?;
        builder.insert("data.json", blob, 0o100644)?;
        let tree = repo.find_tree(builder.write()?)?;

        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("readme.txt").is_some());
        assert!(filtered.get_name("main.rs").is_none());
        assert!(filtered.get_name("data.json").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_explicitly_unset_attribute_excluded() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // .txt gets the attribute; .md explicitly has it unset with `-`.
        write_gitattributes(&temp_path, "*.txt custom-attr\n*.md -custom-attr\n");

        let blob = repo.blob(b"content")?;
        let mut builder = repo.treebuilder(None)?;
        builder.insert("readme.txt", blob, 0o100644)?;
        builder.insert("notes.md", blob, 0o100644)?;
        builder.insert("main.rs", blob, 0o100644)?;
        let tree = repo.find_tree(builder.write()?)?;

        let filtered = repo.filter_by_attributes(&tree, &["custom-attr"])?;
        // .txt is set, .md is explicitly unset, .rs is unspecified
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("readme.txt").is_some());
        assert!(filtered.get_name("notes.md").is_none());
        assert!(filtered.get_name("main.rs").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_no_attributes_set_returns_empty_tree() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // Empty .gitattributes — nothing is attributed.
        write_gitattributes(&temp_path, "");

        let blob = repo.blob(b"content")?;
        let mut builder = repo.treebuilder(None)?;
        builder.insert("file.txt", blob, 0o100644)?;
        builder.insert("file.rs", blob, 0o100644)?;
        let tree = repo.find_tree(builder.write()?)?;

        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;
        assert_eq!(filtered.len(), 0);

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_multiple_attributes_all_required() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // .txt has both attributes; .rs has only one.
        write_gitattributes(&temp_path, "*.txt attr-a attr-b\n*.rs attr-a\n");

        let blob = repo.blob(b"content")?;
        let mut builder = repo.treebuilder(None)?;
        builder.insert("file.txt", blob, 0o100644)?;
        builder.insert("file.rs", blob, 0o100644)?;
        builder.insert("file.md", blob, 0o100644)?;
        let tree = repo.find_tree(builder.write()?)?;

        // Both attributes must be present for a file to be included.
        let filtered = repo.filter_by_attributes(&tree, &["attr-a", "attr-b"])?;
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("file.txt").is_some());
        assert!(filtered.get_name("file.rs").is_none());
        assert!(filtered.get_name("file.md").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_attribute_with_value() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // linguist-language is set to a string value on .rs files.
        write_gitattributes(&temp_path, "*.rs linguist-language=Rust\n");

        let blob = repo.blob(b"content")?;
        let mut builder = repo.treebuilder(None)?;
        builder.insert("main.rs", blob, 0o100644)?;
        builder.insert("main.py", blob, 0o100644)?;
        let tree = repo.find_tree(builder.write()?)?;

        // An attribute with any value (including a string) counts as "set".
        let filtered = repo.filter_by_attributes(&tree, &["linguist-language"])?;
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("main.rs").is_some());
        assert!(filtered.get_name("main.py").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_all_files_match() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // Wildcard rule sets the attribute on every file.
        write_gitattributes(&temp_path, "* generated\n");

        let blob = repo.blob(b"content")?;
        let mut builder = repo.treebuilder(None)?;
        builder.insert("a.txt", blob, 0o100644)?;
        builder.insert("b.rs", blob, 0o100644)?;
        builder.insert("c.md", blob, 0o100644)?;
        let tree = repo.find_tree(builder.write()?)?;

        let filtered = repo.filter_by_attributes(&tree, &["generated"])?;
        assert_eq!(filtered.len(), 3);

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_nested_tree_filters_recursively() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // Only .proto files carry the attribute.
        write_gitattributes(&temp_path, "*.proto linguist-generated\n");

        let blob = repo.blob(b"content")?;

        // src/api.proto and src/main.rs
        let mut src_builder = repo.treebuilder(None)?;
        src_builder.insert("api.proto", blob, 0o100644)?;
        src_builder.insert("main.rs", blob, 0o100644)?;
        let src_oid = src_builder.write()?;

        let mut root_builder = repo.treebuilder(None)?;
        root_builder.insert("src", src_oid, 0o040000)?;
        root_builder.insert("README.md", blob, 0o100644)?;
        let tree = repo.find_tree(root_builder.write()?)?;

        let filtered = repo.filter_by_attributes(&tree, &["linguist-generated"])?;

        // Top-level README.md must be gone; src/ must survive because it has
        // at least one matching descendant.
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("src").is_some());
        assert!(filtered.get_name("README.md").is_none());

        let src_entry = filtered.get_name("src").unwrap();
        let src_tree = repo.find_tree(src_entry.id())?;
        assert_eq!(src_tree.len(), 1);
        assert!(src_tree.get_name("api.proto").is_some());
        assert!(src_tree.get_name("main.rs").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_empty_tree_stays_empty() -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        write_gitattributes(&temp_path, "* export-ignore\n");

        let tree = repo.find_tree(repo.treebuilder(None)?.write()?)?;
        assert_eq!(tree.len(), 0);

        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;
        assert_eq!(filtered.len(), 0);

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_attributes_subdirectory_excluded_when_all_children_unmatched()
    -> Result<(), Error> {
        let (repo, temp_path) = setup_attr_test_repo();
        // Only .txt files match; the `docs/` sub-tree contains only .md files.
        write_gitattributes(&temp_path, "*.txt export-ignore\n");

        let blob = repo.blob(b"content")?;

        let mut docs_builder = repo.treebuilder(None)?;
        docs_builder.insert("guide.md", blob, 0o100644)?;
        docs_builder.insert("api.md", blob, 0o100644)?;
        let docs_oid = docs_builder.write()?;

        let mut root_builder = repo.treebuilder(None)?;
        root_builder.insert("docs", docs_oid, 0o040000)?;
        root_builder.insert("notes.txt", blob, 0o100644)?;
        let tree = repo.find_tree(root_builder.write()?)?;

        let filtered = repo.filter_by_attributes(&tree, &["export-ignore"])?;

        // `docs/` should be pruned entirely because none of its children matched.
        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("notes.txt").is_some());
        assert!(filtered.get_name("docs").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    // -----------------------------------------------------------------------
    // Tests for filter_by_predicate
    // -----------------------------------------------------------------------

    #[test]
    fn test_filter_by_predicate_always_false_returns_empty_tree() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();
        let tree = create_test_tree(&repo)?;

        let filtered = repo.filter_by_predicate(&tree, |_repo, _path| false)?;
        assert_eq!(filtered.len(), 0);

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_predicate_always_true_returns_full_tree() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();
        let tree = create_test_tree(&repo)?;

        let filtered = repo.filter_by_predicate(&tree, |_repo, _path| true)?;
        assert_eq!(filtered.len(), tree.len());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_predicate_matches_on_path() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();
        let tree = create_test_tree(&repo)?;

        // Keep only entries whose path contains "file"
        let filtered = repo.filter_by_predicate(&tree, |_repo, path| {
            path.to_str().is_some_and(|s| s.contains("file"))
        })?;

        assert_eq!(filtered.len(), 2);
        assert!(filtered.get_name("file1.txt").is_some());
        assert!(filtered.get_name("file2.rs").is_some());
        assert!(filtered.get_name("test.md").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_predicate_receives_full_nested_path() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let blob = repo.blob(b"content")?;

        let mut sub_builder = repo.treebuilder(None)?;
        sub_builder.insert("deep.rs", blob, 0o100644)?;
        sub_builder.insert("deep.txt", blob, 0o100644)?;
        let sub_oid = sub_builder.write()?;

        let mut root_builder = repo.treebuilder(None)?;
        root_builder.insert("top.rs", blob, 0o100644)?;
        root_builder.insert("src", sub_oid, 0o040000)?;
        let tree = repo.find_tree(root_builder.write()?)?;

        // Record every path the predicate is called with. The result is
        // propagated with `?` so a failing walk is reported as such instead of
        // surfacing later as a confusing "path not seen" assertion.
        let seen_paths = std::cell::RefCell::new(Vec::new());
        repo.filter_by_predicate(&tree, |_repo, path| {
            seen_paths
                .borrow_mut()
                .push(path.to_str().unwrap().to_string());
            true
        })?;
        let seen_paths = seen_paths.into_inner();

        assert!(seen_paths.contains(&"top.rs".to_string()));

        assert!(
            seen_paths.contains(&("src".to_string() + std::path::MAIN_SEPARATOR_STR + "deep.rs"))
        );
        assert!(
            seen_paths.contains(&("src".to_string() + std::path::MAIN_SEPARATOR_STR + "deep.txt"))
        );

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_predicate_prunes_subtree_when_no_descendants_match() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let blob = repo.blob(b"content")?;

        let mut sub_builder = repo.treebuilder(None)?;
        sub_builder.insert("a.txt", blob, 0o100644)?;
        sub_builder.insert("b.txt", blob, 0o100644)?;
        let sub_oid = sub_builder.write()?;

        let mut root_builder = repo.treebuilder(None)?;
        root_builder.insert("keep.rs", blob, 0o100644)?;
        root_builder.insert("docs", sub_oid, 0o040000)?;
        let tree = repo.find_tree(root_builder.write()?)?;

        // Only keep .rs files — docs/ subtree should be pruned entirely
        let filtered = repo.filter_by_predicate(&tree, |_repo, path| {
            path.extension().is_some_and(|e| e == "rs")
        })?;

        assert_eq!(filtered.len(), 1);
        assert!(filtered.get_name("keep.rs").is_some());
        assert!(filtered.get_name("docs").is_none());

        cleanup_test_repo(temp_path);
        Ok(())
    }

    #[test]
    fn test_filter_by_predicate_empty_tree_stays_empty() -> Result<(), Error> {
        let (repo, temp_path) = setup_test_repo();

        let tree = repo.find_tree(repo.treebuilder(None)?.write()?)?;
        assert_eq!(tree.len(), 0);

        let filtered = repo.filter_by_predicate(&tree, |_repo, _path| true)?;
        assert_eq!(filtered.len(), 0);

        cleanup_test_repo(temp_path);
        Ok(())
    }
}