// composition_cli/core/get_all_entries.rs

1use ignore::gitignore::{Gitignore, GitignoreBuilder};
2use rayon::prelude::*;
3use std::{
4    collections::{HashMap, HashSet},
5    path::{Path, PathBuf},
6};
7use walkdir::{DirEntry, WalkDir};
8
9use crate::context::AppContext;
10
11pub fn get_all_entries(app_context: &AppContext) -> Vec<DirEntry> {
12    let tracked_extensions: HashSet<String> = app_context
13        .config
14        .tracked
15        .iter()
16        .flat_map(|t| t.extensions.iter().map(|e| e.to_lowercase()))
17        .collect();
18
19    let ignored_files: HashSet<String> = app_context.config.ignored_files.iter().cloned().collect();
20
21    let ignored_directories: HashSet<String> = app_context
22        .config
23        .ignored_directories
24        .iter()
25        .cloned()
26        .collect();
27
28    // canonicalize root once
29    let canonical_root = app_context
30        .path
31        .canonicalize()
32        .unwrap_or_else(|_| app_context.path.clone());
33
34    // build gitignore cache first
35    let gitignore_cache = if app_context.config.respect_gitignore {
36        build_gitignore_cache(
37            &app_context.path,
38            &canonical_root,
39            app_context.config.ignore_dotfolders,
40            &ignored_directories,
41        )
42    } else {
43        HashMap::new()
44    };
45
46    // collect all entries (sequential)
47    let all_entries = collect_directory_entries(
48        &app_context.path,
49        app_context.config.ignore_dotfolders,
50        &ignored_directories,
51    );
52
53    // filter entries in parallel
54    filter_entries_parallel(
55        all_entries,
56        app_context,
57        &canonical_root,
58        &tracked_extensions,
59        &ignored_files,
60        &gitignore_cache,
61    )
62}
63
64fn should_traverse_directory(
65    entry: &DirEntry,
66    root: &Path,
67    ignore_dotfolders: bool,
68    ignored_directories: &HashSet<String>,
69) -> bool {
70    // check directory ignore settings
71    if !entry.file_type().is_dir() {
72        return true;
73    }
74
75    if entry.path() == root {
76        return true;
77    }
78
79    let file_name = entry.file_name().to_string_lossy();
80
81    if ignore_dotfolders && file_name.starts_with('.') {
82        return false;
83    }
84
85    if ignored_directories.contains(file_name.as_ref()) {
86        return false;
87    }
88
89    true
90}
91
92fn collect_directory_entries(
93    root: &Path,
94    ignore_dotfolders: bool,
95    ignored_directories: &HashSet<String>,
96) -> Vec<DirEntry> {
97    WalkDir::new(root)
98        .follow_links(false)
99        .into_iter()
100        .filter_entry(|e| {
101            should_traverse_directory(e, root, ignore_dotfolders, ignored_directories)
102        })
103        .filter_map(Result::ok)
104        .collect()
105}
106
107fn filter_entries_parallel(
108    entries: Vec<DirEntry>,
109    app_context: &AppContext,
110    canonical_root: &Path,
111    tracked_extensions: &HashSet<String>,
112    ignored_files: &HashSet<String>,
113    gitignore_cache: &HashMap<PathBuf, Gitignore>,
114) -> Vec<DirEntry> {
115    entries
116        .into_par_iter()
117        .filter(|entry| {
118            entry.file_type().is_file()
119                && filter_file(
120                    entry,
121                    app_context,
122                    canonical_root,
123                    tracked_extensions,
124                    ignored_files,
125                    gitignore_cache,
126                )
127        })
128        .collect()
129}
130
131fn build_gitignore_cache(
132    root: &Path,
133    canonical_root: &Path,
134    ignore_dotfolders: bool,
135    ignored_directories: &HashSet<String>,
136) -> HashMap<PathBuf, Gitignore> {
137    let mut cache = HashMap::new();
138
139    // root gitignore
140    if let Ok(gitignore) = build_gitignore(root) {
141        cache.insert(canonical_root.to_path_buf(), gitignore);
142    }
143
144    // walk and build cache for all directories
145    let directories = collect_directory_entries(root, ignore_dotfolders, ignored_directories)
146        .into_iter()
147        .filter(|e| e.file_type().is_dir() && e.path() != root);
148
149    for dir_entry in directories {
150        // infer canonical path by appending relative path to canonical root
151        if let Some(canonical_dir) = infer_canonical_path(dir_entry.path(), root, canonical_root) {
152            if let Ok(gitignore) = build_gitignore(dir_entry.path()) {
153                cache.insert(canonical_dir, gitignore);
154            }
155        }
156    }
157
158    cache
159}
160
/// Re-bases `path` from `root` onto `canonical_root` without touching the filesystem.
///
/// Returns `None` when `path` does not start with `root`.
fn infer_canonical_path(path: &Path, root: &Path, canonical_root: &Path) -> Option<PathBuf> {
    let relative = path.strip_prefix(root).ok()?;
    Some(canonical_root.join(relative))
}
166
167fn filter_file(
168    entry: &DirEntry,
169    app_context: &AppContext,
170    canonical_root: &Path,
171    tracked_extensions: &HashSet<String>,
172    ignored_files: &HashSet<String>,
173    gitignore_cache: &HashMap<PathBuf, Gitignore>,
174) -> bool {
175    let file_name = entry.file_name().to_string_lossy();
176
177    if app_context.config.ignore_dotfiles && file_name.starts_with('.') {
178        return false;
179    }
180
181    if ignored_files.contains(file_name.as_ref()) {
182        return false;
183    }
184
185    // check extension
186    if !has_tracked_extension(entry.path(), tracked_extensions) {
187        return false;
188    }
189
190    // check gitignore
191    if app_context.config.respect_gitignore {
192        if is_ignored_by_git(entry, &app_context.path, canonical_root, gitignore_cache) {
193            return false;
194        }
195    }
196
197    true
198}
199
/// Returns `true` when `path` has an extension whose lowercase form is in
/// `tracked_extensions`.
///
/// Paths without an extension are never tracked. The set is expected to hold
/// lowercase entries, since only the path's extension is lowercased here.
fn has_tracked_extension(path: &Path, tracked_extensions: &HashSet<String>) -> bool {
    match path.extension() {
        Some(ext) => tracked_extensions.contains(&ext.to_string_lossy().to_lowercase()),
        None => false,
    }
}
205
206fn is_ignored_by_git(
207    entry: &DirEntry,
208    root: &Path,
209    canonical_root: &Path,
210    gitignore_cache: &HashMap<PathBuf, Gitignore>,
211) -> bool {
212    // infer canonical_path
213    let canonical_path = match infer_canonical_path(entry.path(), root, canonical_root) {
214        Some(path) => path,
215        None => return false,
216    };
217
218    let is_dir = entry.file_type().is_dir();
219
220    // for files, check gitignores up to and including the parent directory
221    // for directories, check up to the directory itself
222    let check_path = if is_dir {
223        &canonical_path
224    } else {
225        canonical_path.parent().unwrap_or(&canonical_path)
226    };
227
228    let gitignore_stack = build_gitignore_stack(check_path, canonical_root, gitignore_cache);
229    apply_gitignore_rules(&canonical_path, is_dir, &gitignore_stack)
230}
231
232fn build_gitignore_stack<'a>(
233    check_path: &'a Path,
234    canonical_root: &Path,
235    gitignore_cache: &'a HashMap<PathBuf, Gitignore>,
236) -> Vec<(&'a Path, &'a Gitignore)> {
237    let mut stack = Vec::new();
238
239    for ancestor in check_path.ancestors() {
240        // stop if above root
241        if !ancestor.starts_with(canonical_root) {
242            break;
243        }
244
245        if let Some(gitignore) = gitignore_cache.get(ancestor) {
246            stack.push((ancestor.as_ref(), gitignore));
247        }
248    }
249
250    // reverse to check from parent to child
251    stack.reverse();
252    stack
253}
254
255fn apply_gitignore_rules(
256    canonical_path: &Path,
257    is_dir: bool,
258    gitignore_stack: &[(&Path, &Gitignore)],
259) -> bool {
260    let mut final_decision = None;
261
262    for (gitignore_dir, gitignore) in gitignore_stack {
263        // calculate path relative to the gitignore directory
264        let relative_path = match canonical_path.strip_prefix(gitignore_dir) {
265            Ok(rel) => rel,
266            Err(_) => continue,
267        };
268
269        let matched = gitignore.matched_path_or_any_parents(relative_path, is_dir);
270
271        if matched.is_ignore() {
272            final_decision = Some(true);
273        } else if matched.is_whitelist() {
274            final_decision = Some(false);
275        }
276    }
277
278    final_decision.unwrap_or(false)
279}
280
281fn build_gitignore(dir: &Path) -> Result<Gitignore, ignore::Error> {
282    let mut builder = GitignoreBuilder::new(dir);
283    let gitignore_path = dir.join(".gitignore");
284
285    if gitignore_path.exists() {
286        builder.add(gitignore_path);
287    }
288
289    builder.build()
290}