1use std::path::{Path, PathBuf};
12
13use ignore::gitignore::{Gitignore, GitignoreBuilder};
14use ignore::WalkBuilder;
15
16use crate::config::IgnoreConfig;
17use crate::error::Result;
18
19pub struct FileFilter {
21 config: IgnoreConfig,
22 gitignore: Option<Gitignore>,
23 custom_patterns: Vec<glob::Pattern>,
24 root_path: Option<PathBuf>,
26 excluded_files: Vec<PathBuf>,
28 excluded_directory_paths: Vec<PathBuf>,
30}
31
32impl FileFilter {
33 pub fn new(config: IgnoreConfig) -> Self {
35 let excluded_files: Vec<PathBuf> = config.excluded_files.iter()
37 .map(|p| Self::normalize_path(p))
38 .collect();
39 let excluded_directory_paths: Vec<PathBuf> = config.excluded_directories.iter()
40 .map(|p| Self::normalize_path(p))
41 .collect();
42
43 Self {
44 config,
45 gitignore: None,
46 custom_patterns: Vec::new(),
47 root_path: None,
48 excluded_files,
49 excluded_directory_paths,
50 }
51 }
52
53 pub fn for_workspace(root: &Path, config: IgnoreConfig) -> Result<Self> {
55 let mut filter = Self::new(config);
56 filter.root_path = Some(root.to_path_buf());
57 filter.load_gitignore(root)?;
58 filter.compile_patterns()?;
59 Ok(filter)
60 }
61
/// Normalizes a path for textual/component comparison.
///
/// * Backslashes are converted to forward slashes.
/// * Trailing slashes are removed.
/// * Leading `./` segments are removed, so `./vendor/libs` and `vendor/libs`
///   normalize to the same value. Without this a leading `.` stays a distinct
///   path component and the two spellings never compare equal.
///
/// The conversion goes through `to_string_lossy`, so non-UTF-8 sequences are
/// replaced rather than preserved.
fn normalize_path(path: &Path) -> PathBuf {
    let unified = path.to_string_lossy().replace('\\', "/");
    // Trim the tail first: the remainder then never ends in '/', which
    // guarantees that stripping "./" below cannot produce an empty path
    // (a bare "./" stays ".").
    let mut rest = unified.trim_end_matches('/');
    while let Some(stripped) = rest.strip_prefix("./") {
        // Also swallow repeated separators such as ".//src".
        rest = stripped.trim_start_matches('/');
    }
    PathBuf::from(rest)
}
69
70 fn get_relative_path(&self, path: &Path) -> PathBuf {
72 let relative = if let Some(ref root) = self.root_path {
73 path.strip_prefix(root).unwrap_or(path)
74 } else {
75 path
76 };
77 Self::normalize_path(relative)
78 }
79
80 fn load_gitignore(&mut self, root: &Path) -> Result<()> {
82 if !self.config.use_gitignore {
83 return Ok(());
84 }
85
86 let gitignore_path = root.join(".gitignore");
87 if gitignore_path.exists() {
88 let mut builder = GitignoreBuilder::new(root);
89 builder.add(&gitignore_path);
90
91 if let Some(home) = dirs::home_dir() {
93 let global_gitignore = home.join(".gitignore_global");
94 if global_gitignore.exists() {
95 builder.add(&global_gitignore);
96 }
97 }
98
99 self.gitignore = builder.build().ok();
100 }
101
102 Ok(())
103 }
104
105 fn compile_patterns(&mut self) -> Result<()> {
107 for pattern in &self.config.patterns {
108 if let Ok(compiled) = glob::Pattern::new(pattern) {
109 self.custom_patterns.push(compiled);
110 }
111 }
112 Ok(())
113 }
114
115 pub fn should_index(&self, path: &Path, file_size: u64) -> bool {
117 if file_size > self.config.max_file_size {
119 return false;
120 }
121
122 let filename = path.file_name()
124 .and_then(|n| n.to_str())
125 .map(|s| s.to_lowercase())
126 .unwrap_or_default();
127
128 const IGNORED_FILENAMES: &[&str] = &[
131 "pnpm-lock.yaml", "package-lock.json", "yarn.lock", "bun.lockb",
132 "cargo.lock", "gemfile.lock", "composer.lock", "poetry.lock",
133 "pipfile.lock", "pubspec.lock", "packages.lock.json",
134 "shrinkwrap.yaml", "npm-shrinkwrap.json",
135 ".ds_store", "thumbs.db", "desktop.ini",
136 ".gitignore", ".gitattributes", ".gitmodules",
137 ".npmrc", ".yarnrc", ".nvmrc", ".node-version",
138 ".env", ".env.local", ".env.development", ".env.production",
139 ];
140
141 if IGNORED_FILENAMES.iter().any(|&f| filename == f) {
142 return false;
143 }
144
145 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
147 let ext_lower = ext.to_lowercase();
148 if self.config.ignored_extensions.iter().any(|e| e == &ext_lower) {
149 return false;
150 }
151 }
152
153 for component in path.components() {
155 if let Some(name) = component.as_os_str().to_str() {
156 if self.config.ignored_directories.iter().any(|d| d == name) {
157 return false;
158 }
159 }
160 }
161
162 if let Some(ref gitignore) = self.gitignore {
164 if gitignore.matched(path, path.is_dir()).is_ignore() {
165 return false;
166 }
167 }
168
169 let path_str = path.to_string_lossy();
171 for pattern in &self.custom_patterns {
172 if pattern.matches(&path_str) {
173 return false;
174 }
175 }
176
177 let relative_path = self.get_relative_path(path);
179 if self.excluded_files.iter().any(|excluded| {
180 *excluded == relative_path
181 }) {
182 return false;
183 }
184
185 for excluded_dir in &self.excluded_directory_paths {
187 if relative_path.starts_with(excluded_dir) {
188 return false;
189 }
190 }
191
192 true
193 }
194
195 pub fn should_traverse(&self, path: &Path) -> bool {
197 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
199 if self.config.ignored_directories.iter().any(|d| d == name) {
200 return false;
201 }
202 }
203
204 if let Some(ref gitignore) = self.gitignore {
206 if gitignore.matched(path, true).is_ignore() {
207 return false;
208 }
209 }
210
211 let relative_path = self.get_relative_path(path);
213 for excluded_dir in &self.excluded_directory_paths {
214 if relative_path == *excluded_dir || relative_path.starts_with(excluded_dir) {
215 return false;
216 }
217 }
218
219 true
220 }
221}
222
223pub struct FileWalker {
225 filter: FileFilter,
226 root: PathBuf,
227}
228
229impl FileWalker {
230 pub fn new(root: PathBuf, config: IgnoreConfig) -> Result<Self> {
232 let filter = FileFilter::for_workspace(&root, config)?;
233 Ok(Self { filter, root })
234 }
235
236 pub fn walk(&self) -> Result<Vec<PathBuf>> {
238 let mut files = Vec::new();
239
240 let walker = WalkBuilder::new(&self.root)
241 .hidden(false) .git_ignore(self.filter.config.use_gitignore)
243 .git_global(self.filter.config.use_gitignore)
244 .git_exclude(self.filter.config.use_gitignore)
245 .ignore(self.filter.config.use_ignore_files)
246 .build();
247
248 for entry in walker.filter_map(|e| e.ok()) {
249 let path = entry.path();
250
251 if path.is_dir() {
253 continue;
254 }
255
256 let metadata = match std::fs::metadata(path) {
258 Ok(m) => m,
259 Err(_) => continue,
260 };
261
262 if self.filter.should_index(path, metadata.len()) {
264 files.push(path.to_path_buf());
265 }
266 }
267
268 Ok(files)
269 }
270
271 pub fn walk_with_callback<F>(&self, mut callback: F) -> Result<()>
273 where
274 F: FnMut(&Path, u64) -> bool, {
276 let walker = WalkBuilder::new(&self.root)
277 .hidden(false)
278 .git_ignore(self.filter.config.use_gitignore)
279 .git_global(self.filter.config.use_gitignore)
280 .git_exclude(self.filter.config.use_gitignore)
281 .ignore(self.filter.config.use_ignore_files)
282 .build();
283
284 for entry in walker.filter_map(|e| e.ok()) {
285 let path = entry.path();
286
287 if path.is_dir() {
288 continue;
289 }
290
291 let metadata = match std::fs::metadata(path) {
292 Ok(m) => m,
293 Err(_) => continue,
294 };
295
296 if self.filter.should_index(path, metadata.len()) {
297 if !callback(path, metadata.len()) {
298 break;
299 }
300 }
301 }
302
303 Ok(())
304 }
305}
306
307#[allow(dead_code)]
309pub fn scan_workspace(root: &Path, config: &IgnoreConfig) -> Result<WorkspaceScan> {
310 let filter = FileFilter::for_workspace(root, config.clone())?;
311
312 let walker = WalkBuilder::new(root)
313 .hidden(false)
314 .git_ignore(config.use_gitignore)
315 .git_global(config.use_gitignore)
316 .git_exclude(config.use_gitignore)
317 .ignore(config.use_ignore_files)
318 .build();
319
320 let mut scan = WorkspaceScan {
321 file_count: 0,
322 total_bytes: 0,
323 by_extension: std::collections::HashMap::new(),
324 };
325
326 for entry in walker.filter_map(|e| e.ok()) {
327 let path = entry.path();
328
329 if path.is_dir() {
330 continue;
331 }
332
333 let metadata = match std::fs::metadata(path) {
334 Ok(m) => m,
335 Err(_) => continue,
336 };
337
338 if filter.should_index(path, metadata.len()) {
339 scan.file_count += 1;
340 scan.total_bytes += metadata.len();
341
342 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
343 *scan.by_extension.entry(ext.to_lowercase()).or_insert(0) += 1;
344 }
345 }
346 }
347
348 Ok(scan)
349}
350
/// Summary of the files in a workspace that pass the index filter.
#[derive(Debug)]
#[allow(dead_code)]
pub struct WorkspaceScan {
    /// Number of files accepted for indexing.
    pub file_count: usize,
    /// Combined size, in bytes, of the accepted files.
    pub total_bytes: u64,
    /// Number of accepted files per lower-cased file extension.
    pub by_extension: std::collections::HashMap<String, usize>,
}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Size used wherever the test is not about the size limit.
    const SMALL: u64 = 100;

    /// Filter over the default configuration, not bound to any workspace.
    fn unbound_default_filter() -> FileFilter {
        FileFilter::new(IgnoreConfig::default())
    }

    #[test]
    fn test_file_filter_extension() {
        let filter = unbound_default_filter();

        // Binary artefacts are rejected by extension.
        for rejected in ["test.exe", "test.dll"] {
            assert!(
                !filter.should_index(Path::new(rejected), SMALL),
                "{} should not be indexed",
                rejected
            );
        }

        // Source files are accepted.
        for accepted in ["test.rs", "test.py"] {
            assert!(
                filter.should_index(Path::new(accepted), SMALL),
                "{} should be indexed",
                accepted
            );
        }
    }

    #[test]
    fn test_file_filter_size() {
        let filter = FileFilter::new(IgnoreConfig {
            max_file_size: 1000,
            ..Default::default()
        });

        assert!(filter.should_index(Path::new("small.rs"), 500));
        assert!(!filter.should_index(Path::new("large.rs"), 2000));
    }

    #[test]
    fn test_file_filter_directory() {
        let filter = unbound_default_filter();

        let cases = [
            ("node_modules/package/index.js", false),
            ("target/debug/main.rs", false),
            ("src/main.rs", true),
        ];
        for (path, expected) in cases {
            assert_eq!(
                filter.should_index(Path::new(path), SMALL),
                expected,
                "unexpected decision for {}",
                path
            );
        }
    }

    #[test]
    fn test_file_filter_excluded_paths() {
        let config = IgnoreConfig {
            excluded_files: vec![PathBuf::from("src/generated.rs")],
            excluded_directories: vec![PathBuf::from("vendor/libs")],
            ..Default::default()
        };
        let filter = FileFilter::for_workspace(Path::new("/project"), config).unwrap();

        // Explicitly excluded file.
        assert!(!filter.should_index(Path::new("/project/src/generated.rs"), SMALL));
        // Sibling of the excluded file is unaffected.
        assert!(filter.should_index(Path::new("/project/src/main.rs"), SMALL));
        // File below an excluded directory.
        assert!(!filter.should_index(Path::new("/project/vendor/libs/helper.rs"), SMALL));
        // The excluded directory itself is not traversed.
        assert!(!filter.should_traverse(Path::new("/project/vendor/libs")));
        // Other directories are.
        assert!(filter.should_traverse(Path::new("/project/src")));
    }

    #[test]
    fn test_file_filter_aurora_excluded_by_default() {
        let filter =
            FileFilter::for_workspace(Path::new("/project"), IgnoreConfig::default()).unwrap();

        assert!(!filter.should_traverse(Path::new("/project/.aurora")));
        assert!(!filter.should_index(Path::new("/project/.aurora/index.bin"), SMALL));
    }
}