// aurora_semantic/ignore/mod.rs
use std::path::{Path, PathBuf};
12
13use ignore::gitignore::{Gitignore, GitignoreBuilder};
14use ignore::WalkBuilder;
15
16use crate::config::IgnoreConfig;
17use crate::error::Result;
18
19pub struct FileFilter {
21 config: IgnoreConfig,
22 gitignore: Option<Gitignore>,
23 custom_patterns: Vec<glob::Pattern>,
24}
25
26impl FileFilter {
27 pub fn new(config: IgnoreConfig) -> Self {
29 Self {
30 config,
31 gitignore: None,
32 custom_patterns: Vec::new(),
33 }
34 }
35
36 pub fn for_workspace(root: &Path, config: IgnoreConfig) -> Result<Self> {
38 let mut filter = Self::new(config);
39 filter.load_gitignore(root)?;
40 filter.compile_patterns()?;
41 Ok(filter)
42 }
43
44 fn load_gitignore(&mut self, root: &Path) -> Result<()> {
46 if !self.config.use_gitignore {
47 return Ok(());
48 }
49
50 let gitignore_path = root.join(".gitignore");
51 if gitignore_path.exists() {
52 let mut builder = GitignoreBuilder::new(root);
53 builder.add(&gitignore_path);
54
55 if let Some(home) = dirs::home_dir() {
57 let global_gitignore = home.join(".gitignore_global");
58 if global_gitignore.exists() {
59 builder.add(&global_gitignore);
60 }
61 }
62
63 self.gitignore = builder.build().ok();
64 }
65
66 Ok(())
67 }
68
69 fn compile_patterns(&mut self) -> Result<()> {
71 for pattern in &self.config.patterns {
72 if let Ok(compiled) = glob::Pattern::new(pattern) {
73 self.custom_patterns.push(compiled);
74 }
75 }
76 Ok(())
77 }
78
79 pub fn should_index(&self, path: &Path, file_size: u64) -> bool {
81 if file_size > self.config.max_file_size {
83 return false;
84 }
85
86 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
88 let ext_lower = ext.to_lowercase();
89 if self.config.ignored_extensions.iter().any(|e| e == &ext_lower) {
90 return false;
91 }
92 }
93
94 for component in path.components() {
96 if let Some(name) = component.as_os_str().to_str() {
97 if self.config.ignored_directories.iter().any(|d| d == name) {
98 return false;
99 }
100 }
101 }
102
103 if let Some(ref gitignore) = self.gitignore {
105 if gitignore.matched(path, path.is_dir()).is_ignore() {
106 return false;
107 }
108 }
109
110 let path_str = path.to_string_lossy();
112 for pattern in &self.custom_patterns {
113 if pattern.matches(&path_str) {
114 return false;
115 }
116 }
117
118 true
119 }
120
121 pub fn should_traverse(&self, path: &Path) -> bool {
123 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
125 if self.config.ignored_directories.iter().any(|d| d == name) {
126 return false;
127 }
128 }
129
130 if let Some(ref gitignore) = self.gitignore {
132 if gitignore.matched(path, true).is_ignore() {
133 return false;
134 }
135 }
136
137 true
138 }
139}
140
141pub struct FileWalker {
143 filter: FileFilter,
144 root: PathBuf,
145}
146
147impl FileWalker {
148 pub fn new(root: PathBuf, config: IgnoreConfig) -> Result<Self> {
150 let filter = FileFilter::for_workspace(&root, config)?;
151 Ok(Self { filter, root })
152 }
153
154 pub fn walk(&self) -> Result<Vec<PathBuf>> {
156 let mut files = Vec::new();
157
158 let walker = WalkBuilder::new(&self.root)
159 .hidden(false) .git_ignore(self.filter.config.use_gitignore)
161 .git_global(self.filter.config.use_gitignore)
162 .git_exclude(self.filter.config.use_gitignore)
163 .ignore(self.filter.config.use_ignore_files)
164 .build();
165
166 for entry in walker.filter_map(|e| e.ok()) {
167 let path = entry.path();
168
169 if path.is_dir() {
171 continue;
172 }
173
174 let metadata = match std::fs::metadata(path) {
176 Ok(m) => m,
177 Err(_) => continue,
178 };
179
180 if self.filter.should_index(path, metadata.len()) {
182 files.push(path.to_path_buf());
183 }
184 }
185
186 Ok(files)
187 }
188
189 pub fn walk_with_callback<F>(&self, mut callback: F) -> Result<()>
191 where
192 F: FnMut(&Path, u64) -> bool, {
194 let walker = WalkBuilder::new(&self.root)
195 .hidden(false)
196 .git_ignore(self.filter.config.use_gitignore)
197 .git_global(self.filter.config.use_gitignore)
198 .git_exclude(self.filter.config.use_gitignore)
199 .ignore(self.filter.config.use_ignore_files)
200 .build();
201
202 for entry in walker.filter_map(|e| e.ok()) {
203 let path = entry.path();
204
205 if path.is_dir() {
206 continue;
207 }
208
209 let metadata = match std::fs::metadata(path) {
210 Ok(m) => m,
211 Err(_) => continue,
212 };
213
214 if self.filter.should_index(path, metadata.len()) {
215 if !callback(path, metadata.len()) {
216 break;
217 }
218 }
219 }
220
221 Ok(())
222 }
223}
224
225#[allow(dead_code)]
227pub fn scan_workspace(root: &Path, config: &IgnoreConfig) -> Result<WorkspaceScan> {
228 let filter = FileFilter::for_workspace(root, config.clone())?;
229
230 let walker = WalkBuilder::new(root)
231 .hidden(false)
232 .git_ignore(config.use_gitignore)
233 .git_global(config.use_gitignore)
234 .git_exclude(config.use_gitignore)
235 .ignore(config.use_ignore_files)
236 .build();
237
238 let mut scan = WorkspaceScan {
239 file_count: 0,
240 total_bytes: 0,
241 by_extension: std::collections::HashMap::new(),
242 };
243
244 for entry in walker.filter_map(|e| e.ok()) {
245 let path = entry.path();
246
247 if path.is_dir() {
248 continue;
249 }
250
251 let metadata = match std::fs::metadata(path) {
252 Ok(m) => m,
253 Err(_) => continue,
254 };
255
256 if filter.should_index(path, metadata.len()) {
257 scan.file_count += 1;
258 scan.total_bytes += metadata.len();
259
260 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
261 *scan.by_extension.entry(ext.to_lowercase()).or_insert(0) += 1;
262 }
263 }
264 }
265
266 Ok(scan)
267}
268
/// Aggregate statistics produced by a workspace scan.
///
/// `Default` yields an empty scan (all counters zero, no extensions), and the
/// type can be cloned and compared for equality.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
// Not referenced from within the crate yet.
#[allow(dead_code)]
pub struct WorkspaceScan {
    /// Number of files counted.
    pub file_count: usize,
    /// Sum of the sizes, in bytes, of the counted files.
    pub total_bytes: u64,
    /// Number of counted files per extension.
    pub by_extension: std::collections::HashMap<String, usize>,
}
280
#[cfg(test)]
mod tests {
    use super::*;

    /// Extension filtering; relies on the default config ignoring `exe` and
    /// `dll` while allowing `rs` and `py`.
    #[test]
    fn test_file_filter_extension() {
        let filter = FileFilter::new(IgnoreConfig::default());
        let indexed = |name: &str| filter.should_index(Path::new(name), 100);

        assert!(!indexed("test.exe"));
        assert!(!indexed("test.dll"));

        assert!(indexed("test.rs"));
        assert!(indexed("test.py"));
    }

    /// Files larger than `max_file_size` are rejected, smaller ones accepted.
    #[test]
    fn test_file_filter_size() {
        let filter = FileFilter::new(IgnoreConfig {
            max_file_size: 1000,
            ..Default::default()
        });
        let indexed = |name: &str, size: u64| filter.should_index(Path::new(name), size);

        assert!(indexed("small.rs", 500));
        assert!(!indexed("large.rs", 2000));
    }

    /// Directory filtering; relies on the default config ignoring
    /// `node_modules` and `target`.
    #[test]
    fn test_file_filter_directory() {
        let filter = FileFilter::new(IgnoreConfig::default());
        let indexed = |name: &str| filter.should_index(Path::new(name), 100);

        assert!(!indexed("node_modules/package/index.js"));
        assert!(!indexed("target/debug/main.rs"));
        assert!(indexed("src/main.rs"));
    }
}