1use std::path::Path;
6
7use crate::WalkerFs;
8use crate::glob::glob_match;
9
10const MAX_GITIGNORE_SIZE: usize = 1_048_576;
14
15#[derive(Debug, Clone)]
17struct IgnoreRule {
18 pattern: String,
20 negated: bool,
22 dir_only: bool,
24 anchored: bool,
26}
27
28impl IgnoreRule {
29 fn parse(line: &str) -> Option<Self> {
30 let line = line.trim();
31
32 if line.is_empty() || line.starts_with('#') {
34 return None;
35 }
36
37 let mut pattern = line.to_string();
38 let mut negated = false;
39 let mut dir_only = false;
40
41 if let Some(stripped) = pattern.strip_prefix('!') {
43 negated = true;
44 pattern = stripped.to_string();
45 }
46
47 if let Some(stripped) = pattern.strip_suffix('/') {
49 dir_only = true;
50 pattern = stripped.to_string();
51 }
52
53 let anchored = pattern.contains('/');
55
56 if let Some(stripped) = pattern.strip_prefix('/') {
58 pattern = stripped.to_string();
59 }
60
61 Some(IgnoreRule {
62 pattern,
63 negated,
64 dir_only,
65 anchored,
66 })
67 }
68
69 fn matches(&self, path: &Path, is_dir: bool) -> bool {
70 if self.dir_only && !is_dir {
72 return false;
73 }
74
75 let path_str = path.to_string_lossy();
76
77 if self.anchored {
78 self.glob_match_path(&path_str)
80 } else {
81 if self.glob_match_path(&path_str) {
84 return true;
85 }
86
87 if let Some(name) = path.file_name() {
89 let name_str = name.to_string_lossy();
90 if glob_match(&self.pattern, &name_str) {
91 return true;
92 }
93 }
94
95 false
96 }
97 }
98
99 fn glob_match_path(&self, path: &str) -> bool {
100 if self.pattern.contains("**") {
102 self.match_with_globstar(path)
103 } else {
104 glob_match(&self.pattern, path)
105 }
106 }
107
108 fn match_with_globstar(&self, path: &str) -> bool {
109 let parts: Vec<&str> = self.pattern.split("**").collect();
111
112 if parts.len() == 2 {
113 let prefix = parts[0].trim_end_matches('/');
114 let suffix = parts[1].trim_start_matches('/');
115
116 let remaining = if prefix.is_empty() {
118 path
119 } else if let Some(rest) = path.strip_prefix(prefix) {
120 rest.trim_start_matches('/')
121 } else {
122 return false;
123 };
124
125 if suffix.is_empty() {
127 return true;
128 }
129
130 for (i, _) in remaining.char_indices() {
132 let tail = &remaining[i..];
133 if glob_match(suffix, tail) {
134 return true;
135 }
136 }
137
138 glob_match(suffix, remaining)
140 } else {
141 glob_match(&self.pattern.replace("**", "*"), path)
146 }
147 }
148}
149
150#[derive(Debug, Clone, Default)]
152pub struct IgnoreFilter {
153 rules: Vec<IgnoreRule>,
154}
155
156impl IgnoreFilter {
157 pub fn new() -> Self {
159 Self::default()
160 }
161
162 pub fn with_defaults() -> Self {
164 let mut filter = Self::new();
165
166 filter.add_rule(".git");
168
169 filter.add_rule("node_modules");
171 filter.add_rule("target"); filter.add_rule("__pycache__");
173 filter.add_rule(".venv");
174 filter.add_rule("venv");
175 filter.add_rule("dist");
176 filter.add_rule("build");
177 filter.add_rule(".next"); filter
180 }
181
182 pub async fn from_gitignore(
186 path: &Path,
187 fs: &impl WalkerFs,
188 ) -> Result<Self, crate::WalkerError> {
189 let content = fs.read_file(path).await?;
190 if content.len() > MAX_GITIGNORE_SIZE {
191 return Err(crate::WalkerError::Io(format!(
192 "{}: gitignore too large ({} bytes, max {})",
193 path.display(),
194 content.len(),
195 MAX_GITIGNORE_SIZE,
196 )));
197 }
198 let text = String::from_utf8_lossy(&content);
199
200 let mut filter = Self::new();
201 for line in text.lines() {
202 if let Some(rule) = IgnoreRule::parse(line) {
203 filter.rules.push(rule);
204 }
205 }
206
207 Ok(filter)
208 }
209
210 pub fn add_rule(&mut self, pattern: &str) {
212 if let Some(rule) = IgnoreRule::parse(pattern) {
213 self.rules.push(rule);
214 }
215 }
216
217 pub fn is_ignored(&self, path: &Path, is_dir: bool) -> bool {
222 let mut ignored = false;
223
224 for rule in &self.rules {
225 if rule.matches(path, is_dir) {
226 ignored = !rule.negated;
227 }
228 }
229
230 ignored
231 }
232
233 pub fn is_name_ignored(&self, name: &str, is_dir: bool) -> bool {
237 self.is_ignored(Path::new(name), is_dir)
238 }
239
240 pub fn merge(&mut self, other: &IgnoreFilter) {
245 self.rules.extend(other.rules.iter().cloned());
246 }
247
248 pub fn merged_with(&self, other: &IgnoreFilter) -> IgnoreFilter {
252 let mut merged = self.clone();
253 merged.merge(other);
254 merged
255 }
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a filter holding `patterns` in the given order.
    fn filter_with(patterns: &[&str]) -> IgnoreFilter {
        let mut filter = IgnoreFilter::new();
        for pattern in patterns {
            filter.add_rule(pattern);
        }
        filter
    }

    /// Queries `filter` for `path` as a regular file.
    fn ignores_file(filter: &IgnoreFilter, path: &str) -> bool {
        filter.is_ignored(Path::new(path), false)
    }

    /// Queries `filter` for `path` as a directory.
    fn ignores_dir(filter: &IgnoreFilter, path: &str) -> bool {
        filter.is_ignored(Path::new(path), true)
    }

    #[test]
    fn test_simple_patterns() {
        let filter = filter_with(&["*.log", "temp/"]);

        assert!(ignores_file(&filter, "app.log"));
        assert!(ignores_file(&filter, "debug.log"));
        assert!(!ignores_file(&filter, "app.txt"));

        // A trailing slash restricts the rule to directories.
        assert!(ignores_dir(&filter, "temp"));
        assert!(!ignores_file(&filter, "temp"));
    }

    #[test]
    fn test_negation() {
        let filter = filter_with(&["*.log", "!important.log"]);

        assert!(ignores_file(&filter, "debug.log"));
        assert!(!ignores_file(&filter, "important.log"));
    }

    #[test]
    fn test_anchored_patterns() {
        let filter = filter_with(&["/root.txt", "anywhere.txt"]);

        // Anchored: only the top-level file matches.
        assert!(ignores_file(&filter, "root.txt"));
        assert!(!ignores_file(&filter, "sub/root.txt"));

        // Unanchored: matches at any depth.
        assert!(ignores_file(&filter, "anywhere.txt"));
        assert!(ignores_file(&filter, "sub/anywhere.txt"));
    }

    #[test]
    fn test_directory_patterns() {
        let filter = filter_with(&["build/"]);

        assert!(ignores_dir(&filter, "build"));
        assert!(!ignores_file(&filter, "build"));
    }

    #[test]
    fn test_globstar() {
        let filter = filter_with(&["**/*.log"]);

        for path in ["app.log", "logs/app.log", "var/logs/app.log"] {
            assert!(ignores_file(&filter, path));
        }
    }

    #[test]
    fn test_defaults() {
        let filter = IgnoreFilter::with_defaults();

        for dir in [".git", "node_modules", "target", "__pycache__"] {
            assert!(ignores_dir(&filter, dir));
        }
    }

    #[test]
    fn test_comments_and_empty() {
        let filter = filter_with(&["# comment", "", " ", "valid.txt"]);

        // Only the last line is an actual pattern.
        assert_eq!(filter.rules.len(), 1);
        assert!(ignores_file(&filter, "valid.txt"));
    }

    #[test]
    fn test_path_patterns() {
        let filter = filter_with(&["logs/*.log"]);

        assert!(ignores_file(&filter, "logs/app.log"));
        assert!(!ignores_file(&filter, "other/app.log"));
        assert!(!ignores_file(&filter, "app.log"));
    }

    mod async_tests {
        use super::*;
        use crate::{WalkerDirEntry, WalkerError, WalkerFs};
        use std::collections::HashMap;
        use std::path::PathBuf;

        /// Location of the only file the in-memory file system contains.
        const GITIGNORE: &str = "/.gitignore";

        /// Directory entry type required by `WalkerFs`; never instantiated
        /// because `list_dir` always returns an empty listing.
        struct MemEntry;

        impl WalkerDirEntry for MemEntry {
            fn name(&self) -> &str {
                ""
            }
            fn is_dir(&self) -> bool {
                false
            }
            fn is_file(&self) -> bool {
                true
            }
            fn is_symlink(&self) -> bool {
                false
            }
        }

        /// In-memory file system mapping paths to file contents.
        struct SingleFileFs(HashMap<PathBuf, Vec<u8>>);

        #[async_trait::async_trait]
        impl WalkerFs for SingleFileFs {
            type DirEntry = MemEntry;

            async fn list_dir(&self, _: &Path) -> Result<Vec<MemEntry>, WalkerError> {
                Ok(vec![])
            }

            async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
                match self.0.get(path) {
                    Some(bytes) => Ok(bytes.clone()),
                    None => Err(WalkerError::NotFound(path.display().to_string())),
                }
            }

            async fn is_dir(&self, _: &Path) -> bool {
                false
            }

            async fn exists(&self, path: &Path) -> bool {
                self.0.contains_key(path)
            }
        }

        /// File system whose single file at `GITIGNORE` holds `bytes`.
        fn fs_with_gitignore(bytes: Vec<u8>) -> SingleFileFs {
            let files = std::iter::once((PathBuf::from(GITIGNORE), bytes)).collect();
            SingleFileFs(files)
        }

        #[tokio::test]
        async fn test_oversized_gitignore_rejected() {
            let fs = fs_with_gitignore(vec![b'#'; super::MAX_GITIGNORE_SIZE + 1]);

            let result = IgnoreFilter::from_gitignore(Path::new(GITIGNORE), &fs).await;

            assert!(result.is_err());
            let err = result.unwrap_err().to_string();
            assert!(err.contains("too large"), "expected 'too large' in: {err}");
        }

        #[tokio::test]
        async fn test_normal_gitignore_accepted() {
            let fs = fs_with_gitignore(b"*.log\n# comment\ntarget/\n".to_vec());

            let filter = IgnoreFilter::from_gitignore(Path::new(GITIGNORE), &fs)
                .await
                .unwrap();

            assert!(filter.is_ignored(Path::new("app.log"), false));
            assert!(filter.is_ignored(Path::new("target"), true));
        }
    }
}