1use crate::gitignore::GitignoreMatcher;
2use crate::glob::{GlobMatcher, GlobOptions};
3use anyhow::Result;
4use std::collections::HashMap;
5use std::path::{Path, PathBuf};
6
/// Outcome of evaluating a single path against a `PatternMatcher`.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so results can be
/// used as set members or map keys (the enum is fieldless, so both are free).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum MatchResult {
    /// The path matched one of the configured include patterns.
    Include,
    /// The path was rejected: it is hidden while hidden files are disabled,
    /// it matched an exclude pattern, or include patterns are configured and
    /// none of them matched.
    Exclude,
    /// The path is ignored by the gitignore rules.
    Ignore,
    /// Nothing rejected the path and no include patterns are configured.
    NoMatch,
}

impl MatchResult {
    /// Returns `true` when the path should be handled by the caller, i.e. for
    /// [`MatchResult::Include`] and [`MatchResult::NoMatch`].
    pub fn should_process(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            MatchResult::Include | MatchResult::NoMatch => true,
            MatchResult::Exclude | MatchResult::Ignore => false,
        }
    }

    /// Returns `true` when the path should be skipped by the caller, i.e. for
    /// [`MatchResult::Exclude`] and [`MatchResult::Ignore`].
    pub fn should_skip(&self) -> bool {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            MatchResult::Exclude | MatchResult::Ignore => true,
            MatchResult::Include | MatchResult::NoMatch => false,
        }
    }
}
31
/// Settings that control how a `PatternMatcher` evaluates paths.
#[derive(Debug, Clone)]
pub struct MatcherOptions {
    /// When `false`, `PatternMatcher::with_gitignore` leaves the matcher
    /// without any gitignore rules.
    pub respect_gitignore: bool,
    /// Forwarded to the glob options and used to pick the case-sensitive or
    /// case-insensitive gitignore matcher.
    pub case_sensitive: bool,
    /// When `false`, paths whose final component starts with `.` are excluded.
    pub include_hidden: bool,
    /// Extra gitignore files loaded after the discovered ones; entries that
    /// do not exist on disk are skipped.
    pub custom_gitignore_files: Vec<PathBuf>,
    /// Extra gitignore-style patterns added after all gitignore files.
    pub override_patterns: Vec<String>,
}

impl Default for MatcherOptions {
    /// Gitignore-aware, case-sensitive, hidden files skipped, and no extra
    /// gitignore files or override patterns.
    fn default() -> Self {
        let custom_gitignore_files: Vec<PathBuf> = Vec::new();
        let override_patterns: Vec<String> = Vec::new();

        MatcherOptions {
            include_hidden: false,
            case_sensitive: true,
            respect_gitignore: true,
            custom_gitignore_files,
            override_patterns,
        }
    }
}
58
59#[derive(Debug)]
61pub struct PatternMatcher {
62 include_matcher: Option<GlobMatcher>,
64 exclude_matcher: Option<GlobMatcher>,
66 gitignore_matcher: Option<GitignoreMatcher>,
68 options: MatcherOptions,
70 cache: HashMap<PathBuf, MatchResult>,
72 cache_hits: u64,
74 cache_misses: u64,
76}
77
78impl PatternMatcher {
79 pub fn new(options: MatcherOptions) -> Self {
81 Self {
82 include_matcher: None,
83 exclude_matcher: None,
84 gitignore_matcher: None,
85 options,
86 cache: HashMap::new(),
87 cache_hits: 0,
88 cache_misses: 0,
89 }
90 }
91
92 pub fn with_includes<I, S>(mut self, patterns: I) -> Result<Self>
94 where
95 I: IntoIterator<Item = S>,
96 S: AsRef<str>,
97 {
98 let glob_options = GlobOptions {
99 case_sensitive: self.options.case_sensitive,
100 ..Default::default()
101 };
102
103 let mut matcher = GlobMatcher::with_options(glob_options);
104 for pattern in patterns {
105 matcher.add_pattern(pattern.as_ref())?;
106 }
107
108 if !matcher.is_empty() {
109 matcher.recompile()?;
110 self.include_matcher = Some(matcher);
111 }
112
113 Ok(self)
114 }
115
116 pub fn with_excludes<I, S>(mut self, patterns: I) -> Result<Self>
118 where
119 I: IntoIterator<Item = S>,
120 S: AsRef<str>,
121 {
122 let glob_options = GlobOptions {
123 case_sensitive: self.options.case_sensitive,
124 ..Default::default()
125 };
126
127 let mut matcher = GlobMatcher::with_options(glob_options);
128 for pattern in patterns {
129 matcher.add_pattern(pattern.as_ref())?;
130 }
131
132 if !matcher.is_empty() {
133 matcher.recompile()?;
134 self.exclude_matcher = Some(matcher);
135 }
136
137 Ok(self)
138 }
139
140 pub fn with_gitignore<P: AsRef<Path>>(mut self, base_path: P) -> Result<Self> {
142 if self.options.respect_gitignore {
143 let mut matcher = if self.options.case_sensitive {
144 GitignoreMatcher::new()
145 } else {
146 GitignoreMatcher::case_insensitive()
147 };
148
149 let gitignore_files = GitignoreMatcher::discover_gitignore_files(base_path.as_ref())?;
151 matcher.add_gitignore_files(gitignore_files)?;
152
153 for path in &self.options.custom_gitignore_files {
155 if path.exists() {
156 matcher.add_gitignore_file(path)?;
157 }
158 }
159
160 for pattern in &self.options.override_patterns {
162 matcher.add_pattern(pattern)?;
163 }
164
165 self.gitignore_matcher = Some(matcher);
166 }
167
168 Ok(self)
169 }
170
171 pub fn is_match<P: AsRef<Path>>(&mut self, path: P) -> Result<MatchResult> {
173 let path = path.as_ref();
174 let canonical_path = path.to_path_buf();
175
176 if let Some(cached_result) = self.cache.get(&canonical_path) {
178 self.cache_hits += 1;
179 return Ok(cached_result.clone());
180 }
181
182 self.cache_misses += 1;
183 let result = self.compute_match(path)?;
184
185 if self.cache.len() < 10000 {
187 self.cache.insert(canonical_path, result.clone());
189 }
190
191 Ok(result)
192 }
193
194 fn compute_match(&mut self, path: &Path) -> Result<MatchResult> {
196 if !self.options.include_hidden {
198 if let Some(name) = path.file_name() {
199 if let Some(name_str) = name.to_str() {
200 if name_str.starts_with('.') && name_str != ".." && name_str != "." {
201 return Ok(MatchResult::Exclude);
202 }
203 }
204 }
205 }
206
207 if let Some(ref mut gitignore_matcher) = self.gitignore_matcher {
215 if gitignore_matcher.is_ignored(path)? {
216 return Ok(MatchResult::Ignore);
217 }
218 }
219
220 if let Some(ref mut exclude_matcher) = self.exclude_matcher {
222 if exclude_matcher.matches(path)? {
223 return Ok(MatchResult::Exclude);
224 }
225 }
226
227 if let Some(ref mut include_matcher) = self.include_matcher {
229 if include_matcher.matches(path)? {
230 return Ok(MatchResult::Include);
231 }
232 return Ok(MatchResult::Exclude);
234 }
235
236 Ok(MatchResult::NoMatch)
238 }
239
240 pub fn should_process<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
242 Ok(self.is_match(path)?.should_process())
243 }
244
245 pub fn should_skip<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
247 Ok(self.is_match(path)?.should_skip())
248 }
249
250 pub fn clear_cache(&mut self) {
252 self.cache.clear();
253 self.cache_hits = 0;
254 self.cache_misses = 0;
255 }
256
257 pub fn cache_stats(&self) -> (u64, u64, f64) {
259 let total = self.cache_hits + self.cache_misses;
260 let hit_rate = if total > 0 {
261 self.cache_hits as f64 / total as f64
262 } else {
263 0.0
264 };
265 (self.cache_hits, self.cache_misses, hit_rate)
266 }
267
268 pub fn is_empty(&self) -> bool {
270 self.include_matcher.as_ref().map_or(true, |m| m.is_empty())
271 && self.exclude_matcher.as_ref().map_or(true, |m| m.is_empty())
272 && self
273 .gitignore_matcher
274 .as_ref()
275 .map_or(true, |m| m.patterns().is_empty())
276 }
277
278 pub fn pattern_count(&self) -> usize {
280 let include_count = self
281 .include_matcher
282 .as_ref()
283 .map_or(0, |m| m.pattern_count());
284 let exclude_count = self
285 .exclude_matcher
286 .as_ref()
287 .map_or(0, |m| m.pattern_count());
288 let gitignore_count = self
289 .gitignore_matcher
290 .as_ref()
291 .map_or(0, |m| m.patterns().len());
292 include_count + exclude_count + gitignore_count
293 }
294
295 pub fn compile(&mut self) -> Result<()> {
297 if let Some(ref mut matcher) = self.include_matcher {
298 matcher.recompile()?;
299 }
300 if let Some(ref mut matcher) = self.exclude_matcher {
301 matcher.recompile()?;
302 }
303 Ok(())
305 }
306}
307
308#[derive(Debug, Default)]
310pub struct PatternMatcherBuilder {
311 include_patterns: Vec<String>,
312 exclude_patterns: Vec<String>,
313 options: MatcherOptions,
314 base_path: Option<PathBuf>,
315}
316
317impl PatternMatcherBuilder {
318 pub fn new() -> Self {
320 Self::default()
321 }
322
323 pub fn includes<I, S>(mut self, patterns: I) -> Self
325 where
326 I: IntoIterator<Item = S>,
327 S: Into<String>,
328 {
329 self.include_patterns
330 .extend(patterns.into_iter().map(|p| p.into()));
331 self
332 }
333
334 pub fn include<S: Into<String>>(mut self, pattern: S) -> Self {
336 self.include_patterns.push(pattern.into());
337 self
338 }
339
340 pub fn excludes<I, S>(mut self, patterns: I) -> Self
342 where
343 I: IntoIterator<Item = S>,
344 S: Into<String>,
345 {
346 self.exclude_patterns
347 .extend(patterns.into_iter().map(|p| p.into()));
348 self
349 }
350
351 pub fn exclude<S: Into<String>>(mut self, pattern: S) -> Self {
353 self.exclude_patterns.push(pattern.into());
354 self
355 }
356
357 pub fn respect_gitignore(mut self, respect: bool) -> Self {
359 self.options.respect_gitignore = respect;
360 self
361 }
362
363 pub fn case_sensitive(mut self, sensitive: bool) -> Self {
365 self.options.case_sensitive = sensitive;
366 self
367 }
368
369 pub fn include_hidden(mut self, include: bool) -> Self {
371 self.options.include_hidden = include;
372 self
373 }
374
375 pub fn custom_gitignore_files<I, P>(mut self, files: I) -> Self
377 where
378 I: IntoIterator<Item = P>,
379 P: Into<PathBuf>,
380 {
381 self.options
382 .custom_gitignore_files
383 .extend(files.into_iter().map(|p| p.into()));
384 self
385 }
386
387 pub fn override_patterns<I, S>(mut self, patterns: I) -> Self
389 where
390 I: IntoIterator<Item = S>,
391 S: Into<String>,
392 {
393 self.options
394 .override_patterns
395 .extend(patterns.into_iter().map(|p| p.into()));
396 self
397 }
398
399 pub fn base_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
401 self.base_path = Some(path.into());
402 self
403 }
404
405 pub fn build(self) -> Result<PatternMatcher> {
407 let mut matcher = PatternMatcher::new(self.options);
408
409 if !self.include_patterns.is_empty() {
411 matcher = matcher.with_includes(self.include_patterns)?;
412 }
413
414 if !self.exclude_patterns.is_empty() {
416 matcher = matcher.with_excludes(self.exclude_patterns)?;
417 }
418
419 if let Some(base_path) = self.base_path {
421 matcher = matcher.with_gitignore(base_path)?;
422 }
423
424 matcher.compile()?;
426
427 Ok(matcher)
428 }
429}
430
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Populates `dir` with a small tree: source files, a hidden file, a
    /// `src/` subdirectory, a `.gitignore`, and files that the `.gitignore`
    /// is meant to cover.
    fn create_test_files(dir: &Path) -> Result<()> {
        fs::write(dir.join("test.rs"), "// Rust file")?;
        fs::write(dir.join("test.py"), "# Python file")?;
        fs::write(dir.join("README.md"), "# Documentation")?;
        fs::write(dir.join(".hidden"), "hidden file")?;

        let subdir = dir.join("src");
        fs::create_dir(&subdir)?;
        fs::write(subdir.join("main.rs"), "fn main() {}")?;
        fs::write(subdir.join("lib.rs"), "// Library")?;

        fs::write(dir.join(".gitignore"), "*.tmp\ntarget/\n.DS_Store")?;

        fs::write(dir.join("test.tmp"), "temporary file")?;
        fs::write(dir.join(".DS_Store"), "system file")?;

        Ok(())
    }

    #[test]
    fn test_basic_matching() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        let mut matcher = PatternMatcherBuilder::new()
            .include("*.rs")
            .exclude("**/target/**")
            .base_path(temp_dir.path())
            .build()?;

        // Rust sources match the include glob.
        assert!(matcher.should_process("test.rs")?);
        assert!(matcher.should_process("src/main.rs")?);

        // With include patterns configured, everything else is excluded.
        assert!(!matcher.should_process("test.py")?);
        assert!(!matcher.should_process("README.md")?);

        Ok(())
    }

    #[test]
    fn test_gitignore_integration() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        let mut matcher = PatternMatcherBuilder::new()
            .respect_gitignore(true)
            .base_path(temp_dir.path())
            .build()?;

        assert!(matcher.should_skip("test.tmp")?);
        // Note: `.DS_Store` is also rejected by the hidden-name filter, which
        // runs before the gitignore rules when hidden files are disabled.
        assert!(matcher.should_skip(".DS_Store")?);

        assert!(matcher.should_process("test.rs")?);
        assert!(matcher.should_process("README.md")?);

        Ok(())
    }

    #[test]
    fn test_hidden_files() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        let mut matcher = PatternMatcherBuilder::new().include_hidden(false).build()?;

        assert!(matcher.should_skip(".hidden")?);

        let mut matcher = PatternMatcherBuilder::new().include_hidden(true).build()?;

        assert!(matcher.should_process(".hidden")?);

        Ok(())
    }

    #[test]
    fn test_pattern_priority() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        // Replace the fixture's .gitignore so that it targets a file the
        // include glob would otherwise accept.
        fs::write(temp_dir.path().join("ignored.rs"), "// Ignored Rust file")?;
        fs::write(temp_dir.path().join(".gitignore"), "ignored.rs")?;

        let mut matcher = PatternMatcherBuilder::new()
            .include("*.rs")
            .respect_gitignore(true)
            .base_path(temp_dir.path())
            .build()?;

        // Gitignore rules are evaluated before include globs.
        assert_eq!(matcher.is_match("ignored.rs")?, MatchResult::Ignore);

        Ok(())
    }

    #[test]
    fn test_cache_functionality() -> Result<()> {
        let mut matcher = PatternMatcherBuilder::new().include("*.rs").build()?;

        // First lookup is computed.
        let _ = matcher.is_match("test.rs")?;
        let (hits, misses, _) = matcher.cache_stats();
        assert_eq!(hits, 0);
        assert_eq!(misses, 1);

        // Second lookup of the same path is served from the cache.
        let _ = matcher.is_match("test.rs")?;
        let (hits, misses, hit_rate) = matcher.cache_stats();
        assert_eq!(hits, 1);
        assert_eq!(misses, 1);
        assert_eq!(hit_rate, 0.5);

        // Clearing resets the counters as well as the entries.
        matcher.clear_cache();
        let (hits, misses, _) = matcher.cache_stats();
        assert_eq!(hits, 0);
        assert_eq!(misses, 0);

        Ok(())
    }

    #[test]
    fn test_empty_matcher() -> Result<()> {
        let matcher = PatternMatcherBuilder::new().build()?;

        assert!(matcher.is_empty());
        assert_eq!(matcher.pattern_count(), 0);

        Ok(())
    }

    #[test]
    fn test_case_sensitivity() -> Result<()> {
        let mut matcher = PatternMatcherBuilder::new()
            .include("*.RS")
            .case_sensitive(true)
            .build()?;

        assert!(!matcher.should_process("test.rs")?);
        assert!(matcher.should_process("test.RS")?);

        let mut matcher = PatternMatcherBuilder::new()
            .include("*.RS")
            .case_sensitive(false)
            .build()?;

        assert!(matcher.should_process("test.rs")?);
        assert!(matcher.should_process("test.RS")?);

        Ok(())
    }

    #[test]
    fn test_override_patterns() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        let mut matcher = PatternMatcherBuilder::new()
            .respect_gitignore(true)
            .override_patterns(vec!["!*.tmp".to_string()])
            .base_path(temp_dir.path())
            .build()?;

        // The negated override re-admits files the fixture's .gitignore ignores.
        assert!(matcher.should_process("test.tmp")?);

        Ok(())
    }
}