scribe_patterns/
matcher.rs

1use crate::gitignore::GitignoreMatcher;
2use crate::glob::{GlobMatcher, GlobOptions};
3use anyhow::Result;
4use std::collections::HashMap;
5use std::path::{Path, PathBuf};
6
/// Outcome of evaluating a single path against the combined pattern set.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so the value can be
/// used in `HashSet`/`HashMap` keys and in `Eq`-bounded generic code.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum MatchResult {
    /// The path matched an explicit include pattern.
    Include,
    /// The path was rejected by an exclude rule.
    Exclude,
    /// The path is ignored by gitignore rules.
    Ignore,
    /// No explicit rule applied; the caller's default behavior decides.
    NoMatch,
}
19
20impl MatchResult {
21    /// Check if the result indicates the file should be processed
22    pub fn should_process(&self) -> bool {
23        matches!(self, MatchResult::Include | MatchResult::NoMatch)
24    }
25
26    /// Check if the result indicates the file should be skipped
27    pub fn should_skip(&self) -> bool {
28        matches!(self, MatchResult::Exclude | MatchResult::Ignore)
29    }
30}
31
/// Configuration shared by the glob and gitignore parts of a [`PatternMatcher`].
#[derive(Debug, Clone)]
pub struct MatcherOptions {
    /// When `false`, `PatternMatcher::with_gitignore` does not build a gitignore matcher.
    pub respect_gitignore: bool,
    /// Case sensitivity applied to both glob and gitignore matching.
    pub case_sensitive: bool,
    /// When `false`, paths whose final component starts with `.` are excluded.
    pub include_hidden: bool,
    /// Extra gitignore files to load in addition to the discovered ones;
    /// entries that do not exist on disk are skipped.
    pub custom_gitignore_files: Vec<PathBuf>,
    /// Additional gitignore-style patterns, added after every gitignore file
    /// has been loaded.
    pub override_patterns: Vec<String>,
}
46
47impl Default for MatcherOptions {
48    fn default() -> Self {
49        Self {
50            respect_gitignore: true,
51            case_sensitive: true,
52            include_hidden: false,
53            custom_gitignore_files: Vec::new(),
54            override_patterns: Vec::new(),
55        }
56    }
57}
58
59/// Combined pattern matcher that integrates glob and gitignore patterns
60#[derive(Debug)]
61pub struct PatternMatcher {
62    /// Glob patterns for inclusion
63    include_matcher: Option<GlobMatcher>,
64    /// Glob patterns for exclusion
65    exclude_matcher: Option<GlobMatcher>,
66    /// Gitignore pattern matcher
67    gitignore_matcher: Option<GitignoreMatcher>,
68    /// Matcher options
69    options: MatcherOptions,
70    /// Cached results for performance
71    cache: HashMap<PathBuf, MatchResult>,
72    /// Cache hit statistics
73    cache_hits: u64,
74    /// Cache miss statistics
75    cache_misses: u64,
76}
77
78impl PatternMatcher {
79    /// Create a new pattern matcher
80    pub fn new(options: MatcherOptions) -> Self {
81        Self {
82            include_matcher: None,
83            exclude_matcher: None,
84            gitignore_matcher: None,
85            options,
86            cache: HashMap::new(),
87            cache_hits: 0,
88            cache_misses: 0,
89        }
90    }
91
92    /// Create a pattern matcher with include patterns
93    pub fn with_includes<I, S>(mut self, patterns: I) -> Result<Self>
94    where
95        I: IntoIterator<Item = S>,
96        S: AsRef<str>,
97    {
98        let glob_options = GlobOptions {
99            case_sensitive: self.options.case_sensitive,
100            ..Default::default()
101        };
102
103        let mut matcher = GlobMatcher::with_options(glob_options);
104        for pattern in patterns {
105            matcher.add_pattern(pattern.as_ref())?;
106        }
107
108        if !matcher.is_empty() {
109            matcher.recompile()?;
110            self.include_matcher = Some(matcher);
111        }
112
113        Ok(self)
114    }
115
116    /// Create a pattern matcher with exclude patterns
117    pub fn with_excludes<I, S>(mut self, patterns: I) -> Result<Self>
118    where
119        I: IntoIterator<Item = S>,
120        S: AsRef<str>,
121    {
122        let glob_options = GlobOptions {
123            case_sensitive: self.options.case_sensitive,
124            ..Default::default()
125        };
126
127        let mut matcher = GlobMatcher::with_options(glob_options);
128        for pattern in patterns {
129            matcher.add_pattern(pattern.as_ref())?;
130        }
131
132        if !matcher.is_empty() {
133            matcher.recompile()?;
134            self.exclude_matcher = Some(matcher);
135        }
136
137        Ok(self)
138    }
139
140    /// Create a pattern matcher with gitignore support
141    pub fn with_gitignore<P: AsRef<Path>>(mut self, base_path: P) -> Result<Self> {
142        if self.options.respect_gitignore {
143            let mut matcher = if self.options.case_sensitive {
144                GitignoreMatcher::new()
145            } else {
146                GitignoreMatcher::case_insensitive()
147            };
148
149            // Load standard gitignore files from the directory tree
150            let gitignore_files = GitignoreMatcher::discover_gitignore_files(base_path.as_ref())?;
151            matcher.add_gitignore_files(gitignore_files)?;
152
153            // Load custom gitignore files
154            for path in &self.options.custom_gitignore_files {
155                if path.exists() {
156                    matcher.add_gitignore_file(path)?;
157                }
158            }
159
160            // Add override patterns as regular patterns (they will take precedence due to order)
161            for pattern in &self.options.override_patterns {
162                matcher.add_pattern(pattern)?;
163            }
164
165            self.gitignore_matcher = Some(matcher);
166        }
167
168        Ok(self)
169    }
170
171    /// Check if a path matches the patterns
172    pub fn is_match<P: AsRef<Path>>(&mut self, path: P) -> Result<MatchResult> {
173        let path = path.as_ref();
174        let canonical_path = path.to_path_buf();
175
176        // Check cache first
177        if let Some(cached_result) = self.cache.get(&canonical_path) {
178            self.cache_hits += 1;
179            return Ok(cached_result.clone());
180        }
181
182        self.cache_misses += 1;
183        let result = self.compute_match(path)?;
184
185        // Cache the result
186        if self.cache.len() < 10000 {
187            // Prevent unbounded cache growth
188            self.cache.insert(canonical_path, result.clone());
189        }
190
191        Ok(result)
192    }
193
194    /// Compute the match result for a path
195    fn compute_match(&mut self, path: &Path) -> Result<MatchResult> {
196        // Check if it's a hidden file and we're not including hidden files
197        if !self.options.include_hidden {
198            if let Some(name) = path.file_name() {
199                if let Some(name_str) = name.to_str() {
200                    if name_str.starts_with('.') && name_str != ".." && name_str != "." {
201                        return Ok(MatchResult::Exclude);
202                    }
203                }
204            }
205        }
206
207        // Priority order:
208        // 1. Gitignore patterns (if enabled) - can exclude
209        // 2. Explicit exclude patterns - can exclude
210        // 3. Explicit include patterns - can include
211        // 4. Default behavior based on options
212
213        // Check gitignore first (highest priority)
214        if let Some(ref mut gitignore_matcher) = self.gitignore_matcher {
215            if gitignore_matcher.is_ignored(path)? {
216                return Ok(MatchResult::Ignore);
217            }
218        }
219
220        // Check explicit exclude patterns
221        if let Some(ref mut exclude_matcher) = self.exclude_matcher {
222            if exclude_matcher.matches(path)? {
223                return Ok(MatchResult::Exclude);
224            }
225        }
226
227        // Check explicit include patterns
228        if let Some(ref mut include_matcher) = self.include_matcher {
229            if include_matcher.matches(path)? {
230                return Ok(MatchResult::Include);
231            }
232            // If we have include patterns but no match, exclude by default
233            return Ok(MatchResult::Exclude);
234        }
235
236        // No explicit patterns matched
237        Ok(MatchResult::NoMatch)
238    }
239
240    /// Check if a path should be processed (not excluded or ignored)
241    pub fn should_process<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
242        Ok(self.is_match(path)?.should_process())
243    }
244
245    /// Check if a path should be skipped (excluded or ignored)
246    pub fn should_skip<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
247        Ok(self.is_match(path)?.should_skip())
248    }
249
250    /// Clear the internal cache
251    pub fn clear_cache(&mut self) {
252        self.cache.clear();
253        self.cache_hits = 0;
254        self.cache_misses = 0;
255    }
256
257    /// Get cache statistics
258    pub fn cache_stats(&self) -> (u64, u64, f64) {
259        let total = self.cache_hits + self.cache_misses;
260        let hit_rate = if total > 0 {
261            self.cache_hits as f64 / total as f64
262        } else {
263            0.0
264        };
265        (self.cache_hits, self.cache_misses, hit_rate)
266    }
267
268    /// Check if the matcher has any patterns
269    pub fn is_empty(&self) -> bool {
270        self.include_matcher.as_ref().map_or(true, |m| m.is_empty())
271            && self.exclude_matcher.as_ref().map_or(true, |m| m.is_empty())
272            && self
273                .gitignore_matcher
274                .as_ref()
275                .map_or(true, |m| m.patterns().is_empty())
276    }
277
278    /// Get the number of patterns
279    pub fn pattern_count(&self) -> usize {
280        let include_count = self
281            .include_matcher
282            .as_ref()
283            .map_or(0, |m| m.pattern_count());
284        let exclude_count = self
285            .exclude_matcher
286            .as_ref()
287            .map_or(0, |m| m.pattern_count());
288        let gitignore_count = self
289            .gitignore_matcher
290            .as_ref()
291            .map_or(0, |m| m.patterns().len());
292        include_count + exclude_count + gitignore_count
293    }
294
295    /// Compile all patterns for optimal performance
296    pub fn compile(&mut self) -> Result<()> {
297        if let Some(ref mut matcher) = self.include_matcher {
298            matcher.recompile()?;
299        }
300        if let Some(ref mut matcher) = self.exclude_matcher {
301            matcher.recompile()?;
302        }
303        // Gitignore matcher compiles automatically when patterns are added
304        Ok(())
305    }
306}
307
308/// Builder for creating pattern matchers with a fluent API
309#[derive(Debug, Default)]
310pub struct PatternMatcherBuilder {
311    include_patterns: Vec<String>,
312    exclude_patterns: Vec<String>,
313    options: MatcherOptions,
314    base_path: Option<PathBuf>,
315}
316
317impl PatternMatcherBuilder {
318    /// Create a new builder
319    pub fn new() -> Self {
320        Self::default()
321    }
322
323    /// Add include patterns
324    pub fn includes<I, S>(mut self, patterns: I) -> Self
325    where
326        I: IntoIterator<Item = S>,
327        S: Into<String>,
328    {
329        self.include_patterns
330            .extend(patterns.into_iter().map(|p| p.into()));
331        self
332    }
333
334    /// Add a single include pattern
335    pub fn include<S: Into<String>>(mut self, pattern: S) -> Self {
336        self.include_patterns.push(pattern.into());
337        self
338    }
339
340    /// Add exclude patterns
341    pub fn excludes<I, S>(mut self, patterns: I) -> Self
342    where
343        I: IntoIterator<Item = S>,
344        S: Into<String>,
345    {
346        self.exclude_patterns
347            .extend(patterns.into_iter().map(|p| p.into()));
348        self
349    }
350
351    /// Add a single exclude pattern
352    pub fn exclude<S: Into<String>>(mut self, pattern: S) -> Self {
353        self.exclude_patterns.push(pattern.into());
354        self
355    }
356
357    /// Set whether to respect gitignore files
358    pub fn respect_gitignore(mut self, respect: bool) -> Self {
359        self.options.respect_gitignore = respect;
360        self
361    }
362
363    /// Set case sensitivity
364    pub fn case_sensitive(mut self, sensitive: bool) -> Self {
365        self.options.case_sensitive = sensitive;
366        self
367    }
368
369    /// Set whether to include hidden files
370    pub fn include_hidden(mut self, include: bool) -> Self {
371        self.options.include_hidden = include;
372        self
373    }
374
375    /// Add custom gitignore files
376    pub fn custom_gitignore_files<I, P>(mut self, files: I) -> Self
377    where
378        I: IntoIterator<Item = P>,
379        P: Into<PathBuf>,
380    {
381        self.options
382            .custom_gitignore_files
383            .extend(files.into_iter().map(|p| p.into()));
384        self
385    }
386
387    /// Add override patterns
388    pub fn override_patterns<I, S>(mut self, patterns: I) -> Self
389    where
390        I: IntoIterator<Item = S>,
391        S: Into<String>,
392    {
393        self.options
394            .override_patterns
395            .extend(patterns.into_iter().map(|p| p.into()));
396        self
397    }
398
399    /// Set the base path for gitignore resolution
400    pub fn base_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
401        self.base_path = Some(path.into());
402        self
403    }
404
405    /// Build the pattern matcher
406    pub fn build(self) -> Result<PatternMatcher> {
407        let mut matcher = PatternMatcher::new(self.options);
408
409        // Add include patterns
410        if !self.include_patterns.is_empty() {
411            matcher = matcher.with_includes(self.include_patterns)?;
412        }
413
414        // Add exclude patterns
415        if !self.exclude_patterns.is_empty() {
416            matcher = matcher.with_excludes(self.exclude_patterns)?;
417        }
418
419        // Set up gitignore if base path is provided
420        if let Some(base_path) = self.base_path {
421            matcher = matcher.with_gitignore(base_path)?;
422        }
423
424        // Compile patterns for optimal performance
425        matcher.compile()?;
426
427        Ok(matcher)
428    }
429}
430
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Populate `dir` with source files, a hidden file, a `src/` subdirectory,
    /// a `.gitignore`, and two files that the `.gitignore` covers.
    fn create_test_files(dir: &Path) -> Result<()> {
        // Regular and hidden files
        fs::write(dir.join("test.rs"), "// Rust file")?;
        fs::write(dir.join("test.py"), "# Python file")?;
        fs::write(dir.join("README.md"), "# Documentation")?;
        fs::write(dir.join(".hidden"), "hidden file")?;

        // Subdirectory with sources
        let subdir = dir.join("src");
        fs::create_dir(&subdir)?;
        fs::write(subdir.join("main.rs"), "fn main() {}")?;
        fs::write(subdir.join("lib.rs"), "// Library")?;

        // Gitignore rules
        fs::write(dir.join(".gitignore"), "*.tmp\ntarget/\n.DS_Store")?;

        // Files covered by the gitignore rules
        fs::write(dir.join("test.tmp"), "temporary file")?;
        fs::write(dir.join(".DS_Store"), "system file")?;

        Ok(())
    }

    #[test]
    fn test_basic_matching() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        let mut matcher = PatternMatcherBuilder::new()
            .include("*.rs")
            .exclude("**/target/**")
            .base_path(temp_dir.path())
            .build()?;

        // Should match Rust files
        assert!(matcher.should_process("test.rs")?);
        assert!(matcher.should_process("src/main.rs")?);

        // Should not match other files
        assert!(!matcher.should_process("test.py")?);
        assert!(!matcher.should_process("README.md")?);

        Ok(())
    }

    #[test]
    fn test_gitignore_integration() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        let mut matcher = PatternMatcherBuilder::new()
            .respect_gitignore(true)
            .base_path(temp_dir.path())
            .build()?;

        // Should ignore files matching gitignore
        assert!(matcher.should_skip("test.tmp")?);
        assert!(matcher.should_skip(".DS_Store")?);

        // Should not ignore regular files
        assert!(matcher.should_process("test.rs")?);
        assert!(matcher.should_process("README.md")?);

        Ok(())
    }

    #[test]
    fn test_hidden_files() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        // Without include_hidden
        let mut matcher = PatternMatcherBuilder::new().include_hidden(false).build()?;

        assert!(matcher.should_skip(".hidden")?);

        // With include_hidden
        let mut matcher = PatternMatcherBuilder::new().include_hidden(true).build()?;

        assert!(matcher.should_process(".hidden")?);

        Ok(())
    }

    #[test]
    fn test_pattern_priority() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        // Replace the gitignore so it covers a file the include pattern would match.
        fs::write(temp_dir.path().join("ignored.rs"), "// Ignored Rust file")?;
        fs::write(temp_dir.path().join(".gitignore"), "ignored.rs")?;

        // Build the matcher only after the gitignore is in its final state.
        let mut matcher = PatternMatcherBuilder::new()
            .include("*.rs")
            .respect_gitignore(true)
            .base_path(temp_dir.path())
            .build()?;

        // Gitignore should take priority over include patterns
        assert_eq!(matcher.is_match("ignored.rs")?, MatchResult::Ignore);

        Ok(())
    }

    #[test]
    fn test_cache_functionality() -> Result<()> {
        let mut matcher = PatternMatcherBuilder::new().include("*.rs").build()?;

        // First call should be a cache miss
        let _ = matcher.is_match("test.rs")?;
        let (hits, misses, _) = matcher.cache_stats();
        assert_eq!(hits, 0);
        assert_eq!(misses, 1);

        // Second call should be a cache hit
        let _ = matcher.is_match("test.rs")?;
        let (hits, misses, hit_rate) = matcher.cache_stats();
        assert_eq!(hits, 1);
        assert_eq!(misses, 1);
        assert_eq!(hit_rate, 0.5);

        // Clearing the cache also resets the counters
        matcher.clear_cache();
        let (hits, misses, _) = matcher.cache_stats();
        assert_eq!(hits, 0);
        assert_eq!(misses, 0);

        Ok(())
    }

    #[test]
    fn test_empty_matcher() -> Result<()> {
        let matcher = PatternMatcherBuilder::new().build()?;

        assert!(matcher.is_empty());
        assert_eq!(matcher.pattern_count(), 0);

        Ok(())
    }

    #[test]
    fn test_case_sensitivity() -> Result<()> {
        // Case sensitive
        let mut matcher = PatternMatcherBuilder::new()
            .include("*.RS")
            .case_sensitive(true)
            .build()?;

        assert!(!matcher.should_process("test.rs")?);
        assert!(matcher.should_process("test.RS")?);

        // Case insensitive
        let mut matcher = PatternMatcherBuilder::new()
            .include("*.RS")
            .case_sensitive(false)
            .build()?;

        assert!(matcher.should_process("test.rs")?);
        assert!(matcher.should_process("test.RS")?);

        Ok(())
    }

    #[test]
    fn test_override_patterns() -> Result<()> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path())?;

        let mut matcher = PatternMatcherBuilder::new()
            .respect_gitignore(true)
            .override_patterns(vec!["!*.tmp".to_string()]) // Override gitignore
            .base_path(temp_dir.path())
            .build()?;

        // Should not ignore .tmp files due to override
        assert!(matcher.should_process("test.tmp")?);

        Ok(())
    }
}