scribe_patterns/
lib.rs

1//! # Scribe Patterns
2//! 
3//! Advanced pattern matching and search algorithms for the Scribe library.
4//! This crate provides high-performance pattern matching capabilities including
5//! glob patterns, gitignore integration, and flexible include/exclude logic.
6//!
7//! ## Features
8//!
9//! - **High-Performance Glob Matching**: Using `globset` for efficient pattern compilation
10//! - **Gitignore Integration**: Full gitignore syntax support with proper precedence
11//! - **Include/Exclude Logic**: Complex pattern combinations with comma-separated input
12//! - **Path Normalization**: Cross-platform path handling and matching
13//! - **Pattern Validation**: Comprehensive error handling and pattern validation
14//! - **Caching**: Efficient pattern compilation and matching with caching
15//!
16//! ## Usage
17//!
18//! ```rust
19//! use scribe_patterns::{PatternMatcherBuilder, MatchResult};
20//! use std::path::Path;
21//!
22//! # fn example() -> anyhow::Result<()> {
23//! // Create a combined matcher with glob and gitignore patterns
24//! let mut matcher = PatternMatcherBuilder::new()
25//!     .include("src/**/*.rs")
26//!     .exclude("target/**")
27//!     .respect_gitignore(true)
28//!     .base_path(".")
29//!     .build()?;
30//!
31//! // Test if a path matches
32//! let path = Path::new("src/lib.rs");
33//! if matcher.should_process(path)? {
34//!     println!("Path is included: {}", path.display());
35//! }
36//! # Ok(())
37//! # }
38//! ```
39
40// Core modules
41pub mod glob;
42pub mod gitignore;
43pub mod matcher;
44pub mod validation;
45
46// Re-export main types for convenience
47pub use glob::{GlobMatcher, GlobPattern, GlobOptions, GlobMatchResult};
48pub use gitignore::{GitignoreMatcher, GitignorePattern, GitignoreRule, GitignoreStats};
49pub use matcher::{
50    PatternMatcher, MatchResult, MatcherOptions, PatternMatcherBuilder
51};
52pub use validation::{
53    PatternValidator, ValidationResult, ValidationError, ValidationConfig,
54    PerformanceRisk, PerformanceRiskLevel
55};
56
57use scribe_core::{Result, ScribeError};
58use std::path::Path;
59
60/// Current version of the patterns crate.
///
/// The value is read from the `CARGO_PKG_VERSION` environment variable at
/// compile time via `env!`.
61pub const VERSION: &str = env!("CARGO_PKG_VERSION");
62
63/// Quick pattern matching utility for simple use cases
64pub struct QuickMatcher {
65    matcher: PatternMatcher,
66}
67
68impl QuickMatcher {
69    /// Create a new quick matcher with include and exclude patterns
70    pub fn new(include_patterns: &[&str], exclude_patterns: &[&str]) -> Result<Self> {
71        let mut builder = PatternMatcherBuilder::new();
72        
73        for pattern in include_patterns {
74            builder = builder.include(*pattern);
75        }
76        
77        for pattern in exclude_patterns {
78            builder = builder.exclude(*pattern);
79        }
80        
81        let matcher = builder.build().map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
82        Ok(Self { matcher })
83    }
84
85    /// Create a quick matcher from comma-separated pattern strings
86    pub fn from_patterns(include_csv: Option<&str>, exclude_csv: Option<&str>) -> Result<Self> {
87        let mut builder = PatternMatcherBuilder::new();
88        
89        if let Some(includes) = include_csv {
90            let patterns = utils::parse_csv_patterns(includes);
91            builder = builder.includes(patterns);
92        }
93        
94        if let Some(excludes) = exclude_csv {
95            let patterns = utils::parse_csv_patterns(excludes);
96            builder = builder.excludes(patterns);
97        }
98        
99        let matcher = builder.build().map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
100        Ok(Self { matcher })
101    }
102
103    /// Test if a path should be included
104    pub fn matches<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
105        self.matcher.should_process(path).map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
106    }
107
108    /// Get detailed match information
109    pub fn match_details<P: AsRef<Path>>(&mut self, path: P) -> Result<MatchResult> {
110        self.matcher.is_match(path).map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
111    }
112}
113
/// Pattern matching builder for fluent API construction.
///
/// Collects include/exclude patterns, gitignore file locations and the
/// case-sensitivity flag. Derives `Debug` and `Clone` so a partially
/// configured builder can be logged or reused as a template.
#[derive(Debug, Clone)]
pub struct PatternBuilder {
    /// Patterns a path must match to be included.
    includes: Vec<String>,
    /// Patterns that exclude a path.
    excludes: Vec<String>,
    /// Gitignore files registered by the caller, in registration order.
    gitignore_files: Vec<std::path::PathBuf>,
    /// Whether pattern matching is case-sensitive.
    case_sensitive: bool,
}
121
122impl Default for PatternBuilder {
123    fn default() -> Self {
124        Self::new()
125    }
126}
127
128impl PatternBuilder {
129    /// Create a new pattern builder
130    pub fn new() -> Self {
131        Self {
132            includes: Vec::new(),
133            excludes: Vec::new(),
134            gitignore_files: Vec::new(),
135            case_sensitive: true,
136        }
137    }
138
139    /// Add an include pattern
140    pub fn include<S: Into<String>>(mut self, pattern: S) -> Self {
141        self.includes.push(pattern.into());
142        self
143    }
144
145    /// Add multiple include patterns
146    pub fn includes<I, S>(mut self, patterns: I) -> Self
147    where
148        I: IntoIterator<Item = S>,
149        S: Into<String>,
150    {
151        self.includes.extend(patterns.into_iter().map(|p| p.into()));
152        self
153    }
154
155    /// Add an exclude pattern
156    pub fn exclude<S: Into<String>>(mut self, pattern: S) -> Self {
157        self.excludes.push(pattern.into());
158        self
159    }
160
161    /// Add multiple exclude patterns
162    pub fn excludes<I, S>(mut self, patterns: I) -> Self
163    where
164        I: IntoIterator<Item = S>,
165        S: Into<String>,
166    {
167        self.excludes.extend(patterns.into_iter().map(|p| p.into()));
168        self
169    }
170
171    /// Add a gitignore file
172    pub fn gitignore<P: AsRef<Path>>(mut self, path: P) -> Self {
173        self.gitignore_files.push(path.as_ref().to_path_buf());
174        self
175    }
176
177    /// Set case sensitivity
178    pub fn case_sensitive(mut self, enabled: bool) -> Self {
179        self.case_sensitive = enabled;
180        self
181    }
182
183    /// Build the pattern matcher
184    pub fn build(self) -> Result<PatternMatcher> {
185        let options = MatcherOptions {
186            case_sensitive: self.case_sensitive,
187            respect_gitignore: !self.gitignore_files.is_empty(),
188            include_hidden: false,
189            custom_gitignore_files: self.gitignore_files,
190            override_patterns: Vec::new(),
191        };
192
193        let mut builder = PatternMatcherBuilder::new();
194        
195        if !self.includes.is_empty() {
196            builder = builder.includes(self.includes);
197        }
198        
199        if !self.excludes.is_empty() {
200            builder = builder.excludes(self.excludes);
201        }
202        
203        builder = builder
204            .case_sensitive(self.case_sensitive);
205            
206        // Set up gitignore with first gitignore file if available
207        if let Some(first_gitignore) = options.custom_gitignore_files.first() {
208            if let Some(parent) = first_gitignore.parent() {
209                builder = builder.base_path(parent);
210            }
211        }
212
213        builder.build().map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
214    }
215}
216
217/// Utility functions for common pattern operations
218pub mod utils {
219    use super::*;
220    use std::path::PathBuf;
221
222    /// Normalize a path for consistent pattern matching across platforms
223    pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
224        let path = path.as_ref();
225        
226        // Convert to forward slashes for consistent matching
227        let normalized = path.to_string_lossy().replace('\\', "/");
228        
229        // Remove redundant separators and resolve . and ..
230        let components: Vec<&str> = normalized
231            .split('/')
232            .filter(|c| !c.is_empty() && *c != ".")
233            .collect();
234            
235        let mut result = Vec::new();
236        for component in components {
237            if component == ".." && !result.is_empty() && result.last() != Some(&"..") {
238                result.pop();
239            } else {
240                result.push(component);
241            }
242        }
243        
244        PathBuf::from(result.join("/"))
245    }
246
247    /// Check if a pattern is valid glob syntax
248    pub fn is_valid_glob_pattern(pattern: &str) -> bool {
249        glob::GlobPattern::new(pattern).is_ok()
250    }
251
252    /// Check if a pattern is valid gitignore syntax
253    pub fn is_valid_gitignore_pattern(pattern: &str) -> bool {
254        gitignore::GitignorePattern::new(pattern).is_ok()
255    }
256
257    /// Parse comma-separated patterns into a vector
258    pub fn parse_csv_patterns(csv: &str) -> Vec<String> {
259        csv.split(',')
260            .map(|s| s.trim().to_string())
261            .filter(|s| !s.is_empty())
262            .collect()
263    }
264
265    /// Escape special glob characters in a string
266    pub fn escape_glob_pattern(input: &str) -> String {
267        input
268            .replace('*', r"\*")
269            .replace('?', r"\?")
270            .replace('[', r"\[")
271            .replace(']', r"\]")
272            .replace('{', r"\{")
273            .replace('}', r"\}")
274    }
275
276    /// Convert a simple file extension to a glob pattern
277    pub fn extension_to_glob(extension: &str) -> String {
278        format!("**/*.{}", extension.trim_start_matches('.'))
279    }
280
281    /// Convert multiple extensions to include patterns
282    pub fn extensions_to_globs(extensions: &[&str]) -> Vec<String> {
283        extensions.iter()
284            .map(|ext| extension_to_glob(ext))
285            .collect()
286    }
287}
288
289/// Pre-configured pattern matchers for common use cases
290pub mod presets {
291    use super::*;
292
293    /// Create a matcher for common source code files
294    pub fn source_code() -> Result<PatternMatcher> {
295        PatternMatcherBuilder::new()
296            .includes([
297                "**/*.rs", "**/*.py", "**/*.js", "**/*.ts", "**/*.jsx", "**/*.tsx",
298                "**/*.java", "**/*.kt", "**/*.scala", "**/*.go", "**/*.c", "**/*.cpp",
299                "**/*.cxx", "**/*.cc", "**/*.h", "**/*.hpp", "**/*.cs", "**/*.swift",
300                "**/*.dart", "**/*.rb", "**/*.php", "**/*.sh", "**/*.bash", "**/*.zsh"
301            ])
302            .excludes([
303                "**/node_modules/**", "**/target/**", "**/build/**", "**/dist/**",
304                "**/__pycache__/**", "**/*.pyc", "**/.git/**", "**/vendor/**"
305            ])
306            .build()
307            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
308    }
309
310    /// Create a matcher for documentation files
311    pub fn documentation() -> Result<PatternMatcher> {
312        PatternMatcherBuilder::new()
313            .includes([
314                "**/*.md", "**/*.rst", "**/*.txt", "**/*.adoc", "**/*.org",
315                "**/README*", "**/CHANGELOG*", "**/LICENSE*", "**/COPYING*",
316                "**/*.tex", "**/*.latex"
317            ])
318            .excludes([
319                "**/node_modules/**", "**/target/**", "**/build/**", "**/dist/**"
320            ])
321            .build()
322            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
323    }
324
325    /// Create a matcher for configuration files
326    pub fn configuration() -> Result<PatternMatcher> {
327        PatternMatcherBuilder::new()
328            .includes([
329                "**/*.json", "**/*.yaml", "**/*.yml", "**/*.toml", "**/*.ini",
330                "**/*.cfg", "**/*.conf", "**/*.xml", "**/Dockerfile*", "**/Makefile*",
331                "**/.env*", "**/*.env"
332            ])
333            .excludes([
334                "**/node_modules/**", "**/target/**", "**/build/**", "**/dist/**"
335            ])
336            .build()
337            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
338    }
339
340    /// Create a matcher for web assets
341    pub fn web_assets() -> Result<PatternMatcher> {
342        PatternMatcherBuilder::new()
343            .includes([
344                "**/*.html", "**/*.css", "**/*.scss", "**/*.sass", "**/*.less",
345                "**/*.js", "**/*.ts", "**/*.jsx", "**/*.tsx", "**/*.vue", "**/*.svelte"
346            ])
347            .excludes([
348                "**/node_modules/**", "**/dist/**", "**/build/**", "**/.next/**",
349                "**/coverage/**", "**/*.min.js", "**/*.min.css"
350            ])
351            .build()
352            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
353    }
354
355    /// Create a matcher that excludes all common build artifacts
356    pub fn no_build_artifacts() -> Result<PatternMatcher> {
357        PatternMatcherBuilder::new()
358            .include("**/*")
359            .excludes([
360                "**/target/**", "**/build/**", "**/dist/**", "**/out/**",
361                "**/node_modules/**", "**/__pycache__/**", "**/*.pyc",
362                "**/vendor/**", "**/deps/**", "**/.git/**", "**/.svn/**",
363                "**/bin/**", "**/obj/**", "**/*.o", "**/*.a", "**/*.so",
364                "**/*.dylib", "**/*.dll", "**/*.exe", "**/coverage/**",
365                "**/.nyc_output/**", "**/junit.xml", "**/test-results/**"
366            ])
367            .build()
368            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
369    }
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use std::fs;
    use std::path::PathBuf;

    /// `QuickMatcher::new` honours both include and exclude slices.
    #[test]
    fn test_quick_matcher_creation() {
        let mut matcher = QuickMatcher::new(&["**/*.rs"], &["**/target/**"]).unwrap();
        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
    }

    /// `QuickMatcher::from_patterns` splits comma-separated lists.
    #[test]
    fn test_quick_matcher_csv() {
        let mut matcher = QuickMatcher::from_patterns(
            Some("**/*.rs,**/*.py"),
            Some("**/target/**,**/__pycache__/**")
        ).unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
        assert!(!matcher.matches("src/__pycache__/lib.pyc").unwrap());
    }

    /// One-at-a-time `include` / `exclude` calls on `PatternMatcherBuilder`.
    #[test]
    fn test_pattern_builder() {
        let mut matcher = PatternMatcherBuilder::new()
            .include("**/*.rs")
            .include("**/*.py")
            .exclude("**/target/**")
            .exclude("**/__pycache__/**")
            .case_sensitive(true)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(!matcher.should_process("target/debug/lib.rs").unwrap());
        assert!(!matcher.should_process("src/__pycache__/main.pyc").unwrap());
    }

    /// Bulk `includes` / `excludes` calls on `PatternMatcherBuilder`.
    #[test]
    fn test_pattern_builder_fluent_api() {
        let mut matcher = PatternMatcherBuilder::new()
            .includes(["**/*.rs", "**/*.py", "**/*.js"])
            .excludes(["**/node_modules/**", "**/target/**"])
            .case_sensitive(false)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    #[test]
    fn test_utils_path_normalization() {
        use super::utils::*;

        assert_eq!(normalize_path("src/lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src//lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src/./lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src/../src/lib.rs"), PathBuf::from("src/lib.rs"));
    }

    #[test]
    fn test_utils_pattern_validation() {
        use super::utils::*;

        assert!(is_valid_glob_pattern("**/*.rs"));
        assert!(is_valid_glob_pattern("src/**"));
        assert!(is_valid_glob_pattern("*.{rs,py}"));

        assert!(is_valid_gitignore_pattern("*.rs"));
        assert!(is_valid_gitignore_pattern("!important.rs"));
        assert!(is_valid_gitignore_pattern("build/"));
    }

    #[test]
    fn test_utils_csv_parsing() {
        use super::utils::*;

        assert_eq!(
            parse_csv_patterns("*.rs,*.py, *.js "),
            vec!["*.rs", "*.py", "*.js"]
        );

        assert_eq!(
            parse_csv_patterns("single"),
            vec!["single"]
        );

        assert!(parse_csv_patterns("").is_empty());
        assert!(parse_csv_patterns(",,,").is_empty());
    }

    #[test]
    fn test_utils_extension_conversion() {
        use super::utils::*;

        assert_eq!(extension_to_glob("rs"), "**/*.rs");
        assert_eq!(extension_to_glob(".py"), "**/*.py");

        assert_eq!(
            extensions_to_globs(&["rs", "py", "js"]),
            vec!["**/*.rs", "**/*.py", "**/*.js"]
        );
    }

    #[test]
    fn test_utils_glob_escaping() {
        use super::utils::*;

        assert_eq!(escape_glob_pattern("file*.txt"), r"file\*.txt");
        assert_eq!(escape_glob_pattern("test?file.txt"), r"test\?file.txt");
        assert_eq!(escape_glob_pattern("file[1-3].txt"), r"file\[1-3\].txt");
        assert_eq!(escape_glob_pattern("file{a,b}.txt"), r"file\{a,b\}.txt");
    }

    #[test]
    fn test_presets_source_code() {
        let mut matcher = presets::source_code().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(matcher.should_process("src/app.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
    }

    #[test]
    fn test_presets_documentation() {
        let mut matcher = presets::documentation().unwrap();

        assert!(matcher.should_process("README.md").unwrap());
        assert!(matcher.should_process("docs/guide.rst").unwrap());
        assert!(matcher.should_process("CHANGELOG.txt").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
        assert!(!matcher.should_process("node_modules/package/README.md").unwrap());
    }

    #[test]
    fn test_presets_configuration() {
        let mut matcher = presets::configuration().unwrap();

        assert!(matcher.should_process("config.json").unwrap());
        assert!(matcher.should_process("docker-compose.yml").unwrap());
        assert!(matcher.should_process("Dockerfile").unwrap());
        assert!(matcher.should_process("Makefile").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
    }

    #[test]
    fn test_presets_web_assets() {
        let mut matcher = presets::web_assets().unwrap();

        assert!(matcher.should_process("index.html").unwrap());
        assert!(matcher.should_process("styles.css").unwrap());
        assert!(matcher.should_process("app.js").unwrap());
        assert!(matcher.should_process("component.tsx").unwrap());
        assert!(!matcher.should_process("app.min.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    #[test]
    fn test_presets_no_build_artifacts() {
        let mut matcher = presets::no_build_artifacts().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("README.md").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("__pycache__/main.pyc").unwrap());
        assert!(!matcher.should_process("build/output.js").unwrap());
    }

    // A plain synchronous test: nothing here awaits, and the setup uses
    // blocking `std::fs` calls, so an async runtime is unnecessary.
    #[test]
    fn test_integration_with_file_system() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        // Create test files
        fs::create_dir_all(base_path.join("src")).unwrap();
        fs::create_dir_all(base_path.join("target/debug")).unwrap();
        fs::create_dir_all(base_path.join("docs")).unwrap();

        fs::write(base_path.join("src/lib.rs"), "fn main() {}").unwrap();
        fs::write(base_path.join("src/main.py"), "print('hello')").unwrap();
        fs::write(base_path.join("target/debug/main"), "binary").unwrap();
        fs::write(base_path.join("README.md"), "# Project").unwrap();
        fs::write(base_path.join("docs/guide.md"), "# Guide").unwrap();

        let mut matcher = presets::source_code().unwrap();

        // Test paths relative to the base directory
        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());

        // Documentation should not match source code preset
        assert!(!matcher.should_process("README.md").unwrap());
        assert!(!matcher.should_process("docs/guide.md").unwrap());
    }
}