scribe_patterns/
lib.rs

1//! # Scribe Patterns
2//!
3//! Advanced pattern matching and search algorithms for the Scribe library.
4//! This crate provides high-performance pattern matching capabilities including
5//! glob patterns, gitignore integration, and flexible include/exclude logic.
6//!
7//! ## Features
8//!
9//! - **High-Performance Glob Matching**: Using `globset` for efficient pattern compilation
10//! - **Gitignore Integration**: Full gitignore syntax support with proper precedence
11//! - **Include/Exclude Logic**: Complex pattern combinations with comma-separated input
12//! - **Path Normalization**: Cross-platform path handling and matching
13//! - **Pattern Validation**: Comprehensive error handling and pattern validation
14//! - **Caching**: Efficient pattern compilation and matching with caching
15//!
16//! ## Usage
17//!
18//! ```rust
19//! use scribe_patterns::{PatternMatcherBuilder, MatchResult};
20//! use std::path::Path;
21//!
22//! # fn example() -> anyhow::Result<()> {
23//! // Create a combined matcher with glob and gitignore patterns
24//! let mut matcher = PatternMatcherBuilder::new()
25//!     .include("src/**/*.rs")
26//!     .exclude("target/**")
27//!     .respect_gitignore(true)
28//!     .base_path(".")
29//!     .build()?;
30//!
31//! // Test if a path matches
32//! let path = Path::new("src/lib.rs");
33//! if matcher.should_process(path)? {
34//!     println!("Path is included: {}", path.display());
35//! }
36//! # Ok(())
37//! # }
38//! ```
39
40// Core modules
41pub mod gitignore;
42pub mod glob;
43pub mod matcher;
44pub mod validation;
45
46// Re-export main types for convenience
47pub use gitignore::{GitignoreMatcher, GitignorePattern, GitignoreRule, GitignoreStats};
48pub use glob::{GlobMatchResult, GlobMatcher, GlobOptions, GlobPattern};
49pub use matcher::{MatchResult, MatcherOptions, PatternMatcher, PatternMatcherBuilder};
50pub use validation::{
51    PatternValidator, PerformanceRisk, PerformanceRiskLevel, ValidationConfig, ValidationError,
52    ValidationResult,
53};
54
55use scribe_core::{Result, ScribeError};
56use std::path::Path;
57
/// Current version of the patterns crate.
///
/// Expanded at compile time from the `CARGO_PKG_VERSION` environment variable.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
60
61/// Quick pattern matching utility for simple use cases
62pub struct QuickMatcher {
63    matcher: PatternMatcher,
64}
65
66impl QuickMatcher {
67    /// Create a new quick matcher with include and exclude patterns
68    pub fn new(include_patterns: &[&str], exclude_patterns: &[&str]) -> Result<Self> {
69        let mut builder = PatternMatcherBuilder::new();
70
71        for pattern in include_patterns {
72            builder = builder.include(*pattern);
73        }
74
75        for pattern in exclude_patterns {
76            builder = builder.exclude(*pattern);
77        }
78
79        let matcher = builder
80            .build()
81            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
82        Ok(Self { matcher })
83    }
84
85    /// Create a quick matcher from comma-separated pattern strings
86    pub fn from_patterns(include_csv: Option<&str>, exclude_csv: Option<&str>) -> Result<Self> {
87        let mut builder = PatternMatcherBuilder::new();
88
89        if let Some(includes) = include_csv {
90            let patterns = utils::parse_csv_patterns(includes);
91            builder = builder.includes(patterns);
92        }
93
94        if let Some(excludes) = exclude_csv {
95            let patterns = utils::parse_csv_patterns(excludes);
96            builder = builder.excludes(patterns);
97        }
98
99        let matcher = builder
100            .build()
101            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
102        Ok(Self { matcher })
103    }
104
105    /// Test if a path should be included
106    pub fn matches<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
107        self.matcher
108            .should_process(path)
109            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
110    }
111
112    /// Get detailed match information
113    pub fn match_details<P: AsRef<Path>>(&mut self, path: P) -> Result<MatchResult> {
114        self.matcher
115            .is_match(path)
116            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
117    }
118}
119
120/// Pattern matching builder for fluent API construction
121pub struct PatternBuilder {
122    includes: Vec<String>,
123    excludes: Vec<String>,
124    gitignore_files: Vec<std::path::PathBuf>,
125    case_sensitive: bool,
126}
127
128impl Default for PatternBuilder {
129    fn default() -> Self {
130        Self::new()
131    }
132}
133
134impl PatternBuilder {
135    /// Create a new pattern builder
136    pub fn new() -> Self {
137        Self {
138            includes: Vec::new(),
139            excludes: Vec::new(),
140            gitignore_files: Vec::new(),
141            case_sensitive: true,
142        }
143    }
144
145    /// Add an include pattern
146    pub fn include<S: Into<String>>(mut self, pattern: S) -> Self {
147        self.includes.push(pattern.into());
148        self
149    }
150
151    /// Add multiple include patterns
152    pub fn includes<I, S>(mut self, patterns: I) -> Self
153    where
154        I: IntoIterator<Item = S>,
155        S: Into<String>,
156    {
157        self.includes.extend(patterns.into_iter().map(|p| p.into()));
158        self
159    }
160
161    /// Add an exclude pattern
162    pub fn exclude<S: Into<String>>(mut self, pattern: S) -> Self {
163        self.excludes.push(pattern.into());
164        self
165    }
166
167    /// Add multiple exclude patterns
168    pub fn excludes<I, S>(mut self, patterns: I) -> Self
169    where
170        I: IntoIterator<Item = S>,
171        S: Into<String>,
172    {
173        self.excludes.extend(patterns.into_iter().map(|p| p.into()));
174        self
175    }
176
177    /// Add a gitignore file
178    pub fn gitignore<P: AsRef<Path>>(mut self, path: P) -> Self {
179        self.gitignore_files.push(path.as_ref().to_path_buf());
180        self
181    }
182
183    /// Set case sensitivity
184    pub fn case_sensitive(mut self, enabled: bool) -> Self {
185        self.case_sensitive = enabled;
186        self
187    }
188
189    /// Build the pattern matcher
190    pub fn build(self) -> Result<PatternMatcher> {
191        let options = MatcherOptions {
192            case_sensitive: self.case_sensitive,
193            respect_gitignore: !self.gitignore_files.is_empty(),
194            include_hidden: false,
195            custom_gitignore_files: self.gitignore_files,
196            override_patterns: Vec::new(),
197        };
198
199        let mut builder = PatternMatcherBuilder::new();
200
201        if !self.includes.is_empty() {
202            builder = builder.includes(self.includes);
203        }
204
205        if !self.excludes.is_empty() {
206            builder = builder.excludes(self.excludes);
207        }
208
209        builder = builder.case_sensitive(self.case_sensitive);
210
211        // Set up gitignore with first gitignore file if available
212        if let Some(first_gitignore) = options.custom_gitignore_files.first() {
213            if let Some(parent) = first_gitignore.parent() {
214                builder = builder.base_path(parent);
215            }
216        }
217
218        builder
219            .build()
220            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
221    }
222}
223
224/// Utility functions for common pattern operations
225pub mod utils {
226    use super::*;
227    use std::path::PathBuf;
228
229    /// Normalize a path for consistent pattern matching across platforms
230    pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
231        let path = path.as_ref();
232
233        // Convert to forward slashes for consistent matching
234        let normalized = path.to_string_lossy().replace('\\', "/");
235
236        // Remove redundant separators and resolve . and ..
237        let components: Vec<&str> = normalized
238            .split('/')
239            .filter(|c| !c.is_empty() && *c != ".")
240            .collect();
241
242        let mut result = Vec::new();
243        for component in components {
244            if component == ".." && !result.is_empty() && result.last() != Some(&"..") {
245                result.pop();
246            } else {
247                result.push(component);
248            }
249        }
250
251        PathBuf::from(result.join("/"))
252    }
253
254    /// Check if a pattern is valid glob syntax
255    pub fn is_valid_glob_pattern(pattern: &str) -> bool {
256        glob::GlobPattern::new(pattern).is_ok()
257    }
258
259    /// Check if a pattern is valid gitignore syntax
260    pub fn is_valid_gitignore_pattern(pattern: &str) -> bool {
261        gitignore::GitignorePattern::new(pattern).is_ok()
262    }
263
264    /// Parse comma-separated patterns into a vector
265    pub fn parse_csv_patterns(csv: &str) -> Vec<String> {
266        csv.split(',')
267            .map(|s| s.trim().to_string())
268            .filter(|s| !s.is_empty())
269            .collect()
270    }
271
272    /// Escape special glob characters in a string
273    pub fn escape_glob_pattern(input: &str) -> String {
274        input
275            .replace('*', r"\*")
276            .replace('?', r"\?")
277            .replace('[', r"\[")
278            .replace(']', r"\]")
279            .replace('{', r"\{")
280            .replace('}', r"\}")
281    }
282
283    /// Convert a simple file extension to a glob pattern
284    pub fn extension_to_glob(extension: &str) -> String {
285        format!("**/*.{}", extension.trim_start_matches('.'))
286    }
287
288    /// Convert multiple extensions to include patterns
289    pub fn extensions_to_globs(extensions: &[&str]) -> Vec<String> {
290        extensions
291            .iter()
292            .map(|ext| extension_to_glob(ext))
293            .collect()
294    }
295}
296
297/// Pre-configured pattern matchers for common use cases
298pub mod presets {
299    use super::*;
300
301    /// Create a matcher for common source code files
302    pub fn source_code() -> Result<PatternMatcher> {
303        PatternMatcherBuilder::new()
304            .includes([
305                "**/*.rs",
306                "**/*.py",
307                "**/*.js",
308                "**/*.ts",
309                "**/*.jsx",
310                "**/*.tsx",
311                "**/*.java",
312                "**/*.kt",
313                "**/*.scala",
314                "**/*.go",
315                "**/*.c",
316                "**/*.cpp",
317                "**/*.cxx",
318                "**/*.cc",
319                "**/*.h",
320                "**/*.hpp",
321                "**/*.cs",
322                "**/*.swift",
323                "**/*.dart",
324                "**/*.rb",
325                "**/*.php",
326                "**/*.sh",
327                "**/*.bash",
328                "**/*.zsh",
329            ])
330            .excludes([
331                "**/node_modules/**",
332                "**/target/**",
333                "**/build/**",
334                "**/dist/**",
335                "**/__pycache__/**",
336                "**/*.pyc",
337                "**/.git/**",
338                "**/vendor/**",
339            ])
340            .build()
341            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
342    }
343
344    /// Create a matcher for documentation files
345    pub fn documentation() -> Result<PatternMatcher> {
346        PatternMatcherBuilder::new()
347            .includes([
348                "**/*.md",
349                "**/*.rst",
350                "**/*.txt",
351                "**/*.adoc",
352                "**/*.org",
353                "**/README*",
354                "**/CHANGELOG*",
355                "**/LICENSE*",
356                "**/COPYING*",
357                "**/*.tex",
358                "**/*.latex",
359            ])
360            .excludes([
361                "**/node_modules/**",
362                "**/target/**",
363                "**/build/**",
364                "**/dist/**",
365            ])
366            .build()
367            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
368    }
369
370    /// Create a matcher for configuration files
371    pub fn configuration() -> Result<PatternMatcher> {
372        PatternMatcherBuilder::new()
373            .includes([
374                "**/*.json",
375                "**/*.yaml",
376                "**/*.yml",
377                "**/*.toml",
378                "**/*.ini",
379                "**/*.cfg",
380                "**/*.conf",
381                "**/*.xml",
382                "**/Dockerfile*",
383                "**/Makefile*",
384                "**/.env*",
385                "**/*.env",
386            ])
387            .excludes([
388                "**/node_modules/**",
389                "**/target/**",
390                "**/build/**",
391                "**/dist/**",
392            ])
393            .build()
394            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
395    }
396
397    /// Create a matcher for web assets
398    pub fn web_assets() -> Result<PatternMatcher> {
399        PatternMatcherBuilder::new()
400            .includes([
401                "**/*.html",
402                "**/*.css",
403                "**/*.scss",
404                "**/*.sass",
405                "**/*.less",
406                "**/*.js",
407                "**/*.ts",
408                "**/*.jsx",
409                "**/*.tsx",
410                "**/*.vue",
411                "**/*.svelte",
412            ])
413            .excludes([
414                "**/node_modules/**",
415                "**/dist/**",
416                "**/build/**",
417                "**/.next/**",
418                "**/coverage/**",
419                "**/*.min.js",
420                "**/*.min.css",
421            ])
422            .build()
423            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
424    }
425
426    /// Create a matcher that excludes all common build artifacts
427    pub fn no_build_artifacts() -> Result<PatternMatcher> {
428        PatternMatcherBuilder::new()
429            .include("**/*")
430            .excludes([
431                "**/target/**",
432                "**/build/**",
433                "**/dist/**",
434                "**/out/**",
435                "**/node_modules/**",
436                "**/__pycache__/**",
437                "**/*.pyc",
438                "**/vendor/**",
439                "**/deps/**",
440                "**/.git/**",
441                "**/.svn/**",
442                "**/bin/**",
443                "**/obj/**",
444                "**/*.o",
445                "**/*.a",
446                "**/*.so",
447                "**/*.dylib",
448                "**/*.dll",
449                "**/*.exe",
450                "**/coverage/**",
451                "**/.nyc_output/**",
452                "**/junit.xml",
453                "**/test-results/**",
454            ])
455            .build()
456            .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
457    }
458}
459
/// Unit tests for the crate-level helpers, the `utils` functions and the presets.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// `QuickMatcher::new` honours both the include and the exclude list.
    #[test]
    fn test_quick_matcher_creation() {
        let mut matcher = QuickMatcher::new(&["**/*.rs"], &["**/target/**"]).unwrap();
        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
    }

    /// `QuickMatcher::from_patterns` splits comma-separated lists.
    #[test]
    fn test_quick_matcher_csv() {
        let mut matcher = QuickMatcher::from_patterns(
            Some("**/*.rs,**/*.py"),
            Some("**/target/**,**/__pycache__/**"),
        )
        .unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
        assert!(!matcher.matches("src/__pycache__/lib.pyc").unwrap());
    }

    /// Patterns added one at a time through the matcher builder.
    #[test]
    fn test_pattern_builder() {
        let mut matcher = PatternMatcherBuilder::new()
            .include("**/*.rs")
            .include("**/*.py")
            .exclude("**/target/**")
            .exclude("**/__pycache__/**")
            .case_sensitive(true)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(!matcher.should_process("target/debug/lib.rs").unwrap());
        assert!(!matcher.should_process("src/__pycache__/main.pyc").unwrap());
    }

    /// Patterns added in bulk through the matcher builder.
    #[test]
    fn test_pattern_builder_fluent_api() {
        let mut matcher = PatternMatcherBuilder::new()
            .includes(["**/*.rs", "**/*.py", "**/*.js"])
            .excludes(["**/node_modules/**", "**/target/**"])
            .case_sensitive(false)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    #[test]
    fn test_utils_path_normalization() {
        use super::utils::*;

        assert_eq!(normalize_path("src/lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src//lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src/./lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(
            normalize_path("src/../src/lib.rs"),
            PathBuf::from("src/lib.rs")
        );
    }

    #[test]
    fn test_utils_pattern_validation() {
        use super::utils::*;

        assert!(is_valid_glob_pattern("**/*.rs"));
        assert!(is_valid_glob_pattern("src/**"));
        assert!(is_valid_glob_pattern("*.{rs,py}"));

        assert!(is_valid_gitignore_pattern("*.rs"));
        assert!(is_valid_gitignore_pattern("!important.rs"));
        assert!(is_valid_gitignore_pattern("build/"));
    }

    #[test]
    fn test_utils_csv_parsing() {
        use super::utils::*;

        assert_eq!(
            parse_csv_patterns("*.rs,*.py, *.js "),
            vec!["*.rs", "*.py", "*.js"]
        );

        assert_eq!(parse_csv_patterns("single"), vec!["single"]);

        assert!(parse_csv_patterns("").is_empty());
        assert!(parse_csv_patterns(",,,").is_empty());
    }

    #[test]
    fn test_utils_extension_conversion() {
        use super::utils::*;

        assert_eq!(extension_to_glob("rs"), "**/*.rs");
        assert_eq!(extension_to_glob(".py"), "**/*.py");

        assert_eq!(
            extensions_to_globs(&["rs", "py", "js"]),
            vec!["**/*.rs", "**/*.py", "**/*.js"]
        );
    }

    #[test]
    fn test_utils_glob_escaping() {
        use super::utils::*;

        assert_eq!(escape_glob_pattern("file*.txt"), r"file\*.txt");
        assert_eq!(escape_glob_pattern("test?file.txt"), r"test\?file.txt");
        assert_eq!(escape_glob_pattern("file[1-3].txt"), r"file\[1-3\].txt");
        assert_eq!(escape_glob_pattern("file{a,b}.txt"), r"file\{a,b\}.txt");
    }

    #[test]
    fn test_presets_source_code() {
        let mut matcher = presets::source_code().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(matcher.should_process("src/app.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
    }

    #[test]
    fn test_presets_documentation() {
        let mut matcher = presets::documentation().unwrap();

        assert!(matcher.should_process("README.md").unwrap());
        assert!(matcher.should_process("docs/guide.rst").unwrap());
        assert!(matcher.should_process("CHANGELOG.txt").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
        assert!(!matcher
            .should_process("node_modules/package/README.md")
            .unwrap());
    }

    #[test]
    fn test_presets_configuration() {
        let mut matcher = presets::configuration().unwrap();

        assert!(matcher.should_process("config.json").unwrap());
        assert!(matcher.should_process("docker-compose.yml").unwrap());
        assert!(matcher.should_process("Dockerfile").unwrap());
        assert!(matcher.should_process("Makefile").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
    }

    #[test]
    fn test_presets_web_assets() {
        let mut matcher = presets::web_assets().unwrap();

        assert!(matcher.should_process("index.html").unwrap());
        assert!(matcher.should_process("styles.css").unwrap());
        assert!(matcher.should_process("app.js").unwrap());
        assert!(matcher.should_process("component.tsx").unwrap());
        assert!(!matcher.should_process("app.min.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    #[test]
    fn test_presets_no_build_artifacts() {
        let mut matcher = presets::no_build_artifacts().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("README.md").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("__pycache__/main.pyc").unwrap());
        assert!(!matcher.should_process("build/output.js").unwrap());
    }

    /// Runs the source-code preset next to a real directory tree.
    ///
    /// The body contains no `.await`, so this is a plain synchronous test and
    /// does not need an async runtime.
    #[test]
    fn test_integration_with_file_system() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        // Create test files
        fs::create_dir_all(base_path.join("src")).unwrap();
        fs::create_dir_all(base_path.join("target/debug")).unwrap();
        fs::create_dir_all(base_path.join("docs")).unwrap();

        fs::write(base_path.join("src/lib.rs"), "fn main() {}").unwrap();
        fs::write(base_path.join("src/main.py"), "print('hello')").unwrap();
        fs::write(base_path.join("target/debug/main"), "binary").unwrap();
        fs::write(base_path.join("README.md"), "# Project").unwrap();
        fs::write(base_path.join("docs/guide.md"), "# Guide").unwrap();

        let mut matcher = presets::source_code().unwrap();

        // Test paths relative to the base directory
        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());

        // Documentation should not match source code preset
        assert!(!matcher.should_process("README.md").unwrap());
        assert!(!matcher.should_process("docs/guide.md").unwrap());
    }
}