1pub mod glob;
42pub mod gitignore;
43pub mod matcher;
44pub mod validation;
45
46pub use glob::{GlobMatcher, GlobPattern, GlobOptions, GlobMatchResult};
48pub use gitignore::{GitignoreMatcher, GitignorePattern, GitignoreRule, GitignoreStats};
49pub use matcher::{
50 PatternMatcher, MatchResult, MatcherOptions, PatternMatcherBuilder
51};
52pub use validation::{
53 PatternValidator, ValidationResult, ValidationError, ValidationConfig,
54 PerformanceRisk, PerformanceRiskLevel
55};
56
57use scribe_core::{Result, ScribeError};
58use std::path::Path;
59
60pub const VERSION: &str = env!("CARGO_PKG_VERSION");
62
63pub struct QuickMatcher {
65 matcher: PatternMatcher,
66}
67
68impl QuickMatcher {
69 pub fn new(include_patterns: &[&str], exclude_patterns: &[&str]) -> Result<Self> {
71 let mut builder = PatternMatcherBuilder::new();
72
73 for pattern in include_patterns {
74 builder = builder.include(*pattern);
75 }
76
77 for pattern in exclude_patterns {
78 builder = builder.exclude(*pattern);
79 }
80
81 let matcher = builder.build().map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
82 Ok(Self { matcher })
83 }
84
85 pub fn from_patterns(include_csv: Option<&str>, exclude_csv: Option<&str>) -> Result<Self> {
87 let mut builder = PatternMatcherBuilder::new();
88
89 if let Some(includes) = include_csv {
90 let patterns = utils::parse_csv_patterns(includes);
91 builder = builder.includes(patterns);
92 }
93
94 if let Some(excludes) = exclude_csv {
95 let patterns = utils::parse_csv_patterns(excludes);
96 builder = builder.excludes(patterns);
97 }
98
99 let matcher = builder.build().map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
100 Ok(Self { matcher })
101 }
102
103 pub fn matches<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
105 self.matcher.should_process(path).map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
106 }
107
108 pub fn match_details<P: AsRef<Path>>(&mut self, path: P) -> Result<MatchResult> {
110 self.matcher.is_match(path).map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
111 }
112}
113
114pub struct PatternBuilder {
116 includes: Vec<String>,
117 excludes: Vec<String>,
118 gitignore_files: Vec<std::path::PathBuf>,
119 case_sensitive: bool,
120}
121
122impl Default for PatternBuilder {
123 fn default() -> Self {
124 Self::new()
125 }
126}
127
128impl PatternBuilder {
129 pub fn new() -> Self {
131 Self {
132 includes: Vec::new(),
133 excludes: Vec::new(),
134 gitignore_files: Vec::new(),
135 case_sensitive: true,
136 }
137 }
138
139 pub fn include<S: Into<String>>(mut self, pattern: S) -> Self {
141 self.includes.push(pattern.into());
142 self
143 }
144
145 pub fn includes<I, S>(mut self, patterns: I) -> Self
147 where
148 I: IntoIterator<Item = S>,
149 S: Into<String>,
150 {
151 self.includes.extend(patterns.into_iter().map(|p| p.into()));
152 self
153 }
154
155 pub fn exclude<S: Into<String>>(mut self, pattern: S) -> Self {
157 self.excludes.push(pattern.into());
158 self
159 }
160
161 pub fn excludes<I, S>(mut self, patterns: I) -> Self
163 where
164 I: IntoIterator<Item = S>,
165 S: Into<String>,
166 {
167 self.excludes.extend(patterns.into_iter().map(|p| p.into()));
168 self
169 }
170
171 pub fn gitignore<P: AsRef<Path>>(mut self, path: P) -> Self {
173 self.gitignore_files.push(path.as_ref().to_path_buf());
174 self
175 }
176
177 pub fn case_sensitive(mut self, enabled: bool) -> Self {
179 self.case_sensitive = enabled;
180 self
181 }
182
183 pub fn build(self) -> Result<PatternMatcher> {
185 let options = MatcherOptions {
186 case_sensitive: self.case_sensitive,
187 respect_gitignore: !self.gitignore_files.is_empty(),
188 include_hidden: false,
189 custom_gitignore_files: self.gitignore_files,
190 override_patterns: Vec::new(),
191 };
192
193 let mut builder = PatternMatcherBuilder::new();
194
195 if !self.includes.is_empty() {
196 builder = builder.includes(self.includes);
197 }
198
199 if !self.excludes.is_empty() {
200 builder = builder.excludes(self.excludes);
201 }
202
203 builder = builder
204 .case_sensitive(self.case_sensitive);
205
206 if let Some(first_gitignore) = options.custom_gitignore_files.first() {
208 if let Some(parent) = first_gitignore.parent() {
209 builder = builder.base_path(parent);
210 }
211 }
212
213 builder.build().map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
214 }
215}
216
217pub mod utils {
219 use super::*;
220 use std::path::PathBuf;
221
/// Lexically normalizes a path for pattern matching.
///
/// The path is converted lossily to a string, backslashes are turned into
/// forward slashes, empty and `.` components are dropped, and each `..`
/// cancels the preceding normal component. The file system is never
/// consulted.
///
/// A leading `/` is preserved so absolute paths stay absolute, and a `..`
/// directly under the root is discarded because the root has no parent.
/// Leading `..` components of a relative path are kept as-is.
pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    let normalized = path.as_ref().to_string_lossy().replace('\\', "/");
    let is_rooted = normalized.starts_with('/');

    let mut kept: Vec<&str> = Vec::new();
    for component in normalized.split('/') {
        match component {
            // Produced by duplicate/leading/trailing separators or `./`.
            "" | "." => {}
            ".." => {
                let can_pop = kept.last().map_or(false, |last| *last != "..");
                if can_pop {
                    kept.pop();
                } else if !(is_rooted && kept.is_empty()) {
                    // Relative path climbing above its start: keep the `..`.
                    kept.push("..");
                }
                // Otherwise: `..` at the root stays at the root.
            }
            other => kept.push(other),
        }
    }

    let joined = kept.join("/");
    if is_rooted {
        PathBuf::from(format!("/{}", joined))
    } else {
        PathBuf::from(joined)
    }
}
246
247 pub fn is_valid_glob_pattern(pattern: &str) -> bool {
249 glob::GlobPattern::new(pattern).is_ok()
250 }
251
252 pub fn is_valid_gitignore_pattern(pattern: &str) -> bool {
254 gitignore::GitignorePattern::new(pattern).is_ok()
255 }
256
/// Splits a comma-separated list of patterns into trimmed, non-empty entries.
///
/// Commas inside brace alternation (for example `*.{rs,py}`) belong to the
/// pattern and do not split it; a brace preceded by a backslash is treated as
/// a literal and does not open or close a group. When the braces in `csv` are
/// unbalanced the input is split on every comma instead, which is the
/// behavior for inputs that contain no braces at all.
pub fn parse_csv_patterns(csv: &str) -> Vec<String> {
    /// Trims `piece` and appends it to `out` unless it is empty.
    fn push_non_empty(out: &mut Vec<String>, piece: &str) {
        let trimmed = piece.trim();
        if !trimmed.is_empty() {
            out.push(trimmed.to_string());
        }
    }

    let mut patterns = Vec::new();
    let mut depth = 0usize;
    let mut escaped = false;
    let mut start = 0usize;

    for (idx, ch) in csv.char_indices() {
        let is_escaped = escaped;
        escaped = ch == '\\' && !is_escaped;

        match ch {
            '{' if !is_escaped => depth += 1,
            '}' if !is_escaped => depth = depth.saturating_sub(1),
            ',' if depth == 0 => {
                push_non_empty(&mut patterns, &csv[start..idx]);
                // `,` is a single byte, so the next piece starts right after it.
                start = idx + 1;
            }
            _ => {}
        }
    }

    if depth != 0 {
        // Unbalanced braces: grouping cannot be trusted, so split everywhere.
        return csv
            .split(',')
            .map(str::trim)
            .filter(|piece| !piece.is_empty())
            .map(str::to_string)
            .collect();
    }

    push_non_empty(&mut patterns, &csv[start..]);
    patterns
}
264
/// Escapes glob metacharacters in `input` so it can be used as a literal
/// inside a pattern.
///
/// Each of `*`, `?`, `[`, `]`, `{`, `}` and the escape character `\` itself is
/// prefixed with a backslash. Escaping the backslash matters: otherwise an
/// input such as `\*` would become `\\*`, i.e. a literal backslash followed by
/// a live wildcard.
pub fn escape_glob_pattern(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        if matches!(ch, '\\' | '*' | '?' | '[' | ']' | '{' | '}') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
275
/// Builds a recursive glob (`**/*.<ext>`) for a file extension.
///
/// Any leading dots on `extension` are removed first, so `"rs"` and `".rs"`
/// yield the same pattern.
pub fn extension_to_glob(extension: &str) -> String {
    let bare = extension.trim_start_matches('.');
    ["**/*.", bare].concat()
}
280
281 pub fn extensions_to_globs(extensions: &[&str]) -> Vec<String> {
283 extensions.iter()
284 .map(|ext| extension_to_glob(ext))
285 .collect()
286 }
287}
288
289pub mod presets {
291 use super::*;
292
293 pub fn source_code() -> Result<PatternMatcher> {
295 PatternMatcherBuilder::new()
296 .includes([
297 "**/*.rs", "**/*.py", "**/*.js", "**/*.ts", "**/*.jsx", "**/*.tsx",
298 "**/*.java", "**/*.kt", "**/*.scala", "**/*.go", "**/*.c", "**/*.cpp",
299 "**/*.cxx", "**/*.cc", "**/*.h", "**/*.hpp", "**/*.cs", "**/*.swift",
300 "**/*.dart", "**/*.rb", "**/*.php", "**/*.sh", "**/*.bash", "**/*.zsh"
301 ])
302 .excludes([
303 "**/node_modules/**", "**/target/**", "**/build/**", "**/dist/**",
304 "**/__pycache__/**", "**/*.pyc", "**/.git/**", "**/vendor/**"
305 ])
306 .build()
307 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
308 }
309
310 pub fn documentation() -> Result<PatternMatcher> {
312 PatternMatcherBuilder::new()
313 .includes([
314 "**/*.md", "**/*.rst", "**/*.txt", "**/*.adoc", "**/*.org",
315 "**/README*", "**/CHANGELOG*", "**/LICENSE*", "**/COPYING*",
316 "**/*.tex", "**/*.latex"
317 ])
318 .excludes([
319 "**/node_modules/**", "**/target/**", "**/build/**", "**/dist/**"
320 ])
321 .build()
322 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
323 }
324
325 pub fn configuration() -> Result<PatternMatcher> {
327 PatternMatcherBuilder::new()
328 .includes([
329 "**/*.json", "**/*.yaml", "**/*.yml", "**/*.toml", "**/*.ini",
330 "**/*.cfg", "**/*.conf", "**/*.xml", "**/Dockerfile*", "**/Makefile*",
331 "**/.env*", "**/*.env"
332 ])
333 .excludes([
334 "**/node_modules/**", "**/target/**", "**/build/**", "**/dist/**"
335 ])
336 .build()
337 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
338 }
339
340 pub fn web_assets() -> Result<PatternMatcher> {
342 PatternMatcherBuilder::new()
343 .includes([
344 "**/*.html", "**/*.css", "**/*.scss", "**/*.sass", "**/*.less",
345 "**/*.js", "**/*.ts", "**/*.jsx", "**/*.tsx", "**/*.vue", "**/*.svelte"
346 ])
347 .excludes([
348 "**/node_modules/**", "**/dist/**", "**/build/**", "**/.next/**",
349 "**/coverage/**", "**/*.min.js", "**/*.min.css"
350 ])
351 .build()
352 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
353 }
354
355 pub fn no_build_artifacts() -> Result<PatternMatcher> {
357 PatternMatcherBuilder::new()
358 .include("**/*")
359 .excludes([
360 "**/target/**", "**/build/**", "**/dist/**", "**/out/**",
361 "**/node_modules/**", "**/__pycache__/**", "**/*.pyc",
362 "**/vendor/**", "**/deps/**", "**/.git/**", "**/.svn/**",
363 "**/bin/**", "**/obj/**", "**/*.o", "**/*.a", "**/*.so",
364 "**/*.dylib", "**/*.dll", "**/*.exe", "**/coverage/**",
365 "**/.nyc_output/**", "**/junit.xml", "**/test-results/**"
366 ])
367 .build()
368 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
369 }
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// `QuickMatcher::new` honours both include and exclude slices.
    #[test]
    fn test_quick_matcher_creation() {
        let mut matcher = QuickMatcher::new(&["**/*.rs"], &["**/target/**"]).unwrap();
        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
    }

    /// `QuickMatcher::from_patterns` splits comma-separated lists.
    #[test]
    fn test_quick_matcher_csv() {
        let mut matcher = QuickMatcher::from_patterns(
            Some("**/*.rs,**/*.py"),
            Some("**/target/**,**/__pycache__/**"),
        )
        .unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
        assert!(!matcher.matches("src/__pycache__/lib.pyc").unwrap());
    }

    /// One-at-a-time `include` / `exclude` calls on the matcher builder.
    #[test]
    fn test_pattern_builder() {
        let mut matcher = PatternMatcherBuilder::new()
            .include("**/*.rs")
            .include("**/*.py")
            .exclude("**/target/**")
            .exclude("**/__pycache__/**")
            .case_sensitive(true)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(!matcher.should_process("target/debug/lib.rs").unwrap());
        assert!(!matcher.should_process("src/__pycache__/main.pyc").unwrap());
    }

    /// Bulk `includes` / `excludes` calls on the matcher builder.
    #[test]
    fn test_pattern_builder_fluent_api() {
        let mut matcher = PatternMatcherBuilder::new()
            .includes(["**/*.rs", "**/*.py", "**/*.js"])
            .excludes(["**/node_modules/**", "**/target/**"])
            .case_sensitive(false)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    /// Duplicate separators, `.` and `..` components are collapsed.
    #[test]
    fn test_utils_path_normalization() {
        use super::utils::*;

        assert_eq!(normalize_path("src/lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src//lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src/./lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src/../src/lib.rs"), PathBuf::from("src/lib.rs"));
    }

    /// Well-formed glob and gitignore patterns are accepted.
    #[test]
    fn test_utils_pattern_validation() {
        use super::utils::*;

        assert!(is_valid_glob_pattern("**/*.rs"));
        assert!(is_valid_glob_pattern("src/**"));
        assert!(is_valid_glob_pattern("*.{rs,py}"));

        assert!(is_valid_gitignore_pattern("*.rs"));
        assert!(is_valid_gitignore_pattern("!important.rs"));
        assert!(is_valid_gitignore_pattern("build/"));
    }

    /// Entries are trimmed and empty entries are dropped.
    #[test]
    fn test_utils_csv_parsing() {
        use super::utils::*;

        assert_eq!(
            parse_csv_patterns("*.rs,*.py, *.js "),
            vec!["*.rs", "*.py", "*.js"]
        );

        assert_eq!(parse_csv_patterns("single"), vec!["single"]);

        assert!(parse_csv_patterns("").is_empty());
        assert!(parse_csv_patterns(",,,").is_empty());
    }

    /// Extensions with or without a leading dot map to the same glob.
    #[test]
    fn test_utils_extension_conversion() {
        use super::utils::*;

        assert_eq!(extension_to_glob("rs"), "**/*.rs");
        assert_eq!(extension_to_glob(".py"), "**/*.py");

        assert_eq!(
            extensions_to_globs(&["rs", "py", "js"]),
            vec!["**/*.rs", "**/*.py", "**/*.js"]
        );
    }

    /// Glob metacharacters are backslash-escaped.
    #[test]
    fn test_utils_glob_escaping() {
        use super::utils::*;

        assert_eq!(escape_glob_pattern("file*.txt"), r"file\*.txt");
        assert_eq!(escape_glob_pattern("test?file.txt"), r"test\?file.txt");
        assert_eq!(escape_glob_pattern("file[1-3].txt"), r"file\[1-3\].txt");
        assert_eq!(escape_glob_pattern("file{a,b}.txt"), r"file\{a,b\}.txt");
    }

    #[test]
    fn test_presets_source_code() {
        let mut matcher = presets::source_code().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(matcher.should_process("src/app.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
    }

    #[test]
    fn test_presets_documentation() {
        let mut matcher = presets::documentation().unwrap();

        assert!(matcher.should_process("README.md").unwrap());
        assert!(matcher.should_process("docs/guide.rst").unwrap());
        assert!(matcher.should_process("CHANGELOG.txt").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
        assert!(!matcher.should_process("node_modules/package/README.md").unwrap());
    }

    #[test]
    fn test_presets_configuration() {
        let mut matcher = presets::configuration().unwrap();

        assert!(matcher.should_process("config.json").unwrap());
        assert!(matcher.should_process("docker-compose.yml").unwrap());
        assert!(matcher.should_process("Dockerfile").unwrap());
        assert!(matcher.should_process("Makefile").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
    }

    #[test]
    fn test_presets_web_assets() {
        let mut matcher = presets::web_assets().unwrap();

        assert!(matcher.should_process("index.html").unwrap());
        assert!(matcher.should_process("styles.css").unwrap());
        assert!(matcher.should_process("app.js").unwrap());
        assert!(matcher.should_process("component.tsx").unwrap());
        assert!(!matcher.should_process("app.min.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    #[test]
    fn test_presets_no_build_artifacts() {
        let mut matcher = presets::no_build_artifacts().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("README.md").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("__pycache__/main.pyc").unwrap());
        assert!(!matcher.should_process("build/output.js").unwrap());
    }

    /// Lays out a small project tree on disk and checks the `source_code`
    /// preset against the project-relative path of every file in it.
    ///
    /// This is a plain synchronous test: it only performs blocking `std::fs`
    /// calls and never awaits, so no async runtime is needed.
    #[test]
    fn test_integration_with_file_system() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        for dir in ["src", "target/debug", "docs"] {
            fs::create_dir_all(base_path.join(dir)).unwrap();
        }

        // (project-relative path, file contents, expected `source_code` verdict)
        let fixtures = [
            ("src/lib.rs", "fn main() {}", true),
            ("src/main.py", "print('hello')", true),
            ("target/debug/main", "binary", false),
            ("README.md", "# Project", false),
            ("docs/guide.md", "# Guide", false),
        ];
        for (relative, contents, _) in fixtures {
            fs::write(base_path.join(relative), contents).unwrap();
        }

        let mut matcher = presets::source_code().unwrap();

        for (relative, _, expected) in fixtures {
            // Tie each assertion to a file that really exists in the fixture.
            assert!(
                base_path.join(relative).is_file(),
                "fixture file missing: {}",
                relative
            );
            // Match on the relative path so the temp-dir location cannot
            // influence the result.
            assert_eq!(
                matcher.should_process(relative).unwrap(),
                expected,
                "unexpected verdict for {}",
                relative
            );
        }
    }
}