1pub mod gitignore;
42pub mod glob;
43pub mod matcher;
44pub mod validation;
45
46pub use gitignore::{GitignoreMatcher, GitignorePattern, GitignoreRule, GitignoreStats};
48pub use glob::{GlobMatchResult, GlobMatcher, GlobOptions, GlobPattern};
49pub use matcher::{MatchResult, MatcherOptions, PatternMatcher, PatternMatcherBuilder};
50pub use validation::{
51 PatternValidator, PerformanceRisk, PerformanceRiskLevel, ValidationConfig, ValidationError,
52 ValidationResult,
53};
54
55use scribe_core::{Result, ScribeError};
56use std::path::Path;
57
/// Version string of this crate, captured from Cargo's `CARGO_PKG_VERSION`
/// environment variable at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
60
61pub struct QuickMatcher {
63 matcher: PatternMatcher,
64}
65
66impl QuickMatcher {
67 pub fn new(include_patterns: &[&str], exclude_patterns: &[&str]) -> Result<Self> {
69 let mut builder = PatternMatcherBuilder::new();
70
71 for pattern in include_patterns {
72 builder = builder.include(*pattern);
73 }
74
75 for pattern in exclude_patterns {
76 builder = builder.exclude(*pattern);
77 }
78
79 let matcher = builder
80 .build()
81 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
82 Ok(Self { matcher })
83 }
84
85 pub fn from_patterns(include_csv: Option<&str>, exclude_csv: Option<&str>) -> Result<Self> {
87 let mut builder = PatternMatcherBuilder::new();
88
89 if let Some(includes) = include_csv {
90 let patterns = utils::parse_csv_patterns(includes);
91 builder = builder.includes(patterns);
92 }
93
94 if let Some(excludes) = exclude_csv {
95 let patterns = utils::parse_csv_patterns(excludes);
96 builder = builder.excludes(patterns);
97 }
98
99 let matcher = builder
100 .build()
101 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))?;
102 Ok(Self { matcher })
103 }
104
105 pub fn matches<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
107 self.matcher
108 .should_process(path)
109 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
110 }
111
112 pub fn match_details<P: AsRef<Path>>(&mut self, path: P) -> Result<MatchResult> {
114 self.matcher
115 .is_match(path)
116 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
117 }
118}
119
120pub struct PatternBuilder {
122 includes: Vec<String>,
123 excludes: Vec<String>,
124 gitignore_files: Vec<std::path::PathBuf>,
125 case_sensitive: bool,
126}
127
128impl Default for PatternBuilder {
129 fn default() -> Self {
130 Self::new()
131 }
132}
133
134impl PatternBuilder {
135 pub fn new() -> Self {
137 Self {
138 includes: Vec::new(),
139 excludes: Vec::new(),
140 gitignore_files: Vec::new(),
141 case_sensitive: true,
142 }
143 }
144
145 pub fn include<S: Into<String>>(mut self, pattern: S) -> Self {
147 self.includes.push(pattern.into());
148 self
149 }
150
151 pub fn includes<I, S>(mut self, patterns: I) -> Self
153 where
154 I: IntoIterator<Item = S>,
155 S: Into<String>,
156 {
157 self.includes.extend(patterns.into_iter().map(|p| p.into()));
158 self
159 }
160
161 pub fn exclude<S: Into<String>>(mut self, pattern: S) -> Self {
163 self.excludes.push(pattern.into());
164 self
165 }
166
167 pub fn excludes<I, S>(mut self, patterns: I) -> Self
169 where
170 I: IntoIterator<Item = S>,
171 S: Into<String>,
172 {
173 self.excludes.extend(patterns.into_iter().map(|p| p.into()));
174 self
175 }
176
177 pub fn gitignore<P: AsRef<Path>>(mut self, path: P) -> Self {
179 self.gitignore_files.push(path.as_ref().to_path_buf());
180 self
181 }
182
183 pub fn case_sensitive(mut self, enabled: bool) -> Self {
185 self.case_sensitive = enabled;
186 self
187 }
188
189 pub fn build(self) -> Result<PatternMatcher> {
191 let options = MatcherOptions {
192 case_sensitive: self.case_sensitive,
193 respect_gitignore: !self.gitignore_files.is_empty(),
194 include_hidden: false,
195 custom_gitignore_files: self.gitignore_files,
196 override_patterns: Vec::new(),
197 };
198
199 let mut builder = PatternMatcherBuilder::new();
200
201 if !self.includes.is_empty() {
202 builder = builder.includes(self.includes);
203 }
204
205 if !self.excludes.is_empty() {
206 builder = builder.excludes(self.excludes);
207 }
208
209 builder = builder.case_sensitive(self.case_sensitive);
210
211 if let Some(first_gitignore) = options.custom_gitignore_files.first() {
213 if let Some(parent) = first_gitignore.parent() {
214 builder = builder.base_path(parent);
215 }
216 }
217
218 builder
219 .build()
220 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
221 }
222}
223
224pub mod utils {
226 use super::*;
227 use std::path::PathBuf;
228
229 pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
231 let path = path.as_ref();
232
233 let normalized = path.to_string_lossy().replace('\\', "/");
235
236 let components: Vec<&str> = normalized
238 .split('/')
239 .filter(|c| !c.is_empty() && *c != ".")
240 .collect();
241
242 let mut result = Vec::new();
243 for component in components {
244 if component == ".." && !result.is_empty() && result.last() != Some(&"..") {
245 result.pop();
246 } else {
247 result.push(component);
248 }
249 }
250
251 PathBuf::from(result.join("/"))
252 }
253
254 pub fn is_valid_glob_pattern(pattern: &str) -> bool {
256 glob::GlobPattern::new(pattern).is_ok()
257 }
258
259 pub fn is_valid_gitignore_pattern(pattern: &str) -> bool {
261 gitignore::GitignorePattern::new(pattern).is_ok()
262 }
263
264 pub fn parse_csv_patterns(csv: &str) -> Vec<String> {
266 csv.split(',')
267 .map(|s| s.trim().to_string())
268 .filter(|s| !s.is_empty())
269 .collect()
270 }
271
272 pub fn escape_glob_pattern(input: &str) -> String {
274 input
275 .replace('*', r"\*")
276 .replace('?', r"\?")
277 .replace('[', r"\[")
278 .replace(']', r"\]")
279 .replace('{', r"\{")
280 .replace('}', r"\}")
281 }
282
283 pub fn extension_to_glob(extension: &str) -> String {
285 format!("**/*.{}", extension.trim_start_matches('.'))
286 }
287
288 pub fn extensions_to_globs(extensions: &[&str]) -> Vec<String> {
290 extensions
291 .iter()
292 .map(|ext| extension_to_glob(ext))
293 .collect()
294 }
295}
296
297pub mod presets {
299 use super::*;
300
301 pub fn source_code() -> Result<PatternMatcher> {
303 PatternMatcherBuilder::new()
304 .includes([
305 "**/*.rs",
306 "**/*.py",
307 "**/*.js",
308 "**/*.ts",
309 "**/*.jsx",
310 "**/*.tsx",
311 "**/*.java",
312 "**/*.kt",
313 "**/*.scala",
314 "**/*.go",
315 "**/*.c",
316 "**/*.cpp",
317 "**/*.cxx",
318 "**/*.cc",
319 "**/*.h",
320 "**/*.hpp",
321 "**/*.cs",
322 "**/*.swift",
323 "**/*.dart",
324 "**/*.rb",
325 "**/*.php",
326 "**/*.sh",
327 "**/*.bash",
328 "**/*.zsh",
329 ])
330 .excludes([
331 "**/node_modules/**",
332 "**/target/**",
333 "**/build/**",
334 "**/dist/**",
335 "**/__pycache__/**",
336 "**/*.pyc",
337 "**/.git/**",
338 "**/vendor/**",
339 ])
340 .build()
341 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
342 }
343
344 pub fn documentation() -> Result<PatternMatcher> {
346 PatternMatcherBuilder::new()
347 .includes([
348 "**/*.md",
349 "**/*.rst",
350 "**/*.txt",
351 "**/*.adoc",
352 "**/*.org",
353 "**/README*",
354 "**/CHANGELOG*",
355 "**/LICENSE*",
356 "**/COPYING*",
357 "**/*.tex",
358 "**/*.latex",
359 ])
360 .excludes([
361 "**/node_modules/**",
362 "**/target/**",
363 "**/build/**",
364 "**/dist/**",
365 ])
366 .build()
367 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
368 }
369
370 pub fn configuration() -> Result<PatternMatcher> {
372 PatternMatcherBuilder::new()
373 .includes([
374 "**/*.json",
375 "**/*.yaml",
376 "**/*.yml",
377 "**/*.toml",
378 "**/*.ini",
379 "**/*.cfg",
380 "**/*.conf",
381 "**/*.xml",
382 "**/Dockerfile*",
383 "**/Makefile*",
384 "**/.env*",
385 "**/*.env",
386 ])
387 .excludes([
388 "**/node_modules/**",
389 "**/target/**",
390 "**/build/**",
391 "**/dist/**",
392 ])
393 .build()
394 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
395 }
396
397 pub fn web_assets() -> Result<PatternMatcher> {
399 PatternMatcherBuilder::new()
400 .includes([
401 "**/*.html",
402 "**/*.css",
403 "**/*.scss",
404 "**/*.sass",
405 "**/*.less",
406 "**/*.js",
407 "**/*.ts",
408 "**/*.jsx",
409 "**/*.tsx",
410 "**/*.vue",
411 "**/*.svelte",
412 ])
413 .excludes([
414 "**/node_modules/**",
415 "**/dist/**",
416 "**/build/**",
417 "**/.next/**",
418 "**/coverage/**",
419 "**/*.min.js",
420 "**/*.min.css",
421 ])
422 .build()
423 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
424 }
425
426 pub fn no_build_artifacts() -> Result<PatternMatcher> {
428 PatternMatcherBuilder::new()
429 .include("**/*")
430 .excludes([
431 "**/target/**",
432 "**/build/**",
433 "**/dist/**",
434 "**/out/**",
435 "**/node_modules/**",
436 "**/__pycache__/**",
437 "**/*.pyc",
438 "**/vendor/**",
439 "**/deps/**",
440 "**/.git/**",
441 "**/.svn/**",
442 "**/bin/**",
443 "**/obj/**",
444 "**/*.o",
445 "**/*.a",
446 "**/*.so",
447 "**/*.dylib",
448 "**/*.dll",
449 "**/*.exe",
450 "**/coverage/**",
451 "**/.nyc_output/**",
452 "**/junit.xml",
453 "**/test-results/**",
454 ])
455 .build()
456 .map_err(|e| ScribeError::pattern(e.to_string(), "unknown"))
457 }
458}
459
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// `QuickMatcher::new` applies both the include and the exclude slice.
    #[test]
    fn test_quick_matcher_creation() {
        let mut matcher = QuickMatcher::new(&["**/*.rs"], &["**/target/**"]).unwrap();
        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
    }

    /// `QuickMatcher::from_patterns` splits comma-separated lists.
    #[test]
    fn test_quick_matcher_csv() {
        let mut matcher = QuickMatcher::from_patterns(
            Some("**/*.rs,**/*.py"),
            Some("**/target/**,**/__pycache__/**"),
        )
        .unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(!matcher.matches("target/debug/lib.rs").unwrap());
        assert!(!matcher.matches("src/__pycache__/lib.pyc").unwrap());
    }

    /// Patterns added one at a time through the matcher builder.
    #[test]
    fn test_pattern_builder() {
        let mut matcher = PatternMatcherBuilder::new()
            .include("**/*.rs")
            .include("**/*.py")
            .exclude("**/target/**")
            .exclude("**/__pycache__/**")
            .case_sensitive(true)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(!matcher.should_process("target/debug/lib.rs").unwrap());
        assert!(!matcher.should_process("src/__pycache__/main.pyc").unwrap());
    }

    /// Patterns added in bulk through the matcher builder.
    #[test]
    fn test_pattern_builder_fluent_api() {
        let mut matcher = PatternMatcherBuilder::new()
            .includes(["**/*.rs", "**/*.py", "**/*.js"])
            .excludes(["**/node_modules/**", "**/target/**"])
            .case_sensitive(false)
            .build()
            .unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    /// Duplicate separators, `.` and `..` are resolved lexically.
    #[test]
    fn test_utils_path_normalization() {
        use super::utils::normalize_path;

        assert_eq!(normalize_path("src/lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src//lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(normalize_path("src/./lib.rs"), PathBuf::from("src/lib.rs"));
        assert_eq!(
            normalize_path("src/../src/lib.rs"),
            PathBuf::from("src/lib.rs")
        );
    }

    /// Well-formed glob and gitignore patterns are reported as valid.
    #[test]
    fn test_utils_pattern_validation() {
        use super::utils::{is_valid_gitignore_pattern, is_valid_glob_pattern};

        assert!(is_valid_glob_pattern("**/*.rs"));
        assert!(is_valid_glob_pattern("src/**"));
        assert!(is_valid_glob_pattern("*.{rs,py}"));

        assert!(is_valid_gitignore_pattern("*.rs"));
        assert!(is_valid_gitignore_pattern("!important.rs"));
        assert!(is_valid_gitignore_pattern("build/"));
    }

    /// CSV parsing trims whitespace and drops empty entries.
    #[test]
    fn test_utils_csv_parsing() {
        use super::utils::parse_csv_patterns;

        assert_eq!(
            parse_csv_patterns("*.rs,*.py, *.js "),
            vec!["*.rs", "*.py", "*.js"]
        );

        assert_eq!(parse_csv_patterns("single"), vec!["single"]);

        assert!(parse_csv_patterns("").is_empty());
        assert!(parse_csv_patterns(",,,").is_empty());
    }

    /// Extensions convert to recursive globs with or without a leading dot.
    #[test]
    fn test_utils_extension_conversion() {
        use super::utils::{extension_to_glob, extensions_to_globs};

        assert_eq!(extension_to_glob("rs"), "**/*.rs");
        assert_eq!(extension_to_glob(".py"), "**/*.py");

        assert_eq!(
            extensions_to_globs(&["rs", "py", "js"]),
            vec!["**/*.rs", "**/*.py", "**/*.js"]
        );
    }

    /// Each glob metacharacter gets a backslash prefix.
    #[test]
    fn test_utils_glob_escaping() {
        use super::utils::escape_glob_pattern;

        assert_eq!(escape_glob_pattern("file*.txt"), r"file\*.txt");
        assert_eq!(escape_glob_pattern("test?file.txt"), r"test\?file.txt");
        assert_eq!(escape_glob_pattern("file[1-3].txt"), r"file\[1-3\].txt");
        assert_eq!(escape_glob_pattern("file{a,b}.txt"), r"file\{a,b\}.txt");
    }

    #[test]
    fn test_presets_source_code() {
        let mut matcher = presets::source_code().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("src/main.py").unwrap());
        assert!(matcher.should_process("src/app.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
    }

    #[test]
    fn test_presets_documentation() {
        let mut matcher = presets::documentation().unwrap();

        assert!(matcher.should_process("README.md").unwrap());
        assert!(matcher.should_process("docs/guide.rst").unwrap());
        assert!(matcher.should_process("CHANGELOG.txt").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
        assert!(!matcher
            .should_process("node_modules/package/README.md")
            .unwrap());
    }

    #[test]
    fn test_presets_configuration() {
        let mut matcher = presets::configuration().unwrap();

        assert!(matcher.should_process("config.json").unwrap());
        assert!(matcher.should_process("docker-compose.yml").unwrap());
        assert!(matcher.should_process("Dockerfile").unwrap());
        assert!(matcher.should_process("Makefile").unwrap());
        assert!(!matcher.should_process("src/main.rs").unwrap());
    }

    #[test]
    fn test_presets_web_assets() {
        let mut matcher = presets::web_assets().unwrap();

        assert!(matcher.should_process("index.html").unwrap());
        assert!(matcher.should_process("styles.css").unwrap());
        assert!(matcher.should_process("app.js").unwrap());
        assert!(matcher.should_process("component.tsx").unwrap());
        assert!(!matcher.should_process("app.min.js").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
    }

    #[test]
    fn test_presets_no_build_artifacts() {
        let mut matcher = presets::no_build_artifacts().unwrap();

        assert!(matcher.should_process("src/lib.rs").unwrap());
        assert!(matcher.should_process("README.md").unwrap());
        assert!(!matcher.should_process("target/debug/main").unwrap());
        assert!(!matcher.should_process("node_modules/lib/index.js").unwrap());
        assert!(!matcher.should_process("__pycache__/main.pyc").unwrap());
        assert!(!matcher.should_process("build/output.js").unwrap());
    }

    /// Lays out a small project tree on disk and checks the source-code
    /// preset against the relative paths of the files actually created.
    ///
    /// This is a plain synchronous test: it only performs blocking `std::fs`
    /// calls and awaits nothing, so no async runtime is needed.
    #[test]
    fn test_integration_with_file_system() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        for dir in ["src", "target/debug", "docs"] {
            fs::create_dir_all(base_path.join(dir)).unwrap();
        }

        // (relative path, file contents, expected `should_process` verdict)
        let fixtures = [
            ("src/lib.rs", "fn main() {}", true),
            ("src/main.py", "print('hello')", true),
            ("target/debug/main", "binary", false),
            ("README.md", "# Project", false),
            ("docs/guide.md", "# Guide", false),
        ];

        for (relative, contents, _) in fixtures {
            fs::write(base_path.join(relative), contents).unwrap();
        }

        let mut matcher = presets::source_code().unwrap();

        for (relative, _, expected) in fixtures {
            assert!(
                base_path.join(relative).is_file(),
                "fixture {} was not created",
                relative
            );
            assert_eq!(
                matcher.should_process(relative).unwrap(),
                expected,
                "unexpected verdict for {}",
                relative
            );
        }
    }
}