1use scribe_core::Result;
7use std::path::Path;
8use std::collections::HashMap;
9use globset::{Glob, GlobBuilder, GlobSet, GlobSetBuilder};
10use serde::{Serialize, Deserialize};
11use crate::utils::normalize_path;
12
13#[derive(Debug)]
15pub struct GlobMatcher {
16 patterns: Vec<GlobPattern>,
17 compiled_set: Option<GlobSet>,
18 options: GlobOptions,
19 cache: HashMap<String, bool>,
20 cache_hits: u64,
21 cache_misses: u64,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct GlobPattern {
27 pub pattern: String,
28 pub case_sensitive: bool,
29 pub literal_separator: bool,
30 pub backslash_escape: bool,
31 pub require_literal_separator: bool,
32 pub require_literal_leading_dot: bool,
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct GlobOptions {
38 pub case_sensitive: bool,
39 pub literal_separator: bool,
40 pub backslash_escape: bool,
41 pub require_literal_separator: bool,
42 pub require_literal_leading_dot: bool,
43 pub cache_enabled: bool,
44 pub cache_size_limit: usize,
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct GlobMatchResult {
50 pub matched: bool,
51 pub pattern_index: Option<usize>,
52 pub pattern: Option<String>,
53 pub match_method: MatchMethod,
54}
55
56#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
58pub enum MatchMethod {
59 Cached,
60 Compiled,
61 Individual,
62 Literal,
63}
64
65impl Default for GlobOptions {
66 fn default() -> Self {
67 Self {
68 case_sensitive: true,
69 literal_separator: false,
70 backslash_escape: false,
71 require_literal_separator: false,
72 require_literal_leading_dot: false,
73 cache_enabled: true,
74 cache_size_limit: 1000,
75 }
76 }
77}
78
79impl GlobPattern {
80 pub fn new(pattern: &str) -> Result<Self> {
82 Self::with_options(pattern, &GlobOptions::default())
83 }
84
85 pub fn with_options(pattern: &str, options: &GlobOptions) -> Result<Self> {
87 let _glob = Glob::new(pattern)?;
89
90 Ok(Self {
91 pattern: pattern.to_string(),
92 case_sensitive: options.case_sensitive,
93 literal_separator: options.literal_separator,
94 backslash_escape: options.backslash_escape,
95 require_literal_separator: options.require_literal_separator,
96 require_literal_leading_dot: options.require_literal_leading_dot,
97 })
98 }
99
100 pub fn matches<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
102 let normalized_path = normalize_path(path);
103 let path_str = normalized_path.to_string_lossy();
104
105 let mut glob_builder = globset::GlobBuilder::new(&self.pattern);
106 glob_builder.case_insensitive(!self.case_sensitive);
107 glob_builder.literal_separator(self.literal_separator);
108 glob_builder.backslash_escape(self.backslash_escape);
109
110 let glob = glob_builder.build()?;
111 let matcher = glob.compile_matcher();
112 Ok(matcher.is_match(path_str.as_ref()))
113 }
114
115 pub fn is_literal(&self) -> bool {
117 !self.pattern.contains('*') &&
118 !self.pattern.contains('?') &&
119 !self.pattern.contains('[') &&
120 !self.pattern.contains('{')
121 }
122
123 pub fn as_str(&self) -> &str {
125 &self.pattern
126 }
127}
128
129impl GlobMatcher {
130 pub fn new() -> Self {
132 Self::with_options(GlobOptions::default())
133 }
134
135 pub fn with_options(options: GlobOptions) -> Self {
137 Self {
138 patterns: Vec::new(),
139 compiled_set: None,
140 options,
141 cache: HashMap::new(),
142 cache_hits: 0,
143 cache_misses: 0,
144 }
145 }
146
147 pub fn add_pattern(&mut self, pattern: &str) -> Result<()> {
149 let glob_pattern = GlobPattern::with_options(pattern, &self.options)?;
150 self.patterns.push(glob_pattern);
151
152 self.compiled_set = None;
154
155 Ok(())
156 }
157
158 pub fn add_patterns<I, S>(&mut self, patterns: I) -> Result<()>
160 where
161 I: IntoIterator<Item = S>,
162 S: AsRef<str>,
163 {
164 for pattern in patterns {
165 self.add_pattern(pattern.as_ref())?;
166 }
167 Ok(())
168 }
169
170 pub fn add_patterns_csv(&mut self, csv: &str) -> Result<()> {
172 let patterns = crate::utils::parse_csv_patterns(csv);
173 for pattern in patterns {
174 self.add_pattern(&pattern)?;
175 }
176 Ok(())
177 }
178
179 pub fn clear(&mut self) {
181 self.patterns.clear();
182 self.compiled_set = None;
183 self.cache.clear();
184 }
185
186 pub fn matches<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
188 let result = self.match_with_details(path)?;
189 Ok(result.matched)
190 }
191
192 pub fn match_with_details<P: AsRef<Path>>(&mut self, path: P) -> Result<GlobMatchResult> {
194 let normalized_path = normalize_path(path);
195 let path_str = normalized_path.to_string_lossy().to_string();
196
197 if self.options.cache_enabled {
199 if let Some(&cached_result) = self.cache.get(&path_str) {
200 self.cache_hits += 1;
201 return Ok(GlobMatchResult {
202 matched: cached_result,
203 pattern_index: None, pattern: None,
205 match_method: MatchMethod::Cached,
206 });
207 }
208 self.cache_misses += 1;
209 }
210
211 if self.patterns.is_empty() {
212 return Ok(GlobMatchResult {
213 matched: false,
214 pattern_index: None,
215 pattern: None,
216 match_method: MatchMethod::Individual,
217 });
218 }
219
220 let result = if self.patterns.len() > 1 {
222 self.match_with_compiled_set(&normalized_path)?
223 } else {
224 self.match_with_individual_pattern(&normalized_path)?
225 };
226
227 if self.options.cache_enabled {
229 if self.cache.len() >= self.options.cache_size_limit {
230 let keys_to_remove: Vec<String> = self.cache.keys()
232 .take(self.cache.len() / 2)
233 .cloned()
234 .collect();
235 for key in keys_to_remove {
236 self.cache.remove(&key);
237 }
238 }
239 self.cache.insert(path_str, result.matched);
240 }
241
242 Ok(result)
243 }
244
245 fn match_with_compiled_set(&mut self, path: &Path) -> Result<GlobMatchResult> {
247 if self.compiled_set.is_none() {
248 self.compiled_set = Some(self.compile_patterns()?);
249 }
250
251 let compiled_set = self.compiled_set.as_ref().unwrap();
252 let path_str = path.to_string_lossy();
253
254 let matches: Vec<usize> = compiled_set.matches(path_str.as_ref());
255
256 if matches.is_empty() {
257 Ok(GlobMatchResult {
258 matched: false,
259 pattern_index: None,
260 pattern: None,
261 match_method: MatchMethod::Compiled,
262 })
263 } else {
264 let pattern_index = matches[0];
265 let pattern = self.patterns.get(pattern_index)
266 .map(|p| p.pattern.clone());
267
268 Ok(GlobMatchResult {
269 matched: true,
270 pattern_index: Some(pattern_index),
271 pattern,
272 match_method: MatchMethod::Compiled,
273 })
274 }
275 }
276
277 fn match_with_individual_pattern(&self, path: &Path) -> Result<GlobMatchResult> {
279 for (index, pattern) in self.patterns.iter().enumerate() {
280 if pattern.matches(path)? {
281 return Ok(GlobMatchResult {
282 matched: true,
283 pattern_index: Some(index),
284 pattern: Some(pattern.pattern.clone()),
285 match_method: if pattern.is_literal() {
286 MatchMethod::Literal
287 } else {
288 MatchMethod::Individual
289 },
290 });
291 }
292 }
293
294 Ok(GlobMatchResult {
295 matched: false,
296 pattern_index: None,
297 pattern: None,
298 match_method: MatchMethod::Individual,
299 })
300 }
301
302 fn compile_patterns(&self) -> Result<GlobSet> {
304 let mut builder = GlobSetBuilder::new();
305
306 for pattern in &self.patterns {
307 let mut glob_builder = GlobBuilder::new(&pattern.pattern);
308 glob_builder.case_insensitive(!pattern.case_sensitive);
309 glob_builder.literal_separator(pattern.literal_separator);
310 glob_builder.backslash_escape(pattern.backslash_escape);
311
312 let glob = glob_builder.build()?;
313 builder.add(glob);
314 }
315
316 Ok(builder.build()?)
317 }
318
319 pub fn pattern_count(&self) -> usize {
321 self.patterns.len()
322 }
323
324 pub fn patterns(&self) -> &[GlobPattern] {
326 &self.patterns
327 }
328
329 pub fn cache_stats(&self) -> (u64, u64, usize) {
331 (self.cache_hits, self.cache_misses, self.cache.len())
332 }
333
334 pub fn clear_cache(&mut self) {
336 self.cache.clear();
337 self.cache_hits = 0;
338 self.cache_misses = 0;
339 }
340
341 pub fn is_compiled(&self) -> bool {
343 self.compiled_set.is_some()
344 }
345
346 pub fn recompile(&mut self) -> Result<()> {
348 if !self.patterns.is_empty() {
349 self.compiled_set = Some(self.compile_patterns()?);
350 }
351 Ok(())
352 }
353
354 pub fn cache_hit_ratio(&self) -> f64 {
356 let total = self.cache_hits + self.cache_misses;
357 if total == 0 {
358 0.0
359 } else {
360 self.cache_hits as f64 / total as f64
361 }
362 }
363
364 pub fn optimize(&mut self) {
366 self.patterns.sort_by_key(|p| !p.is_literal());
368
369 self.compiled_set = None;
371 }
372
373 pub fn match_all<P: AsRef<Path>>(&mut self, path: P) -> Result<Vec<usize>> {
375 if self.compiled_set.is_none() && self.patterns.len() > 1 {
376 self.compiled_set = Some(self.compile_patterns()?);
377 }
378
379 if let Some(ref compiled_set) = self.compiled_set {
380 let path_str = path.as_ref().to_string_lossy();
381 Ok(compiled_set.matches(path_str.as_ref()))
382 } else {
383 let mut matches = Vec::new();
385 for (index, pattern) in self.patterns.iter().enumerate() {
386 if pattern.matches(&path)? {
387 matches.push(index);
388 }
389 }
390 Ok(matches)
391 }
392 }
393
394 pub fn is_empty(&self) -> bool {
396 self.patterns.is_empty()
397 }
398
399 pub fn set_cache_enabled(&mut self, enabled: bool) {
401 self.options.cache_enabled = enabled;
402 if !enabled {
403 self.clear_cache();
404 }
405 }
406
407 pub fn set_cache_size_limit(&mut self, limit: usize) {
409 self.options.cache_size_limit = limit;
410
411 if self.cache.len() > limit {
413 let keys_to_remove: Vec<String> = self.cache.keys()
414 .skip(limit)
415 .cloned()
416 .collect();
417 for key in keys_to_remove {
418 self.cache.remove(&key);
419 }
420 }
421 }
422}
423
424impl Default for GlobMatcher {
425 fn default() -> Self {
426 Self::new()
427 }
428}
429
430impl GlobMatcher {
432 pub fn for_extensions(extensions: &[&str]) -> Result<Self> {
434 let mut matcher = Self::new();
435 for ext in extensions {
436 let pattern = crate::utils::extension_to_glob(ext);
437 matcher.add_pattern(&pattern)?;
438 }
439 Ok(matcher)
440 }
441
442 pub fn for_directories(directories: &[&str]) -> Result<Self> {
444 let mut matcher = Self::new();
445 for dir in directories {
446 let pattern = format!("{}/**/*", dir.trim_end_matches('/'));
447 matcher.add_pattern(&pattern)?;
448 }
449 Ok(matcher)
450 }
451
452 pub fn case_insensitive() -> Self {
454 Self::with_options(GlobOptions {
455 case_sensitive: false,
456 ..Default::default()
457 })
458 }
459}
460
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `matcher` accepts every path in `accepted` and then
    /// rejects every path in `rejected`, in the order given.
    fn check_paths(matcher: &mut GlobMatcher, accepted: &[&str], rejected: &[&str]) {
        for &path in accepted {
            assert!(matcher.matches(path).unwrap());
        }
        for &path in rejected {
            assert!(!matcher.matches(path).unwrap());
        }
    }

    #[test]
    fn test_glob_pattern_creation() {
        let rust_files = GlobPattern::new("**/*.rs").unwrap();
        assert_eq!(rust_files.pattern, "**/*.rs");
        assert!(rust_files.case_sensitive);

        for &path in &["src/lib.rs", "tests/integration/test.rs"] {
            assert!(rust_files.matches(path).unwrap());
        }
        assert!(!rust_files.matches("src/lib.py").unwrap());
    }

    #[test]
    fn test_glob_pattern_literal_detection() {
        assert!(GlobPattern::new("src/lib.rs").unwrap().is_literal());

        // One sample per metacharacter: `*`, `?`, `[` and `{`.
        let wildcard_patterns = [
            "src/**/*.rs",
            "src/lib?.rs",
            "src/lib[123].rs",
            "src/lib.{rs,py}",
        ];
        for &text in &wildcard_patterns {
            assert!(!GlobPattern::new(text).unwrap().is_literal());
        }
    }

    #[test]
    fn test_case_insensitive_matching() {
        let ignore_case = GlobOptions {
            case_sensitive: false,
            ..Default::default()
        };
        let upper_ext = GlobPattern::with_options("**/*.RS", &ignore_case).unwrap();

        for &path in &["src/lib.rs", "src/LIB.RS", "src/Lib.Rs"] {
            assert!(upper_ext.matches(path).unwrap());
        }
    }

    #[test]
    fn test_glob_matcher_single_pattern() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();

        check_paths(
            &mut matcher,
            &["src/lib.rs", "tests/test.rs"],
            &["src/lib.py"],
        );
    }

    #[test]
    fn test_glob_matcher_multiple_patterns() {
        let mut matcher = GlobMatcher::new();
        for &glob in &["**/*.rs", "**/*.py", "**/*.js"] {
            matcher.add_pattern(glob).unwrap();
        }

        check_paths(
            &mut matcher,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );
    }

    #[test]
    fn test_glob_matcher_csv_patterns() {
        let mut matcher = GlobMatcher::new();
        matcher.add_patterns_csv("**/*.rs, **/*.py , **/*.js").unwrap();

        check_paths(
            &mut matcher,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );
        assert_eq!(matcher.pattern_count(), 3);
    }

    #[test]
    fn test_glob_matcher_detailed_results() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("**/*.py").unwrap();

        // (path, expected pattern index, expected pattern text)
        let expectations = [
            ("src/lib.rs", 0usize, "**/*.rs"),
            ("src/main.py", 1usize, "**/*.py"),
        ];
        for &(path, index, glob) in &expectations {
            let details = matcher.match_with_details(path).unwrap();
            assert!(details.matched);
            assert_eq!(details.pattern_index, Some(index));
            assert_eq!(details.pattern, Some(glob.to_string()));
        }

        let miss = matcher.match_with_details("src/data.json").unwrap();
        assert!(!miss.matched);
        assert_eq!(miss.pattern_index, None);
    }

    #[test]
    fn test_glob_matcher_cache() {
        let options = GlobOptions {
            cache_enabled: true,
            cache_size_limit: 10,
            ..Default::default()
        };
        let mut matcher = GlobMatcher::with_options(options);
        matcher.add_pattern("**/*.rs").unwrap();

        // First lookup misses the cache and populates it.
        assert!(matcher.matches("src/lib.rs").unwrap());
        let (hits, misses, size) = matcher.cache_stats();
        assert_eq!((hits, misses, size), (0, 1, 1));

        // Second lookup of the same path is served from the cache.
        assert!(matcher.matches("src/lib.rs").unwrap());
        let (hits, misses, size) = matcher.cache_stats();
        assert_eq!((hits, misses, size), (1, 1, 1));

        assert_eq!(matcher.cache_hit_ratio(), 0.5);
    }

    #[test]
    fn test_glob_matcher_cache_eviction() {
        let options = GlobOptions {
            cache_enabled: true,
            cache_size_limit: 2,
            ..Default::default()
        };
        let mut matcher = GlobMatcher::with_options(options);
        matcher.add_pattern("**/*").unwrap();

        // Fill the cache up to its limit.
        matcher.matches("file1.rs").unwrap();
        matcher.matches("file2.py").unwrap();
        assert_eq!(matcher.cache_stats().2, 2);

        // A third entry forces eviction; the size stays at the limit.
        matcher.matches("file3.js").unwrap();
        assert_eq!(matcher.cache_stats().2, 2);
    }

    #[test]
    fn test_glob_matcher_optimization() {
        let insertion_order = ["**/*.rs", "exact/path.py", "src/**/*.js"];

        let mut matcher = GlobMatcher::new();
        for &glob in &insertion_order {
            matcher.add_pattern(glob).unwrap();
        }
        for (position, &glob) in insertion_order.iter().enumerate() {
            assert_eq!(matcher.patterns()[position].pattern, glob);
        }

        matcher.optimize();

        // The literal pattern now leads the list.
        assert_eq!(matcher.patterns()[0].pattern, "exact/path.py");
        assert!(matcher.patterns()[0].is_literal());
    }

    #[test]
    fn test_glob_matcher_match_all() {
        let mut matcher = GlobMatcher::new();
        for &glob in &["**/*.rs", "src/**", "**/*lib*"] {
            matcher.add_pattern(glob).unwrap();
        }

        let every_pattern = matcher.match_all("src/lib.rs").unwrap();
        assert_eq!(every_pattern.len(), 3);
        for index in 0..3usize {
            assert!(every_pattern.contains(&index));
        }

        let extension_only = matcher.match_all("tests/test.rs").unwrap();
        assert_eq!(extension_only.len(), 1);
        assert!(extension_only.contains(&0));
    }

    #[test]
    fn test_glob_matcher_convenience_methods() {
        let mut by_extension = GlobMatcher::for_extensions(&["rs", "py", "js"]).unwrap();
        check_paths(
            &mut by_extension,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );
        assert_eq!(by_extension.pattern_count(), 3);

        let mut by_directory = GlobMatcher::for_directories(&["src", "tests"]).unwrap();
        check_paths(
            &mut by_directory,
            &["src/lib.rs", "tests/test.rs"],
            &["docs/readme.md"],
        );
        assert_eq!(by_directory.pattern_count(), 2);
    }

    #[test]
    fn test_glob_matcher_case_insensitive() {
        let mut matcher = GlobMatcher::case_insensitive();
        matcher.add_pattern("**/*.RS").unwrap();

        check_paths(
            &mut matcher,
            &["src/lib.rs", "src/LIB.RS", "src/Lib.Rs"],
            &[],
        );
    }

    #[test]
    fn test_glob_matcher_empty() {
        let mut matcher = GlobMatcher::new();
        assert!(matcher.is_empty());
        assert!(!matcher.matches("any/path").unwrap());

        matcher.add_pattern("**/*.rs").unwrap();
        assert!(!matcher.is_empty());

        matcher.clear();
        assert!(matcher.is_empty());
        assert!(!matcher.matches("any/path.rs").unwrap());
    }

    #[test]
    fn test_glob_matcher_compilation() {
        let mut matcher = GlobMatcher::new();
        assert!(!matcher.is_compiled());

        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("**/*.py").unwrap();

        // Adding patterns alone does not build the combined set.
        assert!(!matcher.is_compiled());

        // The first match with several patterns builds it.
        matcher.matches("src/lib.rs").unwrap();
        assert!(matcher.is_compiled());

        // Another pattern invalidates the built set.
        matcher.add_pattern("**/*.js").unwrap();
        assert!(!matcher.is_compiled());

        // An explicit recompile restores it.
        matcher.recompile().unwrap();
        assert!(matcher.is_compiled());
    }

    #[test]
    fn test_complex_glob_patterns() {
        let mut matcher = GlobMatcher::new();

        // Brace alternation.
        matcher.add_pattern("**/*.{rs,py,js}").unwrap();
        check_paths(
            &mut matcher,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );

        matcher.clear();

        // Character class.
        matcher.add_pattern("test[0-9].rs").unwrap();
        check_paths(&mut matcher, &["test1.rs", "test9.rs"], &["testA.rs"]);

        matcher.clear();

        // Single-character wildcard.
        matcher.add_pattern("test?.rs").unwrap();
        check_paths(&mut matcher, &["test1.rs", "testA.rs"], &["test12.rs"]);
    }

    #[test]
    fn test_path_normalization_in_matching() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("src/**/*.rs").unwrap();

        // Forward- and back-slash spellings of the same paths must both match.
        let equivalent_spellings = [
            "src/lib.rs",
            "src\\lib.rs",
            "src/subdir/lib.rs",
            "src\\subdir\\lib.rs",
        ];
        for &path in &equivalent_spellings {
            assert!(matcher.matches(path).unwrap());
        }
    }
}