1use crate::utils::normalize_path;
7use globset::{Glob, GlobBuilder, GlobSet, GlobSetBuilder};
8use scribe_core::Result;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::path::Path;
12
/// Matches paths against a list of glob patterns, optionally caching results.
#[derive(Debug)]
pub struct GlobMatcher {
    /// Patterns in their current order; indices reported in match results
    /// refer to positions in this list.
    patterns: Vec<GlobPattern>,
    /// Combined `GlobSet` built from `patterns`; `None` until it has been
    /// built and again whenever it has been reset.
    compiled_set: Option<GlobSet>,
    /// Settings copied into each pattern added to this matcher, plus the
    /// cache configuration.
    options: GlobOptions,
    /// Maps a normalized path string to the boolean outcome of an earlier match.
    cache: HashMap<String, bool>,
    /// Number of lookups that were answered from `cache`.
    cache_hits: u64,
    /// Number of lookups that were not found in `cache` while caching was enabled.
    cache_misses: u64,
}
23
/// A single glob pattern together with the matching flags it was created with.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobPattern {
    /// The glob text exactly as supplied by the caller.
    pub pattern: String,
    /// When `false`, the glob is built case-insensitively.
    pub case_sensitive: bool,
    /// Forwarded to `GlobBuilder::literal_separator` when the glob is built.
    pub literal_separator: bool,
    /// Forwarded to `GlobBuilder::backslash_escape` when the glob is built.
    pub backslash_escape: bool,
    /// Copied from `GlobOptions`; not consulted by `GlobPattern::matches`.
    pub require_literal_separator: bool,
    /// Copied from `GlobOptions`; not consulted by `GlobPattern::matches`.
    pub require_literal_leading_dot: bool,
}
34
/// Configuration shared by `GlobPattern` and `GlobMatcher`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobOptions {
    /// When `false`, globs are built case-insensitively.
    pub case_sensitive: bool,
    /// Forwarded to `GlobBuilder::literal_separator`.
    pub literal_separator: bool,
    /// Forwarded to `GlobBuilder::backslash_escape`.
    pub backslash_escape: bool,
    /// Copied onto each `GlobPattern`; not applied when globs are built.
    pub require_literal_separator: bool,
    /// Copied onto each `GlobPattern`; not applied when globs are built.
    pub require_literal_leading_dot: bool,
    /// Whether `GlobMatcher` records and reuses per-path match results.
    pub cache_enabled: bool,
    /// Entry count at which `GlobMatcher` starts evicting cached results.
    pub cache_size_limit: usize,
}
46
/// Outcome of matching one path against a `GlobMatcher`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobMatchResult {
    /// `true` when at least one pattern matched the path.
    pub matched: bool,
    /// Index of the reported matching pattern; `None` when nothing matched
    /// or when the result came from the cache.
    pub pattern_index: Option<usize>,
    /// Text of the reported matching pattern; `None` under the same
    /// conditions as `pattern_index`.
    pub pattern: Option<String>,
    /// How the result was produced.
    pub match_method: MatchMethod,
}
55
/// Identifies which code path produced a `GlobMatchResult`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum MatchMethod {
    /// The result was read from the matcher's path cache.
    Cached,
    /// The result came from the combined `GlobSet`.
    Compiled,
    /// The result came from testing patterns one at a time; also used for
    /// the "no patterns" and "nothing matched" results of that path.
    Individual,
    /// A pattern tested individually matched and contains none of the
    /// characters `*`, `?`, `[` or `{`.
    Literal,
}
64
65impl Default for GlobOptions {
66 fn default() -> Self {
67 Self {
68 case_sensitive: true,
69 literal_separator: false,
70 backslash_escape: false,
71 require_literal_separator: false,
72 require_literal_leading_dot: false,
73 cache_enabled: true,
74 cache_size_limit: 1000,
75 }
76 }
77}
78
79impl GlobPattern {
80 pub fn new(pattern: &str) -> Result<Self> {
82 Self::with_options(pattern, &GlobOptions::default())
83 }
84
85 pub fn with_options(pattern: &str, options: &GlobOptions) -> Result<Self> {
87 let _glob = Glob::new(pattern)?;
89
90 Ok(Self {
91 pattern: pattern.to_string(),
92 case_sensitive: options.case_sensitive,
93 literal_separator: options.literal_separator,
94 backslash_escape: options.backslash_escape,
95 require_literal_separator: options.require_literal_separator,
96 require_literal_leading_dot: options.require_literal_leading_dot,
97 })
98 }
99
100 pub fn matches<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
102 let normalized_path = normalize_path(path);
103 let path_str = normalized_path.to_string_lossy();
104
105 let mut glob_builder = globset::GlobBuilder::new(&self.pattern);
106 glob_builder.case_insensitive(!self.case_sensitive);
107 glob_builder.literal_separator(self.literal_separator);
108 glob_builder.backslash_escape(self.backslash_escape);
109
110 let glob = glob_builder.build()?;
111 let matcher = glob.compile_matcher();
112 Ok(matcher.is_match(path_str.as_ref()))
113 }
114
115 pub fn is_literal(&self) -> bool {
117 !self.pattern.contains('*')
118 && !self.pattern.contains('?')
119 && !self.pattern.contains('[')
120 && !self.pattern.contains('{')
121 }
122
123 pub fn as_str(&self) -> &str {
125 &self.pattern
126 }
127}
128
129impl GlobMatcher {
130 pub fn new() -> Self {
132 Self::with_options(GlobOptions::default())
133 }
134
135 pub fn with_options(options: GlobOptions) -> Self {
137 Self {
138 patterns: Vec::new(),
139 compiled_set: None,
140 options,
141 cache: HashMap::new(),
142 cache_hits: 0,
143 cache_misses: 0,
144 }
145 }
146
147 pub fn add_pattern(&mut self, pattern: &str) -> Result<()> {
149 let glob_pattern = GlobPattern::with_options(pattern, &self.options)?;
150 self.patterns.push(glob_pattern);
151
152 self.compiled_set = None;
154
155 Ok(())
156 }
157
158 pub fn add_patterns<I, S>(&mut self, patterns: I) -> Result<()>
160 where
161 I: IntoIterator<Item = S>,
162 S: AsRef<str>,
163 {
164 for pattern in patterns {
165 self.add_pattern(pattern.as_ref())?;
166 }
167 Ok(())
168 }
169
170 pub fn add_patterns_csv(&mut self, csv: &str) -> Result<()> {
172 let patterns = crate::utils::parse_csv_patterns(csv);
173 for pattern in patterns {
174 self.add_pattern(&pattern)?;
175 }
176 Ok(())
177 }
178
179 pub fn clear(&mut self) {
181 self.patterns.clear();
182 self.compiled_set = None;
183 self.cache.clear();
184 }
185
186 pub fn matches<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
188 let result = self.match_with_details(path)?;
189 Ok(result.matched)
190 }
191
192 pub fn match_with_details<P: AsRef<Path>>(&mut self, path: P) -> Result<GlobMatchResult> {
194 let normalized_path = normalize_path(path);
195 let path_str = normalized_path.to_string_lossy().to_string();
196
197 if self.options.cache_enabled {
199 if let Some(&cached_result) = self.cache.get(&path_str) {
200 self.cache_hits += 1;
201 return Ok(GlobMatchResult {
202 matched: cached_result,
203 pattern_index: None, pattern: None,
205 match_method: MatchMethod::Cached,
206 });
207 }
208 self.cache_misses += 1;
209 }
210
211 if self.patterns.is_empty() {
212 return Ok(GlobMatchResult {
213 matched: false,
214 pattern_index: None,
215 pattern: None,
216 match_method: MatchMethod::Individual,
217 });
218 }
219
220 let result = if self.patterns.len() > 1 {
222 self.match_with_compiled_set(&normalized_path)?
223 } else {
224 self.match_with_individual_pattern(&normalized_path)?
225 };
226
227 if self.options.cache_enabled {
229 if self.cache.len() >= self.options.cache_size_limit {
230 let keys_to_remove: Vec<String> = self
232 .cache
233 .keys()
234 .take(self.cache.len() / 2)
235 .cloned()
236 .collect();
237 for key in keys_to_remove {
238 self.cache.remove(&key);
239 }
240 }
241 self.cache.insert(path_str, result.matched);
242 }
243
244 Ok(result)
245 }
246
247 fn match_with_compiled_set(&mut self, path: &Path) -> Result<GlobMatchResult> {
249 if self.compiled_set.is_none() {
250 self.compiled_set = Some(self.compile_patterns()?);
251 }
252
253 let compiled_set = self.compiled_set.as_ref().unwrap();
254 let path_str = path.to_string_lossy();
255
256 let matches: Vec<usize> = compiled_set.matches(path_str.as_ref());
257
258 if matches.is_empty() {
259 Ok(GlobMatchResult {
260 matched: false,
261 pattern_index: None,
262 pattern: None,
263 match_method: MatchMethod::Compiled,
264 })
265 } else {
266 let pattern_index = matches[0];
267 let pattern = self.patterns.get(pattern_index).map(|p| p.pattern.clone());
268
269 Ok(GlobMatchResult {
270 matched: true,
271 pattern_index: Some(pattern_index),
272 pattern,
273 match_method: MatchMethod::Compiled,
274 })
275 }
276 }
277
278 fn match_with_individual_pattern(&self, path: &Path) -> Result<GlobMatchResult> {
280 for (index, pattern) in self.patterns.iter().enumerate() {
281 if pattern.matches(path)? {
282 return Ok(GlobMatchResult {
283 matched: true,
284 pattern_index: Some(index),
285 pattern: Some(pattern.pattern.clone()),
286 match_method: if pattern.is_literal() {
287 MatchMethod::Literal
288 } else {
289 MatchMethod::Individual
290 },
291 });
292 }
293 }
294
295 Ok(GlobMatchResult {
296 matched: false,
297 pattern_index: None,
298 pattern: None,
299 match_method: MatchMethod::Individual,
300 })
301 }
302
303 fn compile_patterns(&self) -> Result<GlobSet> {
305 let mut builder = GlobSetBuilder::new();
306
307 for pattern in &self.patterns {
308 let mut glob_builder = GlobBuilder::new(&pattern.pattern);
309 glob_builder.case_insensitive(!pattern.case_sensitive);
310 glob_builder.literal_separator(pattern.literal_separator);
311 glob_builder.backslash_escape(pattern.backslash_escape);
312
313 let glob = glob_builder.build()?;
314 builder.add(glob);
315 }
316
317 Ok(builder.build()?)
318 }
319
320 pub fn pattern_count(&self) -> usize {
322 self.patterns.len()
323 }
324
325 pub fn patterns(&self) -> &[GlobPattern] {
327 &self.patterns
328 }
329
330 pub fn cache_stats(&self) -> (u64, u64, usize) {
332 (self.cache_hits, self.cache_misses, self.cache.len())
333 }
334
335 pub fn clear_cache(&mut self) {
337 self.cache.clear();
338 self.cache_hits = 0;
339 self.cache_misses = 0;
340 }
341
342 pub fn is_compiled(&self) -> bool {
344 self.compiled_set.is_some()
345 }
346
347 pub fn recompile(&mut self) -> Result<()> {
349 if !self.patterns.is_empty() {
350 self.compiled_set = Some(self.compile_patterns()?);
351 }
352 Ok(())
353 }
354
355 pub fn cache_hit_ratio(&self) -> f64 {
357 let total = self.cache_hits + self.cache_misses;
358 if total == 0 {
359 0.0
360 } else {
361 self.cache_hits as f64 / total as f64
362 }
363 }
364
365 pub fn optimize(&mut self) {
367 self.patterns.sort_by_key(|p| !p.is_literal());
369
370 self.compiled_set = None;
372 }
373
374 pub fn match_all<P: AsRef<Path>>(&mut self, path: P) -> Result<Vec<usize>> {
376 if self.compiled_set.is_none() && self.patterns.len() > 1 {
377 self.compiled_set = Some(self.compile_patterns()?);
378 }
379
380 if let Some(ref compiled_set) = self.compiled_set {
381 let path_str = path.as_ref().to_string_lossy();
382 Ok(compiled_set.matches(path_str.as_ref()))
383 } else {
384 let mut matches = Vec::new();
386 for (index, pattern) in self.patterns.iter().enumerate() {
387 if pattern.matches(&path)? {
388 matches.push(index);
389 }
390 }
391 Ok(matches)
392 }
393 }
394
395 pub fn is_empty(&self) -> bool {
397 self.patterns.is_empty()
398 }
399
400 pub fn set_cache_enabled(&mut self, enabled: bool) {
402 self.options.cache_enabled = enabled;
403 if !enabled {
404 self.clear_cache();
405 }
406 }
407
408 pub fn set_cache_size_limit(&mut self, limit: usize) {
410 self.options.cache_size_limit = limit;
411
412 if self.cache.len() > limit {
414 let keys_to_remove: Vec<String> = self.cache.keys().skip(limit).cloned().collect();
415 for key in keys_to_remove {
416 self.cache.remove(&key);
417 }
418 }
419 }
420}
421
422impl Default for GlobMatcher {
423 fn default() -> Self {
424 Self::new()
425 }
426}
427
428impl GlobMatcher {
430 pub fn for_extensions(extensions: &[&str]) -> Result<Self> {
432 let mut matcher = Self::new();
433 for ext in extensions {
434 let pattern = crate::utils::extension_to_glob(ext);
435 matcher.add_pattern(&pattern)?;
436 }
437 Ok(matcher)
438 }
439
440 pub fn for_directories(directories: &[&str]) -> Result<Self> {
442 let mut matcher = Self::new();
443 for dir in directories {
444 let pattern = format!("{}/**/*", dir.trim_end_matches('/'));
445 matcher.add_pattern(&pattern)?;
446 }
447 Ok(matcher)
448 }
449
450 pub fn case_insensitive() -> Self {
452 Self::with_options(GlobOptions {
453 case_sensitive: false,
454 ..Default::default()
455 })
456 }
457}
458
// Unit tests for `GlobPattern` and `GlobMatcher`.
#[cfg(test)]
mod tests {
    use super::*;

    // A default pattern keeps its text, is case-sensitive, and matches by extension.
    #[test]
    fn test_glob_pattern_creation() {
        let pattern = GlobPattern::new("**/*.rs").unwrap();
        assert_eq!(pattern.pattern, "**/*.rs");
        assert!(pattern.case_sensitive);

        assert!(pattern.matches("src/lib.rs").unwrap());
        assert!(pattern.matches("tests/integration/test.rs").unwrap());
        assert!(!pattern.matches("src/lib.py").unwrap());
    }

    // Each of `*`, `?`, `[` and `{` makes a pattern non-literal.
    #[test]
    fn test_glob_pattern_literal_detection() {
        let literal = GlobPattern::new("src/lib.rs").unwrap();
        assert!(literal.is_literal());

        let glob = GlobPattern::new("src/**/*.rs").unwrap();
        assert!(!glob.is_literal());

        let question_mark = GlobPattern::new("src/lib?.rs").unwrap();
        assert!(!question_mark.is_literal());

        let bracket = GlobPattern::new("src/lib[123].rs").unwrap();
        assert!(!bracket.is_literal());

        let brace = GlobPattern::new("src/lib.{rs,py}").unwrap();
        assert!(!brace.is_literal());
    }

    // With `case_sensitive: false`, an upper-case pattern matches any casing.
    #[test]
    fn test_case_insensitive_matching() {
        let options = GlobOptions {
            case_sensitive: false,
            ..Default::default()
        };

        let pattern = GlobPattern::with_options("**/*.RS", &options).unwrap();
        assert!(pattern.matches("src/lib.rs").unwrap());
        assert!(pattern.matches("src/LIB.RS").unwrap());
        assert!(pattern.matches("src/Lib.Rs").unwrap());
    }

    // One pattern: the matcher tests it directly instead of using a compiled set.
    #[test]
    fn test_glob_matcher_single_pattern() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("tests/test.rs").unwrap());
        assert!(!matcher.matches("src/lib.py").unwrap());
    }

    // Several patterns: a path matches when any of them matches.
    #[test]
    fn test_glob_matcher_multiple_patterns() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("**/*.py").unwrap();
        matcher.add_pattern("**/*.js").unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(matcher.matches("src/app.js").unwrap());
        assert!(!matcher.matches("src/data.json").unwrap());
    }

    // Comma-separated input with stray spaces yields three usable patterns.
    #[test]
    fn test_glob_matcher_csv_patterns() {
        let mut matcher = GlobMatcher::new();
        matcher
            .add_patterns_csv("**/*.rs, **/*.py , **/*.js")
            .unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(matcher.matches("src/app.js").unwrap());
        assert!(!matcher.matches("src/data.json").unwrap());
        assert_eq!(matcher.pattern_count(), 3);
    }

    // Each path is queried once, so results are computed rather than cached
    // and carry the index and text of the matching pattern.
    #[test]
    fn test_glob_matcher_detailed_results() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("**/*.py").unwrap();

        let result = matcher.match_with_details("src/lib.rs").unwrap();
        assert!(result.matched);
        assert_eq!(result.pattern_index, Some(0));
        assert_eq!(result.pattern, Some("**/*.rs".to_string()));

        let result = matcher.match_with_details("src/main.py").unwrap();
        assert!(result.matched);
        assert_eq!(result.pattern_index, Some(1));
        assert_eq!(result.pattern, Some("**/*.py".to_string()));

        let result = matcher.match_with_details("src/data.json").unwrap();
        assert!(!result.matched);
        assert_eq!(result.pattern_index, None);
    }

    // The first lookup is a miss that fills the cache; repeating it is a hit.
    #[test]
    fn test_glob_matcher_cache() {
        let mut matcher = GlobMatcher::with_options(GlobOptions {
            cache_enabled: true,
            cache_size_limit: 10,
            ..Default::default()
        });

        matcher.add_pattern("**/*.rs").unwrap();

        // First lookup: not cached yet.
        assert!(matcher.matches("src/lib.rs").unwrap());
        let (hits, misses, size) = matcher.cache_stats();
        assert_eq!(hits, 0);
        assert_eq!(misses, 1);
        assert_eq!(size, 1);

        // Same path again: answered from the cache.
        assert!(matcher.matches("src/lib.rs").unwrap());
        let (hits, misses, size) = matcher.cache_stats();
        assert_eq!(hits, 1);
        assert_eq!(misses, 1);
        assert_eq!(size, 1);

        // One hit out of two lookups.
        assert_eq!(matcher.cache_hit_ratio(), 0.5);
    }

    // With a limit of 2, a third distinct path triggers eviction and the
    // cache still holds 2 entries afterwards.
    #[test]
    fn test_glob_matcher_cache_eviction() {
        let mut matcher = GlobMatcher::with_options(GlobOptions {
            cache_enabled: true,
            cache_size_limit: 2,
            ..Default::default()
        });

        matcher.add_pattern("**/*").unwrap();

        // Fill the cache up to its limit.
        matcher.matches("file1.rs").unwrap();
        matcher.matches("file2.py").unwrap();
        assert_eq!(matcher.cache_stats().2, 2);

        // One more distinct path forces an eviction before the insert.
        matcher.matches("file3.js").unwrap();
        assert_eq!(matcher.cache_stats().2, 2);
    }

    // `optimize` moves literal patterns ahead of glob patterns.
    #[test]
    fn test_glob_matcher_optimization() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("exact/path.py").unwrap();
        matcher.add_pattern("src/**/*.js").unwrap();

        // Before optimizing, patterns are in insertion order.
        assert_eq!(matcher.patterns()[0].pattern, "**/*.rs");
        assert_eq!(matcher.patterns()[1].pattern, "exact/path.py");
        assert_eq!(matcher.patterns()[2].pattern, "src/**/*.js");

        matcher.optimize();

        // The only literal pattern is now first.
        assert_eq!(matcher.patterns()[0].pattern, "exact/path.py");
        assert!(matcher.patterns()[0].is_literal());
    }

    // `match_all` reports every matching pattern index, not just the first.
    #[test]
    fn test_glob_matcher_match_all() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("src/**").unwrap();
        matcher.add_pattern("**/*lib*").unwrap();

        // All three patterns match this path.
        let matches = matcher.match_all("src/lib.rs").unwrap();
        assert_eq!(matches.len(), 3);
        assert!(matches.contains(&0));
        assert!(matches.contains(&1));
        assert!(matches.contains(&2));

        // Only the extension pattern matches this one.
        let matches = matcher.match_all("tests/test.rs").unwrap();
        assert_eq!(matches.len(), 1);
        assert!(matches.contains(&0));
    }

    // The extension and directory constructors add one pattern per input.
    #[test]
    fn test_glob_matcher_convenience_methods() {
        let mut matcher = GlobMatcher::for_extensions(&["rs", "py", "js"]).unwrap();
        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(matcher.matches("src/app.js").unwrap());
        assert!(!matcher.matches("src/data.json").unwrap());
        assert_eq!(matcher.pattern_count(), 3);

        let mut matcher = GlobMatcher::for_directories(&["src", "tests"]).unwrap();
        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("tests/test.rs").unwrap());
        assert!(!matcher.matches("docs/readme.md").unwrap());
        assert_eq!(matcher.pattern_count(), 2);
    }

    // A matcher from `case_insensitive()` ignores case in patterns added to it.
    #[test]
    fn test_glob_matcher_case_insensitive() {
        let mut matcher = GlobMatcher::case_insensitive();
        matcher.add_pattern("**/*.RS").unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/LIB.RS").unwrap());
        assert!(matcher.matches("src/Lib.Rs").unwrap());
    }

    // An empty matcher matches nothing, both initially and after `clear`.
    #[test]
    fn test_glob_matcher_empty() {
        let mut matcher = GlobMatcher::new();
        assert!(matcher.is_empty());
        assert!(!matcher.matches("any/path").unwrap());

        matcher.add_pattern("**/*.rs").unwrap();
        assert!(!matcher.is_empty());

        matcher.clear();
        assert!(matcher.is_empty());
        assert!(!matcher.matches("any/path.rs").unwrap());
    }

    // The compiled set is built lazily, reset by `add_pattern`, and rebuilt
    // on demand by `recompile`.
    #[test]
    fn test_glob_matcher_compilation() {
        let mut matcher = GlobMatcher::new();
        assert!(!matcher.is_compiled());

        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("**/*.py").unwrap();

        // Adding patterns alone does not compile anything.
        assert!(!matcher.is_compiled());

        // The first match with more than one pattern builds the set.
        matcher.matches("src/lib.rs").unwrap();
        assert!(matcher.is_compiled());

        // Adding another pattern discards the set.
        matcher.add_pattern("**/*.js").unwrap();
        assert!(!matcher.is_compiled());

        // Explicit recompilation restores it.
        matcher.recompile().unwrap();
        assert!(matcher.is_compiled());
    }

    // Covers brace alternation, character ranges and single-character
    // wildcards; the matcher is cleared between cases.
    #[test]
    fn test_complex_glob_patterns() {
        let mut matcher = GlobMatcher::new();

        // Brace alternation.
        matcher.add_pattern("**/*.{rs,py,js}").unwrap();
        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src/main.py").unwrap());
        assert!(matcher.matches("src/app.js").unwrap());
        assert!(!matcher.matches("src/data.json").unwrap());

        matcher.clear();

        // Character range.
        matcher.add_pattern("test[0-9].rs").unwrap();
        assert!(matcher.matches("test1.rs").unwrap());
        assert!(matcher.matches("test9.rs").unwrap());
        assert!(!matcher.matches("testA.rs").unwrap());

        matcher.clear();

        // `?` matches exactly one character.
        matcher.add_pattern("test?.rs").unwrap();
        assert!(matcher.matches("test1.rs").unwrap());
        assert!(matcher.matches("testA.rs").unwrap());
        assert!(!matcher.matches("test12.rs").unwrap());
    }

    // Backslash-separated paths are expected to match like forward-slash
    // ones. NOTE(review): this relies on `normalize_path`, which is defined
    // outside this file. Confirm it rewrites `\` to `/`.
    #[test]
    fn test_path_normalization_in_matching() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("src/**/*.rs").unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        assert!(matcher.matches("src\\lib.rs").unwrap());
        assert!(matcher.matches("src/subdir/lib.rs").unwrap());
        assert!(matcher.matches("src\\subdir\\lib.rs").unwrap());
    }
}