use crate::utils::normalize_path;
use globset::{Glob, GlobBuilder, GlobSet, GlobSetBuilder};
use scribe_core::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;
/// Matches paths against a growable list of glob patterns, with an optional
/// per-path result cache and a lazily built [`GlobSet`].
#[derive(Debug)]
pub struct GlobMatcher {
/// Registered patterns, in insertion order unless reordered by `optimize`.
patterns: Vec<GlobPattern>,
/// Combined matcher over all patterns; `None` until it is built, and reset
/// to `None` whenever the pattern list changes.
compiled_set: Option<GlobSet>,
/// Flags applied to patterns added through this matcher, plus cache settings.
options: GlobOptions,
/// Normalized path string -> whether any pattern matched it.
cache: HashMap<String, bool>,
/// Number of lookups answered from `cache`.
cache_hits: u64,
/// Number of lookups that were not found in `cache`.
cache_misses: u64,
}
/// A single glob expression together with the matching flags captured when it
/// was created.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobPattern {
/// The glob expression exactly as supplied by the caller.
pub pattern: String,
/// When `false`, the glob is built with case-insensitive matching.
pub case_sensitive: bool,
/// Forwarded to `GlobBuilder::literal_separator` when the glob is built.
pub literal_separator: bool,
/// Forwarded to `GlobBuilder::backslash_escape` when the glob is built.
pub backslash_escape: bool,
/// Copied from `GlobOptions`; not read by the matching code visible in this file.
pub require_literal_separator: bool,
/// Copied from `GlobOptions`; not read by the matching code visible in this file.
pub require_literal_leading_dot: bool,
}
/// Configuration shared by a `GlobMatcher` and the patterns it creates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobOptions {
/// When `false`, globs are built with case-insensitive matching.
pub case_sensitive: bool,
/// Forwarded to `GlobBuilder::literal_separator` when a glob is built.
pub literal_separator: bool,
/// Forwarded to `GlobBuilder::backslash_escape` when a glob is built.
pub backslash_escape: bool,
/// Copied onto each `GlobPattern`; not read by the matching code visible in this file.
pub require_literal_separator: bool,
/// Copied onto each `GlobPattern`; not read by the matching code visible in this file.
pub require_literal_leading_dot: bool,
/// Whether `GlobMatcher` remembers per-path match results.
pub cache_enabled: bool,
/// Entry count at which `GlobMatcher` evicts cached results before storing a new one.
pub cache_size_limit: usize,
}
/// Outcome of matching one path against a `GlobMatcher`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobMatchResult {
/// `true` when at least one pattern matched the path.
pub matched: bool,
/// Index of the reported matching pattern in the matcher's pattern list;
/// `None` when nothing matched or when the result came from the cache.
pub pattern_index: Option<usize>,
/// Text of the reported matching pattern; `None` under the same conditions
/// as `pattern_index`.
pub pattern: Option<String>,
/// How this result was produced.
pub match_method: MatchMethod,
}
/// Identifies which code path produced a `GlobMatchResult`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum MatchMethod {
/// The result was read from the matcher's per-path cache.
Cached,
/// The path was evaluated against the combined `GlobSet`.
Compiled,
/// Patterns were tested one at a time; also reported when the matcher has
/// no patterns at all.
Individual,
/// A pattern tested on its own matched and contains no glob metacharacters.
Literal,
}
impl Default for GlobOptions {
fn default() -> Self {
Self {
case_sensitive: true,
literal_separator: false,
backslash_escape: false,
require_literal_separator: false,
require_literal_leading_dot: false,
cache_enabled: true,
cache_size_limit: 1000,
}
}
}
impl GlobPattern {
pub fn new(pattern: &str) -> Result<Self> {
Self::with_options(pattern, &GlobOptions::default())
}
pub fn with_options(pattern: &str, options: &GlobOptions) -> Result<Self> {
let _glob = Glob::new(pattern)?;
Ok(Self {
pattern: pattern.to_string(),
case_sensitive: options.case_sensitive,
literal_separator: options.literal_separator,
backslash_escape: options.backslash_escape,
require_literal_separator: options.require_literal_separator,
require_literal_leading_dot: options.require_literal_leading_dot,
})
}
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
let normalized_path = normalize_path(path);
let path_str = normalized_path.to_string_lossy();
let mut glob_builder = globset::GlobBuilder::new(&self.pattern);
glob_builder.case_insensitive(!self.case_sensitive);
glob_builder.literal_separator(self.literal_separator);
glob_builder.backslash_escape(self.backslash_escape);
let glob = glob_builder.build()?;
let matcher = glob.compile_matcher();
Ok(matcher.is_match(path_str.as_ref()))
}
pub fn is_literal(&self) -> bool {
!self.pattern.contains('*')
&& !self.pattern.contains('?')
&& !self.pattern.contains('[')
&& !self.pattern.contains('{')
}
pub fn as_str(&self) -> &str {
&self.pattern
}
}
impl GlobMatcher {
/// Creates an empty matcher configured with [`GlobOptions::default`].
pub fn new() -> Self {
    let defaults = GlobOptions::default();
    Self::with_options(defaults)
}
/// Creates an empty matcher that uses `options` for every pattern added later.
///
/// The matcher starts with no compiled set, an empty cache and zeroed
/// hit/miss counters.
pub fn with_options(options: GlobOptions) -> Self {
    let (cache_hits, cache_misses) = (0, 0);
    Self {
        options,
        patterns: Vec::new(),
        cache: HashMap::new(),
        compiled_set: None,
        cache_hits,
        cache_misses,
    }
}
/// Validates `pattern` with this matcher's options and appends it to the
/// pattern list.
///
/// A new pattern can turn a previous "no match" into a match, so both the
/// compiled set and the cached per-path results are discarded. The hit/miss
/// counters are left untouched.
///
/// # Errors
/// Returns an error if `pattern` is rejected by `GlobPattern::with_options`;
/// the matcher is not modified in that case.
pub fn add_pattern(&mut self, pattern: &str) -> Result<()> {
    let glob_pattern = GlobPattern::with_options(pattern, &self.options)?;
    self.patterns.push(glob_pattern);
    self.compiled_set = None;
    // Cached booleans were computed against the old pattern list.
    self.cache.clear();
    Ok(())
}
/// Adds each pattern yielded by `patterns`, in order, via `add_pattern`.
///
/// # Errors
/// Stops at the first pattern that fails and returns its error; patterns
/// added before the failure stay registered.
pub fn add_patterns<I, S>(&mut self, patterns: I) -> Result<()>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    patterns
        .into_iter()
        .try_for_each(|candidate| self.add_pattern(candidate.as_ref()))
}
/// Splits `csv` with `crate::utils::parse_csv_patterns` and adds every
/// resulting pattern, in order.
///
/// # Errors
/// Stops at the first pattern that fails and returns its error; patterns
/// added before the failure stay registered.
pub fn add_patterns_csv(&mut self, csv: &str) -> Result<()> {
    crate::utils::parse_csv_patterns(csv)
        .into_iter()
        .try_for_each(|entry| self.add_pattern(&entry))
}
/// Removes every pattern and drops the compiled set and cached results.
///
/// The hit/miss counters are not reset; use `clear_cache` for that.
pub fn clear(&mut self) {
    self.cache.clear();
    self.compiled_set = None;
    self.patterns.clear();
}
/// Returns whether any registered pattern matches `path`.
///
/// Thin wrapper over `match_with_details` that keeps only the `matched` flag.
///
/// # Errors
/// Propagates any error reported by `match_with_details`.
pub fn matches<P: AsRef<Path>>(&mut self, path: P) -> Result<bool> {
    self.match_with_details(path).map(|details| details.matched)
}
/// Matches `path` against the registered patterns and reports how the answer
/// was obtained.
///
/// The path is first passed through `normalize_path`. When caching is enabled
/// a previously stored answer is returned as `MatchMethod::Cached` (without
/// pattern details, because only the boolean is cached). Otherwise a single
/// pattern is tested directly, and two or more patterns go through the
/// compiled set.
///
/// The cache never holds more than `cache_size_limit` entries: once the limit
/// is reached, half of the entries (at least one) are evicted before the new
/// result is stored, and a limit of `0` stores nothing.
///
/// # Errors
/// Returns an error if a pattern cannot be built or compiled.
pub fn match_with_details<P: AsRef<Path>>(&mut self, path: P) -> Result<GlobMatchResult> {
    let normalized_path = normalize_path(path);
    let path_str = normalized_path.to_string_lossy().to_string();

    if self.options.cache_enabled {
        if let Some(&cached_result) = self.cache.get(&path_str) {
            self.cache_hits += 1;
            return Ok(GlobMatchResult {
                matched: cached_result,
                pattern_index: None,
                pattern: None,
                match_method: MatchMethod::Cached,
            });
        }
        self.cache_misses += 1;
    }

    if self.patterns.is_empty() {
        return Ok(GlobMatchResult {
            matched: false,
            pattern_index: None,
            pattern: None,
            match_method: MatchMethod::Individual,
        });
    }

    let result = if self.patterns.len() > 1 {
        self.match_with_compiled_set(&normalized_path)?
    } else {
        self.match_with_individual_pattern(&normalized_path)?
    };

    let limit = self.options.cache_size_limit;
    if self.options.cache_enabled && limit > 0 {
        if self.cache.len() >= limit {
            // Evict at least one entry; `len / 2` alone is 0 when `len` is 1,
            // which would let the cache grow past a limit of 1.
            let mut to_evict = (self.cache.len() / 2).max(1);
            self.cache.retain(|_, _| {
                if to_evict == 0 {
                    return true;
                }
                to_evict -= 1;
                false
            });
        }
        self.cache.insert(path_str, result.matched);
    }

    Ok(result)
}
/// Matches `path` against the combined `GlobSet`, building the set first if
/// it is missing.
///
/// When several patterns match, the first index returned by the set is the
/// one reported.
fn match_with_compiled_set(&mut self, path: &Path) -> Result<GlobMatchResult> {
    if self.compiled_set.is_none() {
        let built = self.compile_patterns()?;
        self.compiled_set = Some(built);
    }

    let text = path.to_string_lossy();
    let hits: Vec<usize> = self
        .compiled_set
        .as_ref()
        .map(|set| set.matches(text.as_ref()))
        .unwrap_or_default();

    let first_hit = hits.first().copied();
    let pattern = first_hit
        .and_then(|index| self.patterns.get(index))
        .map(|entry| entry.pattern.clone());

    Ok(GlobMatchResult {
        matched: first_hit.is_some(),
        pattern_index: first_hit,
        pattern,
        match_method: MatchMethod::Compiled,
    })
}
/// Tests `path` against each pattern in order and reports the first match.
///
/// A match is labelled `MatchMethod::Literal` when the matching pattern has
/// no glob metacharacters and `MatchMethod::Individual` otherwise; a miss is
/// always `MatchMethod::Individual`.
fn match_with_individual_pattern(&self, path: &Path) -> Result<GlobMatchResult> {
    for (position, candidate) in self.patterns.iter().enumerate() {
        if !candidate.matches(path)? {
            continue;
        }

        let match_method = if candidate.is_literal() {
            MatchMethod::Literal
        } else {
            MatchMethod::Individual
        };
        return Ok(GlobMatchResult {
            matched: true,
            pattern_index: Some(position),
            pattern: Some(candidate.pattern.clone()),
            match_method,
        });
    }

    let miss = GlobMatchResult {
        matched: false,
        pattern_index: None,
        pattern: None,
        match_method: MatchMethod::Individual,
    };
    Ok(miss)
}
/// Builds a `GlobSet` containing every registered pattern, each configured
/// with its own case, separator and escape flags.
///
/// Set indices follow the order of `self.patterns`.
fn compile_patterns(&self) -> Result<GlobSet> {
    let mut set_builder = GlobSetBuilder::new();

    for entry in &self.patterns {
        let glob = GlobBuilder::new(&entry.pattern)
            .case_insensitive(!entry.case_sensitive)
            .literal_separator(entry.literal_separator)
            .backslash_escape(entry.backslash_escape)
            .build()?;
        set_builder.add(glob);
    }

    let compiled = set_builder.build()?;
    Ok(compiled)
}
/// Returns how many patterns are currently registered.
pub fn pattern_count(&self) -> usize {
    let registered = &self.patterns;
    registered.len()
}
/// Returns the registered patterns in their current order.
pub fn patterns(&self) -> &[GlobPattern] {
    self.patterns.as_slice()
}
/// Returns `(cache hits, cache misses, entries currently cached)`.
pub fn cache_stats(&self) -> (u64, u64, usize) {
    let entries = self.cache.len();
    (self.cache_hits, self.cache_misses, entries)
}
/// Empties the result cache and resets both the hit and miss counters to zero.
pub fn clear_cache(&mut self) {
    self.cache_hits = 0;
    self.cache_misses = 0;
    self.cache.clear();
}
/// Returns `true` when a compiled `GlobSet` is currently held.
pub fn is_compiled(&self) -> bool {
    let compiled = self.compiled_set.as_ref();
    compiled.is_some()
}
/// Rebuilds the compiled `GlobSet` from the current patterns.
///
/// Does nothing when no patterns are registered.
///
/// # Errors
/// Returns an error if any pattern fails to build or the set fails to compile.
pub fn recompile(&mut self) -> Result<()> {
    if self.patterns.is_empty() {
        return Ok(());
    }

    let rebuilt = self.compile_patterns()?;
    self.compiled_set = Some(rebuilt);
    Ok(())
}
/// Returns the fraction of counted lookups that were cache hits, or `0.0`
/// when no lookups have been counted.
pub fn cache_hit_ratio(&self) -> f64 {
    match self.cache_hits + self.cache_misses {
        0 => 0.0,
        total => self.cache_hits as f64 / total as f64,
    }
}
/// Moves literal patterns ahead of wildcard patterns, keeping the relative
/// order within each group, and drops the compiled set so it is rebuilt with
/// the new ordering.
pub fn optimize(&mut self) {
    self.compiled_set = None;
    // Stable sort: `false` (literal) orders before `true` (has wildcards).
    self.patterns.sort_by(|left, right| {
        let left_has_wildcards = !left.is_literal();
        let right_has_wildcards = !right.is_literal();
        left_has_wildcards.cmp(&right_has_wildcards)
    });
}
pub fn match_all<P: AsRef<Path>>(&mut self, path: P) -> Result<Vec<usize>> {
if self.compiled_set.is_none() && self.patterns.len() > 1 {
self.compiled_set = Some(self.compile_patterns()?);
}
if let Some(ref compiled_set) = self.compiled_set {
let path_str = path.as_ref().to_string_lossy();
Ok(compiled_set.matches(path_str.as_ref()))
} else {
let mut matches = Vec::new();
for (index, pattern) in self.patterns.iter().enumerate() {
if pattern.matches(&path)? {
matches.push(index);
}
}
Ok(matches)
}
}
/// Returns `true` when no patterns are registered.
pub fn is_empty(&self) -> bool {
    self.patterns.as_slice().is_empty()
}
/// Turns result caching on or off.
///
/// Disabling the cache also empties it and resets the hit/miss counters.
pub fn set_cache_enabled(&mut self, enabled: bool) {
    self.options.cache_enabled = enabled;
    if enabled {
        return;
    }
    self.clear_cache();
}
/// Sets the cache size limit and, if the cache currently holds more than
/// `limit` entries, removes the surplus.
///
/// Which entries are removed follows the map's iteration order.
pub fn set_cache_size_limit(&mut self, limit: usize) {
    self.options.cache_size_limit = limit;
    if self.cache.len() <= limit {
        return;
    }

    let surplus: Vec<String> = self.cache.keys().skip(limit).cloned().collect();
    surplus.iter().for_each(|key| {
        self.cache.remove(key);
    });
}
}
impl Default for GlobMatcher {
fn default() -> Self {
Self::new()
}
}
impl GlobMatcher {
pub fn for_extensions(extensions: &[&str]) -> Result<Self> {
let mut matcher = Self::new();
for ext in extensions {
let pattern = crate::utils::extension_to_glob(ext);
matcher.add_pattern(&pattern)?;
}
Ok(matcher)
}
pub fn for_directories(directories: &[&str]) -> Result<Self> {
let mut matcher = Self::new();
for dir in directories {
let pattern = format!("{}/**/*", dir.trim_end_matches('/'));
matcher.add_pattern(&pattern)?;
}
Ok(matcher)
}
pub fn case_insensitive() -> Self {
Self::with_options(GlobOptions {
case_sensitive: false,
..Default::default()
})
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `matcher` accepts every path in `accepted` and then
    /// rejects every path in `rejected`, checking them in the given order.
    fn assert_matches(matcher: &mut GlobMatcher, accepted: &[&str], rejected: &[&str]) {
        for path in accepted {
            assert!(matcher.matches(*path).unwrap());
        }
        for path in rejected {
            assert!(!matcher.matches(*path).unwrap());
        }
    }

    /// A default pattern keeps its text, is case-sensitive, and matches by extension.
    #[test]
    fn test_glob_pattern_creation() {
        let pattern = GlobPattern::new("**/*.rs").unwrap();
        assert_eq!(pattern.pattern, "**/*.rs");
        assert!(pattern.case_sensitive);

        for hit in &["src/lib.rs", "tests/integration/test.rs"] {
            assert!(pattern.matches(*hit).unwrap());
        }
        assert!(!pattern.matches("src/lib.py").unwrap());
    }

    /// Only patterns free of `*`, `?`, `[` and `{` count as literal.
    #[test]
    fn test_glob_pattern_literal_detection() {
        assert!(GlobPattern::new("src/lib.rs").unwrap().is_literal());

        let wildcard_patterns = [
            "src/**/*.rs",
            "src/lib?.rs",
            "src/lib[123].rs",
            "src/lib.{rs,py}",
        ];
        for text in &wildcard_patterns {
            assert!(!GlobPattern::new(*text).unwrap().is_literal());
        }
    }

    /// A case-insensitive pattern matches any letter case in the path.
    #[test]
    fn test_case_insensitive_matching() {
        let options = GlobOptions {
            case_sensitive: false,
            ..Default::default()
        };
        let pattern = GlobPattern::with_options("**/*.RS", &options).unwrap();

        for hit in &["src/lib.rs", "src/LIB.RS", "src/Lib.Rs"] {
            assert!(pattern.matches(*hit).unwrap());
        }
    }

    /// A matcher holding one pattern accepts and rejects paths as that pattern does.
    #[test]
    fn test_glob_matcher_single_pattern() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();

        assert_matches(
            &mut matcher,
            &["src/lib.rs", "tests/test.rs"],
            &["src/lib.py"],
        );
    }

    /// With several patterns, a path matches when any one of them matches.
    #[test]
    fn test_glob_matcher_multiple_patterns() {
        let mut matcher = GlobMatcher::new();
        for text in &["**/*.rs", "**/*.py", "**/*.js"] {
            matcher.add_pattern(*text).unwrap();
        }

        assert_matches(
            &mut matcher,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );
    }

    /// Comma-separated input is split into individual patterns.
    #[test]
    fn test_glob_matcher_csv_patterns() {
        let mut matcher = GlobMatcher::new();
        matcher
            .add_patterns_csv("**/*.rs, **/*.py , **/*.js")
            .unwrap();

        assert_matches(
            &mut matcher,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );
        assert_eq!(matcher.pattern_count(), 3);
    }

    /// Detailed results report the index and text of the matching pattern.
    #[test]
    fn test_glob_matcher_detailed_results() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("**/*.py").unwrap();

        let rust_hit = matcher.match_with_details("src/lib.rs").unwrap();
        assert!(rust_hit.matched);
        assert_eq!(rust_hit.pattern_index, Some(0));
        assert_eq!(rust_hit.pattern, Some("**/*.rs".to_string()));

        let python_hit = matcher.match_with_details("src/main.py").unwrap();
        assert!(python_hit.matched);
        assert_eq!(python_hit.pattern_index, Some(1));
        assert_eq!(python_hit.pattern, Some("**/*.py".to_string()));

        let miss = matcher.match_with_details("src/data.json").unwrap();
        assert!(!miss.matched);
        assert_eq!(miss.pattern_index, None);
    }

    /// The first lookup of a path is a miss; repeating it is a hit.
    #[test]
    fn test_glob_matcher_cache() {
        let options = GlobOptions {
            cache_enabled: true,
            cache_size_limit: 10,
            ..Default::default()
        };
        let mut matcher = GlobMatcher::with_options(options);
        matcher.add_pattern("**/*.rs").unwrap();

        assert!(matcher.matches("src/lib.rs").unwrap());
        let (hits, misses, size) = matcher.cache_stats();
        assert_eq!(hits, 0);
        assert_eq!(misses, 1);
        assert_eq!(size, 1);

        assert!(matcher.matches("src/lib.rs").unwrap());
        let (hits, misses, size) = matcher.cache_stats();
        assert_eq!(hits, 1);
        assert_eq!(misses, 1);
        assert_eq!(size, 1);
        assert_eq!(matcher.cache_hit_ratio(), 0.5);
    }

    /// The cache stays at its size limit once the limit has been reached.
    #[test]
    fn test_glob_matcher_cache_eviction() {
        let options = GlobOptions {
            cache_enabled: true,
            cache_size_limit: 2,
            ..Default::default()
        };
        let mut matcher = GlobMatcher::with_options(options);
        matcher.add_pattern("**/*").unwrap();

        matcher.matches("file1.rs").unwrap();
        matcher.matches("file2.py").unwrap();
        assert_eq!(matcher.cache_stats().2, 2);

        matcher.matches("file3.js").unwrap();
        assert_eq!(matcher.cache_stats().2, 2);
    }

    /// `optimize` moves literal patterns to the front of the list.
    #[test]
    fn test_glob_matcher_optimization() {
        let insertion_order = ["**/*.rs", "exact/path.py", "src/**/*.js"];

        let mut matcher = GlobMatcher::new();
        for text in &insertion_order {
            matcher.add_pattern(*text).unwrap();
        }
        assert_eq!(matcher.patterns()[0].pattern, "**/*.rs");
        assert_eq!(matcher.patterns()[1].pattern, "exact/path.py");
        assert_eq!(matcher.patterns()[2].pattern, "src/**/*.js");

        matcher.optimize();
        assert_eq!(matcher.patterns()[0].pattern, "exact/path.py");
        assert!(matcher.patterns()[0].is_literal());
    }

    /// `match_all` reports every matching pattern index, not only the first.
    #[test]
    fn test_glob_matcher_match_all() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("src/**").unwrap();
        matcher.add_pattern("**/*lib*").unwrap();

        let matches = matcher.match_all("src/lib.rs").unwrap();
        assert_eq!(matches.len(), 3);
        assert!(matches.contains(&0));
        assert!(matches.contains(&1));
        assert!(matches.contains(&2));

        let matches = matcher.match_all("tests/test.rs").unwrap();
        assert_eq!(matches.len(), 1);
        assert!(matches.contains(&0));
    }

    /// Extension and directory constructors generate one pattern per input.
    #[test]
    fn test_glob_matcher_convenience_methods() {
        let mut by_extension = GlobMatcher::for_extensions(&["rs", "py", "js"]).unwrap();
        assert_matches(
            &mut by_extension,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );
        assert_eq!(by_extension.pattern_count(), 3);

        let mut by_directory = GlobMatcher::for_directories(&["src", "tests"]).unwrap();
        assert_matches(
            &mut by_directory,
            &["src/lib.rs", "tests/test.rs"],
            &["docs/readme.md"],
        );
        assert_eq!(by_directory.pattern_count(), 2);
    }

    /// A case-insensitive matcher ignores letter case in paths.
    #[test]
    fn test_glob_matcher_case_insensitive() {
        let mut matcher = GlobMatcher::case_insensitive();
        matcher.add_pattern("**/*.RS").unwrap();

        assert_matches(
            &mut matcher,
            &["src/lib.rs", "src/LIB.RS", "src/Lib.Rs"],
            &[],
        );
    }

    /// An empty matcher matches nothing, both before adding patterns and after `clear`.
    #[test]
    fn test_glob_matcher_empty() {
        let mut matcher = GlobMatcher::new();
        assert!(matcher.is_empty());
        assert!(!matcher.matches("any/path").unwrap());

        matcher.add_pattern("**/*.rs").unwrap();
        assert!(!matcher.is_empty());

        matcher.clear();
        assert!(matcher.is_empty());
        assert!(!matcher.matches("any/path.rs").unwrap());
    }

    /// The compiled set is built lazily and dropped when patterns change.
    #[test]
    fn test_glob_matcher_compilation() {
        let mut matcher = GlobMatcher::new();
        assert!(!matcher.is_compiled());

        matcher.add_pattern("**/*.rs").unwrap();
        matcher.add_pattern("**/*.py").unwrap();
        assert!(!matcher.is_compiled());

        matcher.matches("src/lib.rs").unwrap();
        assert!(matcher.is_compiled());

        matcher.add_pattern("**/*.js").unwrap();
        assert!(!matcher.is_compiled());

        matcher.recompile().unwrap();
        assert!(matcher.is_compiled());
    }

    /// Brace alternation, character ranges and `?` behave as glob syntax.
    #[test]
    fn test_complex_glob_patterns() {
        let mut matcher = GlobMatcher::new();

        matcher.add_pattern("**/*.{rs,py,js}").unwrap();
        assert_matches(
            &mut matcher,
            &["src/lib.rs", "src/main.py", "src/app.js"],
            &["src/data.json"],
        );

        matcher.clear();
        matcher.add_pattern("test[0-9].rs").unwrap();
        assert_matches(&mut matcher, &["test1.rs", "test9.rs"], &["testA.rs"]);

        matcher.clear();
        matcher.add_pattern("test?.rs").unwrap();
        assert_matches(&mut matcher, &["test1.rs", "testA.rs"], &["test12.rs"]);
    }

    /// Backslash-separated paths match the same patterns as forward-slash paths.
    #[test]
    fn test_path_normalization_in_matching() {
        let mut matcher = GlobMatcher::new();
        matcher.add_pattern("src/**/*.rs").unwrap();

        let equivalent_paths = [
            "src/lib.rs",
            "src\\lib.rs",
            "src/subdir/lib.rs",
            "src\\subdir\\lib.rs",
        ];
        assert_matches(&mut matcher, &equivalent_paths, &[]);
    }
}