use crate::bounded_byte_cache::{BoundedByteCache, DEFAULT_MAX_BYTES, DEFAULT_MAX_ENTRIES};
use libdd_common::MutexExt;
use std::fmt;
use std::sync::{Arc, Mutex};
/// Case-insensitive glob matcher supporting the `*` and `?` wildcards.
///
/// The pattern is lowercased once at construction time and a few facts about it
/// are precomputed so each `matches` call can pick the cheapest strategy.
/// Wildcard match results are memoized in a cache that clones of the matcher share.
pub struct GlobMatcher {
/// Lowercased copy of the pattern passed to `GlobMatcher::new`.
pattern_lower: String,
/// `true` when the lowercased pattern contains only ASCII characters.
pattern_is_ascii: bool,
/// `true` when the lowercased pattern contains at least one `*` or `?`.
pattern_has_wildcards: bool,
/// `true` when the pattern is non-empty and made up solely of `*`,
/// in which case every subject matches without further work.
pattern_is_star: bool,
/// Memoized match results keyed by subject bytes. Behind `Arc<Mutex<..>>` so that
/// clones share one cache. Constructed with `DEFAULT_MAX_ENTRIES` and
/// `DEFAULT_MAX_BYTES`; NOTE(review): eviction policy lives in `BoundedByteCache`
/// and is not visible here.
cache: Arc<Mutex<BoundedByteCache<Vec<u8>, bool>>>,
}
impl fmt::Debug for GlobMatcher {
    /// Renders the matcher as `GlobMatcher { pattern_lower, cache_size }`.
    ///
    /// Only the lowercased pattern and the current number of cached entries are
    /// shown; the precomputed flags and the cache contents are omitted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("GlobMatcher");
        out.field("pattern_lower", &self.pattern_lower);
        // The lock guard is a temporary of this statement, so the cache mutex is
        // only held while the entry count is read and formatted.
        out.field("cache_size", &self.cache.lock_or_panic().len());
        out.finish()
    }
}
impl GlobMatcher {
    /// Creates a matcher for `pattern`.
    ///
    /// The pattern is lowercased once, and three facts about the lowercased form
    /// are recorded: whether it is pure ASCII, whether it contains `*` or `?`,
    /// and whether it is a non-empty run of `*` only. A fresh result cache is
    /// created with the default entry and byte limits.
    pub fn new(pattern: &str) -> Self {
        let lowered = pattern.to_lowercase();
        let is_ascii = lowered.is_ascii();
        let has_wildcards = lowered.chars().any(|c| matches!(c, '*' | '?'));
        let only_stars = !lowered.is_empty() && lowered.bytes().all(|b| b == b'*');
        let cache = BoundedByteCache::new(DEFAULT_MAX_ENTRIES, DEFAULT_MAX_BYTES);

        Self {
            pattern_lower: lowered,
            pattern_is_ascii: is_ascii,
            pattern_has_wildcards: has_wildcards,
            pattern_is_star: only_stars,
            cache: Arc::new(Mutex::new(cache)),
        }
    }

    /// Returns the pattern in its lowercased form (not the original spelling).
    pub fn pattern(&self) -> &str {
        self.pattern_lower.as_str()
    }

    /// Reports whether `subject` matches the pattern, ignoring case.
    ///
    /// Patterns made up solely of `*` accept every subject immediately. When both
    /// the pattern and the subject are ASCII the allocation-free ASCII path is
    /// used; everything else goes through the Unicode path.
    pub fn matches(&self, subject: &str) -> bool {
        if self.pattern_is_star {
            true
        } else if self.pattern_is_ascii && subject.is_ascii() {
            self.matches_ascii(subject.as_bytes())
        } else {
            self.matches_unicode(subject)
        }
    }

    /// ASCII fast path: case folding is done per byte, without allocating a
    /// lowercased copy of the subject.
    ///
    /// Patterns without wildcards are compared directly and never touch the
    /// cache. Wildcard results are memoized under the subject's raw bytes.
    fn matches_ascii(&self, subject: &[u8]) -> bool {
        let pattern = self.pattern_lower.as_bytes();
        if !self.pattern_has_wildcards {
            return pattern.eq_ignore_ascii_case(subject);
        }

        // Copy the cached verdict out so the lock is released before matching.
        let cached = self.cache.lock_or_panic().get(subject).copied();
        match cached {
            Some(hit) => hit,
            None => {
                let outcome = glob_match_bytes::<true>(pattern, subject);
                self.cache.lock_or_panic().put(subject.to_vec(), outcome);
                outcome
            }
        }
    }

    /// Unicode path: lowercases the subject, then compares against the
    /// lowercased pattern.
    ///
    /// An exact match of the lowercased strings succeeds right away, and a
    /// pattern without wildcards cannot match otherwise. Wildcard results are
    /// memoized under the lowercased subject's bytes.
    fn matches_unicode(&self, subject: &str) -> bool {
        let lowered = subject.to_lowercase();
        if lowered == self.pattern_lower {
            return true;
        }
        if !self.pattern_has_wildcards {
            return false;
        }

        let key = lowered.into_bytes();
        // Copy the cached verdict out so the lock is released before matching.
        let cached = self.cache.lock_or_panic().get(&key).copied();
        match cached {
            Some(hit) => hit,
            None => {
                let outcome = glob_match_bytes::<false>(self.pattern_lower.as_bytes(), &key);
                self.cache.lock_or_panic().put(key, outcome);
                outcome
            }
        }
    }
}
impl Clone for GlobMatcher {
fn clone(&self) -> Self {
GlobMatcher {
pattern_lower: self.pattern_lower.clone(),
pattern_is_ascii: self.pattern_is_ascii,
pattern_has_wildcards: self.pattern_has_wildcards,
pattern_is_star: self.pattern_is_star,
cache: Arc::clone(&self.cache),
}
}
}
/// Returns the index just past the UTF-8 encoded character starting at `start`.
///
/// Always advances by at least one byte and then skips any continuation bytes
/// (`10xxxxxx`). For `start == bytes.len()` the result is `bytes.len() + 1`,
/// which callers use as an "exhausted" marker.
fn utf8_char_end(bytes: &[u8], start: usize) -> usize {
    let mut end = start + 1;
    while end < bytes.len() && (bytes[end] & 0xC0) == 0x80 {
        end += 1;
    }
    end
}

/// Matches `subject` against a glob `pattern` where `*` matches any run of
/// characters (including none) and `?` matches exactly one character.
///
/// Both slices are expected to hold UTF-8 text. `pattern` must already be
/// lowercased. When `ASCII_FOLD` is `true`, each subject byte is ASCII-lowercased
/// before being compared with a literal pattern byte; when `false`, bytes are
/// compared as-is (the caller has lowercased the subject).
///
/// `?` and the `*` restart point advance by whole UTF-8 characters, so a
/// multi-byte character such as `é` counts as one character rather than one
/// per byte. For pure ASCII input this is identical to byte-wise stepping.
///
/// The algorithm is iterative with a single restart point: on a mismatch it
/// resumes at the most recent `*`, letting that star swallow one more character.
fn glob_match_bytes<const ASCII_FOLD: bool>(pattern: &[u8], subject: &[u8]) -> bool {
    let mut px = 0;
    let mut sx = 0;
    // Restart point for the most recent `*`: the pattern index of the star and the
    // subject index to retry from. `next_sx == 0` means no star has been seen yet.
    let mut next_px = 0;
    let mut next_sx = 0;

    while px < pattern.len() || sx < subject.len() {
        if px < pattern.len() {
            match pattern[px] {
                b'?' => {
                    if sx < subject.len() {
                        px += 1;
                        // Consume one whole character, not just one byte.
                        sx = utf8_char_end(subject, sx);
                        continue;
                    }
                }
                b'*' => {
                    // First try matching zero characters; on failure, retry with
                    // the star extended by one more character.
                    next_px = px;
                    next_sx = utf8_char_end(subject, sx);
                    px += 1;
                    continue;
                }
                literal => {
                    if sx < subject.len() {
                        let byte = if ASCII_FOLD {
                            subject[sx].to_ascii_lowercase()
                        } else {
                            subject[sx]
                        };
                        if byte == literal {
                            px += 1;
                            sx += 1;
                            continue;
                        }
                    }
                }
            }
        }

        // Mismatch (or pattern exhausted with subject left over): fall back to the
        // last star if it can still grow, otherwise the match fails.
        if 0 < next_sx && next_sx <= subject.len() {
            px = next_px;
            sx = next_sx;
            continue;
        }
        return false;
    }
    true
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one matcher for `pattern`, then asserts that every subject in
    /// `accepted` matches and every subject in `rejected` does not.
    fn check(pattern: &str, accepted: &[&str], rejected: &[&str]) {
        let matcher = GlobMatcher::new(pattern);
        for subject in accepted {
            assert!(matcher.matches(subject));
        }
        for subject in rejected {
            assert!(!matcher.matches(subject));
        }
    }

    #[test]
    fn test_glob_exact_match() {
        // Matching is case-insensitive; no wildcard means the whole subject must match.
        check("hello", &["hello", "HELLO"], &["hello world", "hell"]);
    }

    #[test]
    fn test_glob_question_mark() {
        check("h?llo", &["hello", "hallo"], &["hlo", "heello"]);
    }

    #[test]
    fn test_glob_asterisk() {
        check("h*o", &["hello", "ho", "hello world o"], &["hell"]);
        check("h*", &["hello", "h"], &["world"]);
    }

    #[test]
    fn test_glob_complex() {
        check("c*t?r*", &["contoroller", "cater", "ctfr!"], &["car"]);
        check(
            "*service*",
            &["myservice", "service", "my service name"],
            &["svc"],
        );
    }

    #[test]
    fn test_debug_impl() {
        let rendered = format!("{:?}", GlobMatcher::new("svc-*"));
        assert!(rendered.contains("svc-*"));
    }

    #[test]
    fn test_double_star_matches_everything() {
        check("**", &["anything", ""], &[]);
    }

    #[test]
    fn test_all_star_patterns_short_circuit() {
        let all_star = ["*", "**", "***", "****"];
        let not_all_star = ["", "a*", "*a*", "?"];

        for pattern in all_star {
            let matcher = GlobMatcher::new(pattern);
            assert!(
                matcher.pattern_is_star,
                "pattern {:?} should set pattern_is_star",
                pattern
            );
            // Includes the empty subject and a non-ASCII subject.
            for subject in ["", "anything", "caf\u{00e9}"] {
                assert!(matcher.matches(subject));
            }
        }

        for pattern in not_all_star {
            let matcher = GlobMatcher::new(pattern);
            assert!(
                !matcher.pattern_is_star,
                "pattern {:?} should not set pattern_is_star",
                pattern
            );
        }
    }

    #[test]
    fn test_unicode_exact_match_no_wildcard() {
        let matcher = GlobMatcher::new("caf\u{00e9}");
        assert!(!matcher.pattern_has_wildcards);

        // Same spelling, then the uppercase spelling ("CAFÉ").
        for subject in ["caf\u{00e9}", "CAF\u{00c9}"] {
            assert!(matcher.matches(subject));
        }
        // Plain "cafe" and a longer subject must both be rejected.
        for subject in ["cafe", "caf\u{00e9}s"] {
            assert!(!matcher.matches(subject));
        }
    }

    #[test]
    fn test_unicode_subject_against_ascii_pattern() {
        check("caf*", &["caf\u{00e9}"], &["\u{00e9}cole"]);
    }

    #[test]
    fn test_unicode_pattern() {
        check(
            "caf\u{00e9}*",
            &["caf\u{00e9}-shop", "CAF\u{00c9}-SHOP"],
            &["cafe-shop"],
        );
    }

    #[test]
    fn test_unicode_repeated_calls() {
        // Repeated calls exercise both the first lookup and subsequent ones.
        let matcher = GlobMatcher::new("caf\u{00e9}*");
        for _ in 0..10 {
            assert!(matcher.matches("caf\u{00e9}-controller"));
            assert!(!matcher.matches("x\u{00e9}"));
        }
    }

    #[test]
    fn test_clone_independent() {
        // Both the original and its clone must give the same answer.
        let original = GlobMatcher::new("caf*");
        let duplicate = original.clone();
        assert!(original.matches("caf\u{00e9}"));
        assert!(duplicate.matches("caf\u{00e9}"));
    }

    #[test]
    fn test_ascii_no_wildcard_skips_cache() {
        let matcher = GlobMatcher::new("svc-web");
        assert!(matcher.matches("svc-web"));
        assert!(!matcher.matches("svc-db"));

        let entries = matcher.cache.lock_or_panic().len();
        assert_eq!(entries, 0, "non-wildcard ASCII path should not touch cache");
    }

    #[test]
    fn test_ascii_wildcard_populates_cache() {
        let matcher = GlobMatcher::new("svc-*");
        for subject in ["svc-web", "svc-db"] {
            assert!(matcher.matches(subject));
        }

        let entries = matcher.cache.lock_or_panic().len();
        assert_eq!(
            entries, 2,
            "ASCII wildcard path should cache each unique subject"
        );
    }

    #[test]
    fn test_unicode_path_populates_cache() {
        let matcher = GlobMatcher::new("caf*");
        assert!(matcher.matches("caf\u{00e9}"));

        let entries = matcher.cache.lock_or_panic().len();
        assert_eq!(entries, 1);
    }

    #[test]
    fn test_clone_shares_cache() {
        let original = GlobMatcher::new("caf*");
        let duplicate = original.clone();
        assert!(original.matches("caf\u{00e9}"));

        // The entry inserted through the original is visible through the clone.
        let entries = duplicate.cache.lock_or_panic().len();
        assert_eq!(entries, 1);
    }
}