use regex::Regex;
use serde::{Deserialize, Serialize};
/// Flags that relax how a pattern is compared with a candidate name.
///
/// Both flags are `false` in the `Default` value, which means strict,
/// character-for-character matching.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CaseOptions {
/// When `true`, letter case is ignored while matching.
pub ignore_case: bool,
/// When `true`, glob and exact patterns compare names word by word, so that
/// `get_user_name`, `getUserName` and `GetUserName` are treated alike.
pub ignore_word_separate: bool,
}
impl CaseOptions {
    /// Returns options with every flag cleared; identical to `Default`.
    pub fn new() -> Self {
        Default::default()
    }

    /// Returns a copy of these options with `ignore_case` switched on.
    pub fn with_ignore_case(self) -> Self {
        Self {
            ignore_case: true,
            ..self
        }
    }

    /// Returns a copy of these options with `ignore_word_separate` switched on.
    pub fn with_ignore_word_separate(self) -> Self {
        Self {
            ignore_word_separate: true,
            ..self
        }
    }

    /// Reports whether no flag is set, i.e. matching is fully strict.
    pub fn is_default(&self) -> bool {
        !(self.ignore_case || self.ignore_word_separate)
    }
}
/// Splits an identifier into its lowercase (ASCII) words.
///
/// Word boundaries are:
/// * every `_`, which is dropped from the output;
/// * an ASCII uppercase letter that follows an ASCII lowercase letter
///   (`getUser` -> `get`, `user`);
/// * an ASCII uppercase letter that is followed by an ASCII lowercase letter
///   while a word is already in progress (`HTTPClient` -> `http`, `client`).
///
/// Empty words are never produced, so leading, trailing and repeated
/// underscores are harmless and an empty input yields an empty vector.
pub fn normalize_to_words(s: &str) -> Vec<String> {
    /// Moves the word under construction (if any) into `words`, lowercased.
    fn flush(buf: &mut String, words: &mut Vec<String>) {
        if !buf.is_empty() {
            words.push(buf.to_ascii_lowercase());
            buf.clear();
        }
    }

    let mut words: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut previous: Option<char> = None;
    let mut rest = s.chars().peekable();

    while let Some(ch) = rest.next() {
        if ch == '_' {
            flush(&mut buf, &mut words);
        } else {
            if ch.is_ascii_uppercase() {
                let after_lower = matches!(previous, Some(p) if p.is_ascii_lowercase());
                let before_lower = matches!(rest.peek(), Some(n) if n.is_ascii_lowercase());
                // An uppercase letter opens a new word at a lower->Upper
                // transition, or when it is the last capital of an acronym.
                if after_lower || before_lower {
                    flush(&mut buf, &mut words);
                }
            }
            buf.push(ch);
        }
        previous = Some(ch);
    }

    flush(&mut buf, &mut words);
    words
}
/// Converts a glob pattern into a sequence of word-level pattern elements.
///
/// * `*` becomes [`PatternWord::AnyWords`]; a run of consecutive `*` yields a
///   single element.
/// * `?` becomes [`PatternWord::AnyChar`].
/// * `_` only terminates the literal text collected so far.
/// * Any other character is collected as literal text, which is split into
///   lowercase words with [`normalize_to_words`] when it is terminated.
fn normalize_pattern_to_words(pattern: &str) -> Vec<PatternWord> {
    /// Splits the pending literal text into words, appends them to `out` and
    /// empties `pending`. Does nothing when there is no pending text.
    fn flush_literal(pending: &mut String, out: &mut Vec<PatternWord>) {
        if pending.is_empty() {
            return;
        }
        out.extend(
            normalize_to_words(pending.as_str())
                .into_iter()
                .map(PatternWord::Literal),
        );
        pending.clear();
    }

    let mut result = Vec::new();
    let mut current = String::new();
    // True while the previous pattern character was `*`, so that `**` does not
    // emit a second `AnyWords`.
    let mut in_wildcard_seq = false;

    for c in pattern.chars() {
        match c {
            '*' => {
                flush_literal(&mut current, &mut result);
                if !in_wildcard_seq {
                    result.push(PatternWord::AnyWords);
                }
                in_wildcard_seq = true;
            }
            '?' => {
                flush_literal(&mut current, &mut result);
                result.push(PatternWord::AnyChar);
                in_wildcard_seq = false;
            }
            '_' => {
                flush_literal(&mut current, &mut result);
                in_wildcard_seq = false;
            }
            _ => {
                current.push(c);
                in_wildcard_seq = false;
            }
        }
    }

    flush_literal(&mut current, &mut result);
    result
}
/// One element of a glob pattern after it has been broken into words by
/// `normalize_pattern_to_words`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PatternWord {
/// A lowercase literal word that must equal the target word at the same
/// position.
Literal(String),
/// Produced by `*`: matches any run of zero or more target words.
AnyWords,
/// Produced by `?`: matches exactly one target word, whatever its content.
AnyChar,
}
/// Reports whether the whole `target` word sequence is matched by the whole
/// `pattern` sequence.
fn match_word_pattern(pattern: &[PatternWord], target: &[String]) -> bool {
    // Begin the recursive walk at the first element of both sequences.
    let (pattern_start, target_start) = (0, 0);
    match_word_pattern_recursive(pattern, target, pattern_start, target_start)
}
/// Backtracking matcher behind `match_word_pattern`.
///
/// Returns `true` when `pattern[pi..]` matches exactly `target[ti..]`:
/// a literal consumes one equal word, `AnyChar` consumes any one word and
/// `AnyWords` consumes zero or more words (shortest run tried first).
fn match_word_pattern_recursive(
    pattern: &[PatternWord],
    target: &[String],
    pi: usize,
    ti: usize,
) -> bool {
    // Pattern exhausted: success only if the target is exhausted as well.
    if pi == pattern.len() {
        return ti == target.len();
    }

    let next_pi = pi + 1;
    match &pattern[pi] {
        PatternWord::Literal(expected) => {
            ti < target.len()
                && target[ti] == *expected
                && match_word_pattern_recursive(pattern, target, next_pi, ti + 1)
        }
        PatternWord::AnyChar => {
            ti < target.len() && match_word_pattern_recursive(pattern, target, next_pi, ti + 1)
        }
        PatternWord::AnyWords => {
            // Try every possible number of swallowed words, from none to all
            // that remain.
            let remaining = target.len() - ti;
            (0..=remaining)
                .any(|skip| match_word_pattern_recursive(pattern, target, next_pi, ti + skip))
        }
    }
}
/// A name-matching pattern in one of three flavours.
#[derive(Debug, Clone)]
pub enum Pattern {
/// Glob pattern compiled with the `glob` crate.
Glob(GlobPattern),
/// Regular expression compiled with the `regex` crate.
Regex(RegexPattern),
/// Whole-string comparison against a fixed name.
Exact(ExactPattern),
}
/// Payload of [`Pattern::Glob`].
#[derive(Debug, Clone)]
pub struct GlobPattern {
// Pattern text exactly as supplied by the caller.
pattern: String,
// Compiled glob; the constructor may lowercase the text before compiling it.
compiled: glob::Pattern,
// Options the pattern was built with.
case_options: CaseOptions,
// Word-level form of `pattern`; `Some` only when `ignore_word_separate` is set.
normalized_words: Option<Vec<PatternWord>>,
}
/// Payload of [`Pattern::Regex`].
#[derive(Debug, Clone)]
pub struct RegexPattern {
// Regex source exactly as supplied by the caller.
pattern: String,
// Compiled regex; built case-insensitively when `ignore_case` is set.
compiled: Regex,
// Options the pattern was built with.
case_options: CaseOptions,
}
/// Payload of [`Pattern::Exact`].
#[derive(Debug, Clone)]
pub struct ExactPattern {
// Name to compare against, exactly as supplied by the caller.
pattern: String,
// Options the pattern was built with.
case_options: CaseOptions,
// Lowercase words of `pattern`; `Some` only when `ignore_word_separate` is set.
normalized_words: Option<Vec<String>>,
}
/// Errors reported while building a [`Pattern`].
///
/// Each variant carries a human-readable description of the failure.
#[derive(Debug, Clone, thiserror::Error)]
pub enum PatternError {
/// A glob pattern was rejected.
// NOTE(review): no constructor in this section produces this variant;
// `Pattern::glob_with_options` falls back to an exact pattern instead.
#[error("invalid glob pattern: {0}")]
InvalidGlob(String),
/// A regular expression failed to compile.
#[error("invalid regex pattern: {0}")]
InvalidRegex(String),
}
impl Pattern {
    /// Builds a glob pattern with default (strict) [`CaseOptions`].
    ///
    /// See [`Pattern::glob_with_options`] for how invalid globs are handled.
    pub fn glob(pattern: impl Into<String>) -> Self {
        Self::glob_with_options(pattern, CaseOptions::default())
    }

    /// Builds a glob pattern that honours `case_options`.
    ///
    /// Text that is not a valid glob does not produce an error: the result is
    /// an exact pattern for the same text and options instead.
    pub fn glob_with_options(pattern: impl Into<String>, case_options: CaseOptions) -> Self {
        let pattern: String = pattern.into();
        // With `ignore_case`, `matches` only ever hands a lowercased candidate
        // to the compiled glob (directly, or as the joined words produced by
        // `normalize_to_words`), so the glob itself has to be lowercase too,
        // whether or not `ignore_word_separate` is also set.
        let compiled = if case_options.ignore_case {
            glob::Pattern::new(&pattern.to_ascii_lowercase())
        } else {
            glob::Pattern::new(&pattern)
        };
        match compiled {
            Ok(compiled) => {
                let normalized_words = if case_options.ignore_word_separate {
                    Some(normalize_pattern_to_words(&pattern))
                } else {
                    None
                };
                Pattern::Glob(GlobPattern {
                    pattern,
                    compiled,
                    case_options,
                    normalized_words,
                })
            }
            Err(_) => Self::exact_with_options(pattern, case_options),
        }
    }

    /// Builds a regex pattern with default (strict) [`CaseOptions`].
    ///
    /// # Errors
    ///
    /// Returns [`PatternError::InvalidRegex`] when `pattern` does not compile.
    pub fn regex(pattern: impl Into<String>) -> Result<Self, PatternError> {
        Self::regex_with_options(pattern, CaseOptions::default())
    }

    /// Builds a regex pattern; `ignore_case` makes the regex case-insensitive.
    ///
    /// `ignore_word_separate` is stored but has no effect on regex matching.
    ///
    /// # Errors
    ///
    /// Returns [`PatternError::InvalidRegex`] when `pattern` does not compile.
    pub fn regex_with_options(
        pattern: impl Into<String>,
        case_options: CaseOptions,
    ) -> Result<Self, PatternError> {
        let pattern: String = pattern.into();
        let compiled = regex::RegexBuilder::new(&pattern)
            .case_insensitive(case_options.ignore_case)
            .build()
            .map_err(|e| PatternError::InvalidRegex(e.to_string()))?;
        Ok(Pattern::Regex(RegexPattern {
            pattern,
            compiled,
            case_options,
        }))
    }

    /// Builds an exact-match pattern with default (strict) [`CaseOptions`].
    pub fn exact(name: impl Into<String>) -> Self {
        Self::exact_with_options(name, CaseOptions::default())
    }

    /// Builds an exact-match pattern that honours `case_options`.
    pub fn exact_with_options(name: impl Into<String>, case_options: CaseOptions) -> Self {
        let pattern: String = name.into();
        let normalized_words = if case_options.ignore_word_separate {
            Some(normalize_to_words(&pattern))
        } else {
            None
        };
        Pattern::Exact(ExactPattern {
            pattern,
            case_options,
            normalized_words,
        })
    }

    /// Reports whether `s` is matched by this pattern.
    ///
    /// * Glob and exact patterns with `ignore_word_separate` first compare
    ///   word sequences; if that fails, the candidate's words are joined
    ///   (lowercase, no separators) and compared with the pattern as a whole,
    ///   which lets a pattern written without word boundaries still match.
    /// * Glob and exact patterns with only `ignore_case` compare ASCII
    ///   case-insensitively.
    /// * Regex patterns simply run the compiled regex (an unanchored search).
    pub fn matches(&self, s: &str) -> bool {
        match self {
            Pattern::Glob(g) => {
                let opts = g.case_options;
                if opts.ignore_word_separate {
                    let target_words = normalize_to_words(s);
                    let words_match = g
                        .normalized_words
                        .as_deref()
                        .map_or(false, |pattern_words| {
                            match_word_pattern(pattern_words, &target_words)
                        });
                    words_match || g.compiled.matches(&target_words.concat())
                } else if opts.ignore_case {
                    g.compiled.matches(&s.to_ascii_lowercase())
                } else {
                    g.compiled.matches(s)
                }
            }
            Pattern::Regex(r) => r.compiled.is_match(s),
            Pattern::Exact(e) => {
                let opts = e.case_options;
                if opts.ignore_word_separate {
                    let target_words = normalize_to_words(s);
                    if e.normalized_words.as_deref() == Some(target_words.as_slice()) {
                        return true;
                    }
                    let joined_target = target_words.concat();
                    if opts.ignore_case {
                        joined_target.eq_ignore_ascii_case(&e.pattern)
                    } else {
                        joined_target == e.pattern
                    }
                } else if opts.ignore_case {
                    s.eq_ignore_ascii_case(&e.pattern)
                } else {
                    s == e.pattern
                }
            }
        }
    }

    /// Returns the pattern text exactly as it was supplied.
    pub fn as_str(&self) -> &str {
        match self {
            Pattern::Glob(g) => &g.pattern,
            Pattern::Regex(r) => &r.pattern,
            Pattern::Exact(e) => &e.pattern,
        }
    }

    /// Returns the options this pattern was built with.
    pub fn case_options(&self) -> CaseOptions {
        match self {
            Pattern::Glob(g) => g.case_options,
            Pattern::Regex(r) => r.case_options,
            Pattern::Exact(e) => e.case_options,
        }
    }

    /// Reports whether this is a glob pattern.
    pub fn is_glob(&self) -> bool {
        matches!(self, Pattern::Glob(_))
    }

    /// Reports whether this is a regex pattern.
    pub fn is_regex(&self) -> bool {
        matches!(self, Pattern::Regex(_))
    }

    /// Reports whether this is an exact pattern.
    pub fn is_exact(&self) -> bool {
        matches!(self, Pattern::Exact(_))
    }

    /// Reports whether the pattern can match more than one literal name.
    ///
    /// A glob counts when it contains `*`, `?` or a `[...]` character class;
    /// a regex always counts; an exact pattern never does.
    pub fn has_wildcards(&self) -> bool {
        match self {
            Pattern::Glob(g) => g.pattern.contains(|c: char| matches!(c, '*' | '?' | '[')),
            Pattern::Regex(_) => true,
            Pattern::Exact(_) => false,
        }
    }
}
impl Serialize for Pattern {
    /// Serializes the pattern as a struct named `Pattern` with the fields
    /// `type` (`"glob"`, `"regex"` or `"exact"`) and `pattern`, followed by
    /// `ignore_case` and `ignore_word_separate` only when at least one of the
    /// two flags is set.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeStruct;

        let (tag, text) = match self {
            Pattern::Glob(inner) => ("glob", &inner.pattern),
            Pattern::Regex(inner) => ("regex", &inner.pattern),
            Pattern::Exact(inner) => ("exact", &inner.pattern),
        };
        let options = self.case_options();
        let emit_options = !options.is_default();

        // The declared field count has to agree with what is written below.
        let declared_fields = if emit_options { 4 } else { 2 };
        let mut out = serializer.serialize_struct("Pattern", declared_fields)?;
        out.serialize_field("type", tag)?;
        out.serialize_field("pattern", text)?;
        if emit_options {
            out.serialize_field("ignore_case", &options.ignore_case)?;
            out.serialize_field("ignore_word_separate", &options.ignore_word_separate)?;
        }
        out.end()
    }
}
impl<'de> Deserialize<'de> for Pattern {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(Deserialize)]
struct PatternData {
#[serde(rename = "type")]
pattern_type: String,
pattern: String,
#[serde(default)]
ignore_case: bool,
#[serde(default)]
ignore_word_separate: bool,
}
let data = PatternData::deserialize(deserializer)?;
let case_options = CaseOptions {
ignore_case: data.ignore_case,
ignore_word_separate: data.ignore_word_separate,
};
match data.pattern_type.as_str() {
"glob" => Ok(Pattern::glob_with_options(data.pattern, case_options)),
"regex" => Pattern::regex_with_options(data.pattern, case_options)
.map_err(serde::de::Error::custom),
"exact" => Ok(Pattern::exact_with_options(data.pattern, case_options)),
other => Err(serde::de::Error::custom(format!(
"unknown pattern type: {}",
other
))),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `pattern` matches every entry of `names`.
    fn assert_accepts(pattern: &Pattern, names: &[&str]) {
        for name in names {
            assert!(
                pattern.matches(name),
                "pattern {:?} should match {:?}",
                pattern.as_str(),
                name
            );
        }
    }

    /// Asserts that `pattern` matches no entry of `names`.
    fn assert_rejects(pattern: &Pattern, names: &[&str]) {
        for name in names {
            assert!(
                !pattern.matches(name),
                "pattern {:?} should not match {:?}",
                pattern.as_str(),
                name
            );
        }
    }

    /// Asserts that `input` splits into exactly `expected`.
    fn assert_words(input: &str, expected: &[&str]) {
        assert_eq!(normalize_to_words(input), expected);
    }

    /// Serializes `pattern` to JSON and parses it back.
    fn json_roundtrip(pattern: &Pattern) -> Pattern {
        let json = serde_json::to_string(pattern).unwrap();
        serde_json::from_str(&json).unwrap()
    }

    /// Options with only `ignore_case` set.
    fn ignore_case() -> CaseOptions {
        CaseOptions::new().with_ignore_case()
    }

    /// Options with only `ignore_word_separate` set.
    fn ignore_word_separate() -> CaseOptions {
        CaseOptions::new().with_ignore_word_separate()
    }

    #[test]
    fn test_glob_suffix() {
        let pattern = Pattern::glob("*Config");
        assert_accepts(&pattern, &["AppConfig", "UserConfig", "Config"]);
        assert_rejects(&pattern, &["config", "ConfigManager"]);
    }

    #[test]
    fn test_glob_prefix() {
        let pattern = Pattern::glob("get_*");
        assert_accepts(&pattern, &["get_name", "get_value", "get_"]);
        assert_rejects(&pattern, &["set_name"]);
    }

    #[test]
    fn test_glob_contains() {
        let pattern = Pattern::glob("*Error*");
        assert_accepts(&pattern, &["ParseError", "ErrorHandler", "MyErrorType"]);
        assert_rejects(&pattern, &["ParseException"]);
    }

    #[test]
    fn test_glob_question_mark() {
        let pattern = Pattern::glob("get?");
        assert_accepts(&pattern, &["get1", "getX"]);
        assert_rejects(&pattern, &["get", "get12"]);
    }

    #[test]
    fn test_regex_prefix() {
        let pattern = Pattern::regex(r"^(get|set)_.*").unwrap();
        assert_accepts(&pattern, &["get_name", "set_value"]);
        assert_rejects(&pattern, &["fetch_data", "reset_name"]);
    }

    #[test]
    fn test_regex_suffix() {
        let pattern = Pattern::regex(r".*Error$").unwrap();
        assert_accepts(&pattern, &["ParseError", "NetworkError"]);
        assert_rejects(&pattern, &["ErrorHandler"]);
    }

    #[test]
    fn test_exact() {
        let pattern = Pattern::exact("Config");
        assert_accepts(&pattern, &["Config"]);
        assert_rejects(&pattern, &["config", "AppConfig"]);
    }

    #[test]
    fn test_has_wildcards() {
        let with_wildcards = [
            Pattern::glob("*Config"),
            Pattern::glob("get?"),
            Pattern::regex(r".*").unwrap(),
        ];
        for pattern in with_wildcards.iter() {
            assert!(pattern.has_wildcards(), "{:?}", pattern.as_str());
        }

        let without_wildcards = [Pattern::glob("Config"), Pattern::exact("Config")];
        for pattern in without_wildcards.iter() {
            assert!(!pattern.has_wildcards(), "{:?}", pattern.as_str());
        }
    }

    #[test]
    fn test_invalid_regex() {
        assert!(matches!(
            Pattern::regex(r"[invalid"),
            Err(PatternError::InvalidRegex(_))
        ));
    }

    #[test]
    fn test_serde_glob() {
        let restored = json_roundtrip(&Pattern::glob("*Config"));
        assert_accepts(&restored, &["AppConfig"]);
    }

    #[test]
    fn test_serde_regex() {
        let restored = json_roundtrip(&Pattern::regex(r"^get_.*").unwrap());
        assert_accepts(&restored, &["get_name"]);
    }

    #[test]
    fn test_serde_exact() {
        let restored = json_roundtrip(&Pattern::exact("Config"));
        assert_accepts(&restored, &["Config"]);
        assert_rejects(&restored, &["AppConfig"]);
    }

    #[test]
    fn test_normalize_to_words_snake_case() {
        assert_words("get_user_name", &["get", "user", "name"]);
        assert_words("HTTP_CLIENT", &["http", "client"]);
    }

    #[test]
    fn test_normalize_to_words_camel_case() {
        assert_words("getUserName", &["get", "user", "name"]);
        assert_words("parseJSON", &["parse", "json"]);
    }

    #[test]
    fn test_normalize_to_words_pascal_case() {
        assert_words("GetUserName", &["get", "user", "name"]);
        assert_words("HTTPClient", &["http", "client"]);
    }

    #[test]
    fn test_normalize_to_words_mixed() {
        assert_words("myHTTPClient", &["my", "http", "client"]);
        assert_words("XMLHttpRequest", &["xml", "http", "request"]);
    }

    #[test]
    fn test_glob_ignore_case() {
        let pattern = Pattern::glob_with_options("*config", ignore_case());
        assert_accepts(&pattern, &["AppConfig", "APPCONFIG", "appconfig", "Config"]);
        assert_rejects(&pattern, &["ConfigManager"]);
    }

    #[test]
    fn test_glob_ignore_word_separate() {
        let pattern = Pattern::glob_with_options("get_user*", ignore_word_separate());
        assert_accepts(
            &pattern,
            &["get_user_name", "getUserName", "GetUserName", "GET_USER_NAME"],
        );
        assert_rejects(&pattern, &["fetch_user_name"]);
    }

    #[test]
    fn test_glob_ignore_word_separate_suffix() {
        let pattern = Pattern::glob_with_options("*Config", ignore_word_separate());
        assert_accepts(
            &pattern,
            &["AppConfig", "app_config", "appConfig", "APP_CONFIG"],
        );
    }

    #[test]
    fn test_exact_ignore_case() {
        let pattern = Pattern::exact_with_options("config", ignore_case());
        assert_accepts(&pattern, &["Config", "CONFIG", "config"]);
        assert_rejects(&pattern, &["AppConfig"]);
    }

    #[test]
    fn test_exact_ignore_word_separate() {
        let pattern = Pattern::exact_with_options("get_user_name", ignore_word_separate());
        assert_accepts(&pattern, &["get_user_name", "getUserName", "GetUserName"]);
        assert_rejects(&pattern, &["get_user"]);
    }

    #[test]
    fn test_regex_ignore_case() {
        let pattern = Pattern::regex_with_options(r"config", ignore_case()).unwrap();
        assert_accepts(&pattern, &["AppConfig", "APPCONFIG", "appconfig"]);
    }

    #[test]
    fn test_serde_with_case_options() {
        let pattern = Pattern::glob_with_options("*config", ignore_case());
        let json = serde_json::to_string(&pattern).unwrap();
        assert!(json.contains("ignore_case"));

        let restored: Pattern = serde_json::from_str(&json).unwrap();
        assert_accepts(&restored, &["AppConfig", "APPCONFIG"]);
    }

    #[test]
    fn test_case_options_builder() {
        let both = CaseOptions::new()
            .with_ignore_case()
            .with_ignore_word_separate();
        assert!(both.ignore_case);
        assert!(both.ignore_word_separate);
        assert!(!both.is_default());

        assert!(CaseOptions::default().is_default());
    }

    #[test]
    fn test_ignore_word_separate_with_no_boundaries_in_pattern() {
        let exact = Pattern::exact_with_options("astregapply", ignore_word_separate());
        assert_accepts(&exact, &["ASTRegApply", "ast_reg_apply", "astRegApply"]);

        let glob = Pattern::glob_with_options("astregapply", ignore_word_separate());
        assert_accepts(&glob, &["ASTRegApply", "ast_reg_apply", "AstRegApply"]);

        let suffix = Pattern::glob_with_options("*apply", ignore_word_separate());
        assert_accepts(&suffix, &["ASTRegApply", "ast_reg_apply"]);
    }
}