use regex::Regex;
/// How a pattern string is interpreted when it is turned into regex source.
///
/// Produced by [`classify`] (auto-detection) or [`classify_with`] (explicit
/// override) and consumed by [`promote`] / [`promote_with`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatternKind {
/// Plain text; promotion escapes it with `regex::escape`.
Literal,
/// Shell-style glob; promotion translates it with [`glob_to_regex`].
Glob,
/// Already regex source; promotion passes it through unchanged.
Regex,
}
// Explicit interpretation requested by the caller; when supplied to
// `classify_with` it replaces auto-detection entirely.
//
// Each variant maps one-to-one onto the `PatternKind` of the same name (see
// `Mode::kind`). The `clap::ValueEnum` derive suggests this is exposed as a
// command-line option value — TODO confirm against the CLI definition.
//
// NOTE(review): plain `//` comments are used here on purpose; clap's derive
// may surface `///` doc comments on variants as help text, which would
// change program output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum Mode {
// Treat the pattern as literal text.
Literal,
// Treat the pattern as a shell-style glob.
Glob,
// Treat the pattern as regex source.
Regex,
}
impl Mode {
    /// Returns the [`PatternKind`] that carries the same name as this mode.
    ///
    /// The mapping is total and one-to-one, so an explicit mode always
    /// decides the pattern kind without looking at the pattern text.
    pub fn kind(self) -> PatternKind {
        match self {
            Self::Regex => PatternKind::Regex,
            Self::Glob => PatternKind::Glob,
            Self::Literal => PatternKind::Literal,
        }
    }
}
pub fn classify_with(pat: &str, mode: Option<Mode>) -> PatternKind {
match mode {
Some(m) => m.kind(),
None => classify(pat),
}
}
/// Converts `pat` into regex source text according to its kind.
///
/// The kind comes from [`classify_with`]: literals are escaped with
/// `regex::escape`, globs go through [`glob_to_regex`], and regex patterns
/// are returned as an owned copy of the input.
pub fn promote_with(pat: &str, mode: Option<Mode>) -> String {
    let kind = classify_with(pat, mode);
    match kind {
        PatternKind::Regex => pat.to_owned(),
        PatternKind::Glob => glob_to_regex(pat),
        PatternKind::Literal => regex::escape(pat),
    }
}
/// Promotes `pat` under the optional explicit `mode` and compiles it as an
/// unanchored regex.
///
/// # Errors
///
/// Returns the `regex::Error` reported by `Regex::new` when the promoted
/// source does not compile.
pub fn compile_with(pat: &str, mode: Option<Mode>) -> Result<Regex, regex::Error> {
    let source = promote_with(pat, mode);
    Regex::new(&source)
}
/// Promotes `pat` under the optional explicit `mode` and compiles it so that
/// it must match the whole input.
///
/// The promoted source is wrapped in a non-capturing group between `^` and
/// `$`, so alternations inside the pattern are anchored as a unit.
///
/// # Errors
///
/// Returns the `regex::Error` reported by `Regex::new` when the anchored
/// source does not compile.
pub fn compile_anchored_with(pat: &str, mode: Option<Mode>) -> Result<Regex, regex::Error> {
    let inner = promote_with(pat, mode);
    let anchored = ["^(?:", inner.as_str(), ")$"].concat();
    Regex::new(&anchored)
}
/// Compiles a `|`-separated list of name patterns into one fully anchored
/// regex per alternative.
///
/// `spec` is split on every `|`; empty pieces are skipped, and each remaining
/// piece is compiled with [`compile_anchored_with`] using the same `mode`.
///
/// # Errors
///
/// Stops at, and returns, the first `regex::Error` produced by any
/// alternative.
pub fn compile_name_set_with(spec: &str, mode: Option<Mode>) -> Result<Vec<Regex>, regex::Error> {
    let mut compiled = Vec::new();
    for alternative in spec.split('|') {
        if alternative.is_empty() {
            continue;
        }
        compiled.push(compile_anchored_with(alternative, mode)?);
    }
    Ok(compiled)
}
/// Characters whose presence makes [`classify`] treat a pattern as glob-like.
const GLOB_META: [char; 3] = ['*', '?', '['];
/// Characters whose presence makes [`classify`] try the pattern as a regex.
///
/// `*`, `?` and `[` are deliberately absent: they are listed in `GLOB_META`
/// only, so a pattern such as `data_[0-9]` is classified as a glob rather
/// than a regex.
const REGEX_META: [char; 10] = ['^', '$', '(', ')', '|', '+', '{', '}', '\\', '.'];
/// Guesses how `pat` should be interpreted from the characters it contains.
///
/// * No character from `GLOB_META` or `REGEX_META`: [`PatternKind::Literal`].
/// * At least one `REGEX_META` character and the pattern compiles as a
///   regex: [`PatternKind::Regex`].
/// * Anything else: [`PatternKind::Glob`]. This includes patterns that
///   contain regex metacharacters but fail to compile.
pub fn classify(pat: &str) -> PatternKind {
    if pat.contains(&REGEX_META[..]) {
        // Metacharacters alone are not proof of a regex (`*.java` contains
        // `.`), so only a successful trial compile counts.
        return match Regex::new(pat) {
            Ok(_) => PatternKind::Regex,
            Err(_) => PatternKind::Glob,
        };
    }
    if pat.contains(&GLOB_META[..]) {
        PatternKind::Glob
    } else {
        PatternKind::Literal
    }
}
/// Translates a shell-style glob into (unanchored) regex source text.
///
/// * `*` becomes `[^/]*` and `?` becomes `[^/]`, so neither wildcard matches
///   a `/`.
/// * `[...]` is copied through as a regex character class up to the first
///   `]`, with a leading `!` rewritten to `^`. The class body is copied
///   verbatim, so it is interpreted by the regex engine's class syntax.
/// * A `[` that has no `]` anywhere after it cannot open a class; it is
///   emitted as the literal `\[` and the characters after it are translated
///   normally, instead of producing regex source with an unterminated class.
/// * `.`, `+`, `(`, `)`, `|`, `^`, `$`, `{`, `}` and `\` are backslash-escaped.
/// * Every other character is copied unchanged.
pub fn glob_to_regex(glob: &str) -> String {
    let mut out = String::with_capacity(glob.len());
    let mut chars = glob.chars();
    while let Some(c) = chars.next() {
        match c {
            '*' => out.push_str("[^/]*"),
            '?' => out.push_str("[^/]"),
            '[' => {
                // Everything after the `[`, used to look ahead for the `]`.
                let rest = chars.as_str();
                match rest.find(']') {
                    Some(close) => {
                        let body = &rest[..close];
                        out.push('[');
                        match body.strip_prefix('!') {
                            // Glob negation `[!...]` is spelled `[^...]` in regex.
                            Some(negated) => {
                                out.push('^');
                                out.push_str(negated);
                            }
                            None => out.push_str(body),
                        }
                        out.push(']');
                        // Resume right after the closing `]` (one byte wide).
                        chars = rest[close + 1..].chars();
                    }
                    // Unterminated bracket: treat the `[` as an ordinary character.
                    None => out.push_str("\\["),
                }
            }
            '.' | '+' | '(' | ')' | '|' | '^' | '$' | '{' | '}' | '\\' => {
                out.push('\\');
                out.push(c);
            }
            _ => out.push(c),
        }
    }
    out
}
pub fn promote(pat: &str) -> String {
match classify(pat) {
PatternKind::Literal => regex::escape(pat),
PatternKind::Glob => glob_to_regex(pat),
PatternKind::Regex => pat.to_string(),
}
}
pub fn compile(pat: &str) -> Result<Regex, regex::Error> {
Regex::new(&promote(pat))
}
pub fn compile_anchored(pat: &str) -> Result<Regex, regex::Error> {
Regex::new(&format!("^(?:{})$", promote(pat)))
}
pub fn compile_name_set(spec: &str) -> Result<Vec<Regex>, regex::Error> {
spec.split('|')
.filter(|s| !s.is_empty())
.map(compile_anchored)
.collect()
}
// Unit tests pinning classification, promotion and compilation behaviour.
#[cfg(test)]
mod tests {
use super::*;
// Patterns with no GLOB_META or REGEX_META characters are literals.
#[test]
fn classifies_literal_when_no_metacharacters() {
assert_eq!(classify("ERROR:"), PatternKind::Literal);
assert_eq!(classify("knn_entries"), PatternKind::Literal);
}
// `*.java` contains `.` but is expected not to compile as a regex, and
// `data_[0-9]` has no REGEX_META character at all; both end up as globs.
#[test]
fn classifies_glob_when_not_valid_regex() {
assert_eq!(classify("*.java"), PatternKind::Glob);
assert_eq!(classify("data_[0-9]"), PatternKind::Glob);
}
// A REGEX_META character plus a successful compile yields Regex; note that
// a plain dotted name such as `foo.bar` is therefore treated as a regex.
#[test]
fn classifies_regex_when_explicit_and_valid() {
assert_eq!(classify("^ERROR"), PatternKind::Regex);
assert_eq!(classify("foo|bar"), PatternKind::Regex);
assert_eq!(classify(r"\d+"), PatternKind::Regex);
assert_eq!(classify("foo.bar"), PatternKind::Regex);
}
// `compile` does not anchor, so a literal matches anywhere in the haystack.
#[test]
fn literal_matches_as_unanchored_substring() {
let re = compile("ERROR:").unwrap();
assert!(re.is_match("first line\nERROR: bad input\n"));
assert!(!re.is_match("all good here"));
}
// Each `|`-separated alternative is a separate, fully anchored glob, so
// extra trailing characters after the extension must not match.
#[test]
fn name_set_anchors_each_glob_alternative() {
let set = compile_name_set("*.java|*.kt").unwrap();
let matches = |name: &str| set.iter().any(|r| r.is_match(name));
assert!(matches("Widget.java"));
assert!(matches("Widget.kt"));
assert!(!matches("Widget.javax"));
assert!(!matches("Widget.java.bak"));
}
// For content search the whole string is one regex; `|` stays an alternation.
#[test]
fn regex_alternation_is_preserved_for_content() {
let re = compile("SimpleMFD|knn_entries").unwrap();
assert!(re.is_match("...knn_entries..."));
assert!(!re.is_match("nothing relevant"));
}
// An explicit mode bypasses auto-detection: source code full of regex
// metacharacters can be searched literally, and `a.c` can be forced either way.
#[test]
fn explicit_mode_overrides_promotion() {
let code = r#"WireSource::Port(_) => todo!("x"),"#;
let re = compile_with(code, Some(Mode::Literal)).unwrap();
assert!(re.is_match(&format!(" {code}\n")));
let re = compile_with("a.c", Some(Mode::Regex)).unwrap();
assert!(re.is_match("abc"));
let re = compile_with("a.c", Some(Mode::Literal)).unwrap();
assert!(!re.is_match("abc"));
assert!(re.is_match("a.c"));
}
// `None` falls back to `classify`; `Some(mode)` wins regardless of the text.
#[test]
fn absent_mode_keeps_promotion() {
assert_eq!(classify_with("*.rs", None), PatternKind::Glob);
assert_eq!(classify_with("*.rs", Some(Mode::Literal)), PatternKind::Literal);
}
}