use anyhow::Result;
use regex::Regex;
use std::ops::Range;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchType {
Literal,
Regex,
Fuzzy,
}
pub trait Matcher: Send + Sync {
fn find_match(&self, text: &str) -> Option<Range<usize>>;
fn find_all_matches(&self, text: &str) -> Vec<Range<usize>>;
fn match_type(&self) -> MatchType;
fn pattern(&self) -> &str;
fn is_case_sensitive(&self) -> bool;
}
#[derive(Debug, Clone)]
pub struct LiteralMatcher {
pattern: String,
pattern_lower: String,
case_sensitive: bool,
}
impl LiteralMatcher {
pub fn new(pattern: &str, ignore_case: bool) -> Self {
Self {
pattern: pattern.to_string(),
pattern_lower: pattern.to_lowercase(),
case_sensitive: !ignore_case,
}
}
}
impl Matcher for LiteralMatcher {
fn find_match(&self, text: &str) -> Option<Range<usize>> {
if self.case_sensitive {
text.find(&self.pattern).map(|start| start..(start + self.pattern.len()))
} else {
text.to_lowercase().find(&self.pattern_lower)
.map(|start| start..(start + self.pattern.len()))
}
}
fn find_all_matches(&self, text: &str) -> Vec<Range<usize>> {
let mut matches = Vec::new();
let mut start = 0;
if self.case_sensitive {
while let Some(pos) = text[start..].find(&self.pattern) {
let absolute_pos = start + pos;
matches.push(absolute_pos..(absolute_pos + self.pattern.len()));
start = absolute_pos + 1;
}
} else {
let text_lower = text.to_lowercase();
while let Some(pos) = text_lower[start..].find(&self.pattern_lower) {
let absolute_pos = start + pos;
matches.push(absolute_pos..(absolute_pos + self.pattern.len()));
start = absolute_pos + 1;
}
}
matches
}
fn match_type(&self) -> MatchType {
MatchType::Literal
}
fn pattern(&self) -> &str {
&self.pattern
}
fn is_case_sensitive(&self) -> bool {
self.case_sensitive
}
}
#[derive(Debug)]
pub struct RegexMatcher {
pattern: String,
regex: Regex,
case_sensitive: bool,
}
impl RegexMatcher {
pub fn new(pattern: &str, ignore_case: bool) -> Result<Self> {
let mut regex_builder = regex::RegexBuilder::new(pattern);
regex_builder.case_insensitive(ignore_case);
let regex = regex_builder.build()
.map_err(|e| anyhow::anyhow!("Invalid regex pattern '{}': {}", pattern, e))?;
Ok(Self {
pattern: pattern.to_string(),
regex,
case_sensitive: !ignore_case,
})
}
pub fn with_flags(pattern: &str, flags: &str) -> Result<Self> {
let mut regex_builder = regex::RegexBuilder::new(pattern);
for flag in flags.chars() {
match flag {
'i' => { regex_builder.case_insensitive(true); }
'm' => { regex_builder.multi_line(true); }
's' => { regex_builder.dot_matches_new_line(true); }
'x' => { regex_builder.ignore_whitespace(true); }
'u' => { regex_builder.unicode(true); }
_ => return Err(anyhow::anyhow!("Unknown regex flag: {}", flag)),
}
}
let regex = regex_builder.build()
.map_err(|e| anyhow::anyhow!("Invalid regex pattern '{}': {}", pattern, e))?;
Ok(Self {
pattern: pattern.to_string(),
regex,
case_sensitive: !flags.contains('i'),
})
}
}
impl Matcher for RegexMatcher {
fn find_match(&self, text: &str) -> Option<Range<usize>> {
self.regex.find(text).map(|m| m.range())
}
fn find_all_matches(&self, text: &str) -> Vec<Range<usize>> {
self.regex.find_iter(text).map(|m| m.range()).collect()
}
fn match_type(&self) -> MatchType {
MatchType::Regex
}
fn pattern(&self) -> &str {
&self.pattern
}
fn is_case_sensitive(&self) -> bool {
self.case_sensitive
}
}
impl Clone for RegexMatcher {
fn clone(&self) -> Self {
Self::new(&self.pattern, !self.case_sensitive)
.expect("Regex should be valid since it was created before")
}
}
#[derive(Debug, Clone)]
pub struct FuzzyMatcher {
pattern: String,
case_sensitive: bool,
threshold: f64,
}
impl FuzzyMatcher {
pub fn new(pattern: &str, ignore_case: bool, threshold: f64) -> Self {
Self {
pattern: pattern.to_string(),
case_sensitive: !ignore_case,
threshold: threshold.clamp(0.0, 1.0),
}
}
fn similarity(&self, a: &str, b: &str) -> f64 {
let a = if self.case_sensitive { a } else { &a.to_lowercase() };
let b = if self.case_sensitive { b } else { &b.to_lowercase() };
if a == b {
return 1.0;
}
let len_a = a.chars().count();
let len_b = b.chars().count();
if len_a == 0 || len_b == 0 {
return 0.0;
}
let max_len = len_a.max(len_b);
let distance = self.levenshtein_distance(a, b);
1.0 - (distance as f64 / max_len as f64)
}
fn levenshtein_distance(&self, a: &str, b: &str) -> usize {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let len_a = a_chars.len();
let len_b = b_chars.len();
if len_a == 0 { return len_b; }
if len_b == 0 { return len_a; }
let mut prev_row: Vec<usize> = (0..=len_b).collect();
let mut curr_row = vec![0; len_b + 1];
for i in 1..=len_a {
curr_row[0] = i;
for j in 1..=len_b {
let cost = if a_chars[i - 1] == b_chars[j - 1] { 0 } else { 1 };
curr_row[j] = (prev_row[j] + 1)
.min(curr_row[j - 1] + 1)
.min(prev_row[j - 1] + cost);
}
std::mem::swap(&mut prev_row, &mut curr_row);
}
prev_row[len_b]
}
fn find_fuzzy_matches(&self, text: &str) -> Vec<Range<usize>> {
let mut matches = Vec::new();
let pattern_len = self.pattern.chars().count();
let text_chars: Vec<char> = text.chars().collect();
for window_size in (pattern_len.saturating_sub(2))..=(pattern_len + 2) {
if window_size == 0 || window_size > text_chars.len() {
continue;
}
for start in 0..=(text_chars.len() - window_size) {
let end = start + window_size;
let window: String = text_chars[start..end].iter().collect();
if self.similarity(&self.pattern, &window) >= self.threshold {
let byte_start = text_chars[..start].iter().map(|c| c.len_utf8()).sum();
let byte_end = byte_start + window.len();
matches.push(byte_start..byte_end);
}
}
}
matches.sort_by_key(|range| range.start);
self.deduplicate_overlapping_matches(matches)
}
fn deduplicate_overlapping_matches(&self, matches: Vec<Range<usize>>) -> Vec<Range<usize>> {
if matches.is_empty() {
return matches;
}
let mut result = vec![matches[0].clone()];
for current in matches.into_iter().skip(1) {
let last = result.last().unwrap();
if current.start < last.end {
if current.len() > last.len() {
result.pop();
result.push(current);
}
} else {
result.push(current);
}
}
result
}
}
impl Matcher for FuzzyMatcher {
fn find_match(&self, text: &str) -> Option<Range<usize>> {
self.find_all_matches(text).into_iter().next()
}
fn find_all_matches(&self, text: &str) -> Vec<Range<usize>> {
self.find_fuzzy_matches(text)
}
fn match_type(&self) -> MatchType {
MatchType::Fuzzy
}
fn pattern(&self) -> &str {
&self.pattern
}
fn is_case_sensitive(&self) -> bool {
self.case_sensitive
}
}
pub fn create_matcher(
pattern: &str,
match_type: MatchType,
ignore_case: bool,
) -> Result<Box<dyn Matcher>> {
match match_type {
MatchType::Literal => Ok(Box::new(LiteralMatcher::new(pattern, ignore_case))),
MatchType::Regex => Ok(Box::new(RegexMatcher::new(pattern, ignore_case)?)),
MatchType::Fuzzy => Ok(Box::new(FuzzyMatcher::new(pattern, ignore_case, 0.8))),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_literal_matcher() {
let matcher = LiteralMatcher::new("test", false);
assert_eq!(matcher.find_match("this is a test"), Some(10..14));
assert_eq!(matcher.find_match("TEST case"), Some(0..4));
assert_eq!(matcher.find_match("nothing here"), None);
let matches = matcher.find_all_matches("test test test");
assert_eq!(matches.len(), 3);
assert_eq!(matches[0], 0..4);
assert_eq!(matches[1], 5..9);
assert_eq!(matches[2], 10..14);
}
#[test]
fn test_literal_matcher_case_sensitive() {
let matcher = LiteralMatcher::new("Test", true);
assert_eq!(matcher.find_match("this is a Test"), Some(10..14));
assert_eq!(matcher.find_match("this is a test"), None);
}
#[test]
fn test_regex_matcher() {
let matcher = RegexMatcher::new(r"\d+", false).unwrap();
assert_eq!(matcher.find_match("hello 123 world"), Some(6..9));
assert_eq!(matcher.find_match("no numbers here"), None);
let matches = matcher.find_all_matches("123 and 456 and 789");
assert_eq!(matches.len(), 3);
assert_eq!(matches[0], 0..3);
assert_eq!(matches[1], 8..11);
assert_eq!(matches[2], 16..19);
}
#[test]
fn test_regex_matcher_invalid() {
let result = RegexMatcher::new("[invalid", false);
assert!(result.is_err());
}
#[test]
fn test_fuzzy_matcher() {
let matcher = FuzzyMatcher::new("test", false, 0.7);
assert!(!matcher.find_all_matches("test").is_empty());
assert!(!matcher.find_all_matches("tset").is_empty());
assert!(matcher.find_all_matches("hello").is_empty());
}
#[test]
fn test_fuzzy_similarity() {
let matcher = FuzzyMatcher::new("test", false, 0.8);
assert_eq!(matcher.similarity("test", "test"), 1.0);
assert!(matcher.similarity("test", "tset") > 0.5);
assert!(matcher.similarity("test", "hello") < 0.5);
}
#[test]
fn test_levenshtein_distance() {
let matcher = FuzzyMatcher::new("test", false, 0.8);
assert_eq!(matcher.levenshtein_distance("test", "test"), 0);
assert_eq!(matcher.levenshtein_distance("test", "tset"), 2);
assert_eq!(matcher.levenshtein_distance("test", "hello"), 5);
}
#[test]
fn test_create_matcher() {
let literal = create_matcher("test", MatchType::Literal, false).unwrap();
assert_eq!(literal.match_type(), MatchType::Literal);
let regex = create_matcher(r"\d+", MatchType::Regex, false).unwrap();
assert_eq!(regex.match_type(), MatchType::Regex);
let fuzzy = create_matcher("test", MatchType::Fuzzy, false).unwrap();
assert_eq!(fuzzy.match_type(), MatchType::Fuzzy);
}
#[test]
fn test_matcher_properties() {
let matcher = LiteralMatcher::new("Test", true);
assert_eq!(matcher.pattern(), "Test");
assert!(matcher.is_case_sensitive());
assert_eq!(matcher.match_type(), MatchType::Literal);
let matcher = LiteralMatcher::new("test", false);
assert!(!matcher.is_case_sensitive());
}
}