use crate::core::{MemScopeError, MemScopeResult};
use regex::Regex;
use std::collections::HashMap;
/// Matches input strings against a set of weighted regex patterns.
///
/// When a pattern's regex does not match, the matcher can fall back to an
/// edit-distance ("fuzzy") comparison. Results are memoized per input string.
pub struct PatternMatcher {
    /// Registered patterns, kept in insertion order.
    patterns: Vec<CompiledPattern>,
    /// Minimum similarity required for a fuzzy match; fuzzy matching only
    /// runs while this value is greater than `0.0`.
    fuzzy_threshold: f64,
    /// Memoized `find_matches` results, keyed by the exact input string.
    cache: std::sync::Mutex<HashMap<String, Vec<PatternMatch>>>,
}
/// A registered pattern: its compiled regex plus scoring and lookup metadata.
#[derive(Debug, Clone)]
pub struct CompiledPattern {
    /// Caller-supplied identifier used for lookup and removal.
    id: String,
    /// Compiled form of the pattern source text.
    regex: Regex,
    /// Base multiplier applied when scoring a match of this pattern.
    weight: f64,
    /// Labels used to filter matches by tag.
    tags: Vec<String>,
    /// Human-readable description of what the pattern recognizes.
    description: String,
}
impl CompiledPattern {
    /// Returns the human-readable description supplied at registration.
    pub fn description(&self) -> &str {
        self.description.as_str()
    }

    /// Returns the tags attached to this pattern.
    pub fn tags(&self) -> &[String] {
        self.tags.as_slice()
    }
}
/// The outcome of testing one pattern against one input string.
#[derive(Debug, Clone)]
pub struct PatternMatch {
    /// Identifier of the pattern that produced this match.
    pub pattern_id: String,
    /// Ranking score; higher is better.
    pub score: f64,
    /// How the pattern matched the input.
    pub match_type: MatchType,
    /// Text of each participating capture group, excluding the whole match.
    /// Empty for fuzzy matches.
    pub captured_groups: Vec<String>,
    /// `(start, end)` offsets of the match within the input. Fuzzy matches
    /// report the whole input, `(0, input.len())`.
    pub position: (usize, usize),
}
/// Classifies how a pattern matched an input string.
///
/// `Eq` and `Hash` are derived so the type can be used as a set or map key
/// (for example when grouping matches by type).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum MatchType {
    /// The regex match spans the entire input.
    Exact,
    /// The regex match touches neither the start nor the end of the input.
    Partial,
    /// No regex match; accepted through edit-distance similarity.
    Fuzzy,
    /// Substring match. Not produced by the matcher code visible in this file.
    Substring,
    /// The regex match starts at the beginning of the input but ends early.
    Prefix,
    /// The regex match ends at the end of the input but starts late.
    Suffix,
}
impl PatternMatcher {
/// Creates a matcher with no patterns, an empty cache, and a fuzzy
/// threshold of `0.7`.
pub fn new() -> Self {
    let cache = std::sync::Mutex::new(HashMap::new());
    Self {
        patterns: Vec::new(),
        fuzzy_threshold: 0.7,
        cache,
    }
}
pub fn add_pattern(
&mut self,
id: &str,
pattern: &str,
weight: f64,
description: &str,
) -> MemScopeResult<()> {
let regex = Regex::new(pattern).map_err(|e| {
MemScopeError::error(
"pattern_matcher",
"add_pattern",
format!("Invalid pattern '{}': {}", pattern, e),
)
})?;
let compiled = CompiledPattern {
id: id.to_string(),
regex,
weight,
tags: Vec::new(),
description: description.to_string(),
};
self.patterns.push(compiled);
self.clear_cache();
Ok(())
}
/// Compiles `pattern` and registers it under `id` together with `tags`.
///
/// The result cache is cleared because cached results no longer reflect the
/// full pattern set.
///
/// # Errors
///
/// Returns an error describing the pattern and the compile failure when
/// `pattern` is not a valid regex; nothing is registered in that case.
pub fn add_pattern_with_tags(
    &mut self,
    id: &str,
    pattern: &str,
    weight: f64,
    description: &str,
    tags: Vec<String>,
) -> MemScopeResult<()> {
    let entry = Regex::new(pattern)
        .map(|regex| CompiledPattern {
            id: id.to_string(),
            regex,
            weight,
            tags,
            description: description.to_string(),
        })
        .map_err(|e| {
            MemScopeError::error(
                "pattern_matcher",
                "add_pattern_with_tags",
                format!("Invalid pattern '{}': {}", pattern, e),
            )
        })?;

    self.patterns.push(entry);
    self.clear_cache();
    Ok(())
}
pub fn find_matches(&self, input: &str) -> Vec<PatternMatch> {
if let Ok(cache) = self.cache.lock() {
if let Some(cached_matches) = cache.get(input) {
return cached_matches.clone();
}
}
let mut matches = Vec::new();
for pattern in &self.patterns {
if let Some(pattern_match) = self.test_pattern(pattern, input) {
matches.push(pattern_match);
}
}
matches.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
if let Ok(mut cache) = self.cache.lock() {
cache.insert(input.to_string(), matches.clone());
}
matches
}
/// Returns the highest-ranked match for `input`, or `None` when nothing
/// matches.
pub fn find_best_match(&self, input: &str) -> Option<PatternMatch> {
    let mut ranked = self.find_matches(input);
    // Keep only the top-ranked entry (if any) and hand it out by value.
    ranked.truncate(1);
    ranked.pop()
}
/// Returns the matches for `input` whose pattern carries `tag`, in the same
/// order as [`find_matches`](Self::find_matches).
///
/// A match is kept when the first registered pattern with the match's id
/// lists `tag` among its tags.
pub fn find_matches_by_tag(&self, input: &str, tag: &str) -> Vec<PatternMatch> {
    self.find_matches(input)
        .into_iter()
        .filter(|m| {
            self.patterns
                .iter()
                .find(|p| p.id == m.pattern_id)
                // Compare as `&str` so no `String` is allocated per match.
                .map_or(false, |p| p.tags.iter().any(|t| t.as_str() == tag))
        })
        .collect()
}
/// Tests a single pattern against `input`.
///
/// A regex match takes priority and is classified as exact, prefix, suffix
/// or partial depending on which ends of the input it touches. If the regex
/// does not match and the fuzzy threshold is greater than zero, the result
/// of the fuzzy comparison is returned instead.
fn test_pattern(&self, pattern: &CompiledPattern, input: &str) -> Option<PatternMatch> {
    if let Some(regex_match) = pattern.regex.find(input) {
        // `captures_len` counts the implicit whole-match group, so a value
        // above 1 means the pattern has explicit groups. Patterns without
        // them skip the second (more expensive) capture pass entirely.
        let captured_groups: Vec<String> = if pattern.regex.captures_len() > 1 {
            pattern
                .regex
                .captures(input)
                .map(|caps| {
                    caps.iter()
                        .skip(1)
                        .flatten()
                        .map(|group| group.as_str().to_string())
                        .collect()
                })
                .unwrap_or_default()
        } else {
            Vec::new()
        };

        let starts_at_beginning = regex_match.start() == 0;
        let ends_at_end = regex_match.end() == input.len();
        let match_type = match (starts_at_beginning, ends_at_end) {
            (true, true) => MatchType::Exact,
            (true, false) => MatchType::Prefix,
            (false, true) => MatchType::Suffix,
            (false, false) => MatchType::Partial,
        };

        let score = self.calculate_score(pattern, input, &regex_match, &match_type);
        return Some(PatternMatch {
            pattern_id: pattern.id.clone(),
            score,
            match_type,
            captured_groups,
            position: (regex_match.start(), regex_match.end()),
        });
    }

    if self.fuzzy_threshold > 0.0 {
        return self.fuzzy_match(pattern, input);
    }
    None
}
/// Compares `input` with the pattern's source text (stripped of regex
/// syntax) and reports a fuzzy match when their similarity reaches the
/// configured threshold.
///
/// Note that the stripping step removes `|` as well, so the alternatives of
/// an alternation are compared as one concatenated string.
fn fuzzy_match(&self, pattern: &CompiledPattern, input: &str) -> Option<PatternMatch> {
    let literal_text = self.clean_pattern_for_fuzzy(pattern.regex.as_str());
    let similarity = self.calculate_similarity(&literal_text, input);
    let accepted = similarity >= self.fuzzy_threshold;

    accepted.then(|| PatternMatch {
        pattern_id: pattern.id.clone(),
        // Fuzzy hits are discounted relative to the pattern's weight.
        score: similarity * pattern.weight * 0.8,
        match_type: MatchType::Fuzzy,
        captured_groups: Vec::new(),
        position: (0, input.len()),
    })
}
fn calculate_score(
&self,
pattern: &CompiledPattern,
input: &str,
regex_match: ®ex::Match,
match_type: &MatchType,
) -> f64 {
let mut score = pattern.weight;
let type_bonus = match match_type {
MatchType::Exact => 1.0,
MatchType::Prefix => 0.9,
MatchType::Suffix => 0.8,
MatchType::Partial => 0.7,
MatchType::Substring => 0.6,
MatchType::Fuzzy => 0.5,
};
score *= type_bonus;
let coverage = regex_match.len() as f64 / input.len() as f64;
score *= 0.5 + coverage * 0.5;
let position_bonus = 1.0 - (regex_match.start() as f64 / input.len() as f64) * 0.1;
score *= position_bonus;
score.min(1.0)
}
/// Strips regex syntax from `pattern`, leaving text that can be compared
/// with an input by edit distance.
///
/// The stages run in a fixed order: anchors and backslashes are dropped
/// first, then the `.*` / `.+` wildcards, then any remaining quantifier,
/// grouping, class, repetition and alternation characters.
fn clean_pattern_for_fuzzy(&self, pattern: &str) -> String {
    let without_anchors: String = pattern
        .chars()
        .filter(|&c| !matches!(c, '^' | '$' | '\\'))
        .collect();

    let without_wildcards = without_anchors.replace(".*", "").replace(".+", "");

    without_wildcards
        .chars()
        .filter(|&c| {
            !matches!(
                c,
                '?' | '*' | '+' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
            )
        })
        .collect()
}
/// Returns a similarity in `0.0..=1.0` derived from the Levenshtein edit
/// distance between `s1` and `s2`, measured in `char`s:
/// `1 - distance / max(len1, len2)`.
///
/// Two empty strings are fully similar (`1.0`); an empty string compared
/// with a non-empty one yields `0.0`.
fn calculate_similarity(&self, s1: &str, s2: &str) -> f64 {
    let a: Vec<char> = s1.chars().collect();
    let b: Vec<char> = s2.chars().collect();

    if a.is_empty() {
        return if b.is_empty() { 1.0 } else { 0.0 };
    }
    if b.is_empty() {
        return 0.0;
    }

    // Only the previous and current rows of the distance table are needed,
    // so keep two rows instead of the full (len1 + 1) x (len2 + 1) matrix.
    let mut previous: Vec<usize> = (0..=b.len()).collect();
    let mut current: Vec<usize> = vec![0; b.len() + 1];

    for (i, &ca) in a.iter().enumerate() {
        current[0] = i + 1;
        for (j, &cb) in b.iter().enumerate() {
            let substitution = previous[j] + usize::from(ca != cb);
            let deletion = previous[j + 1] + 1;
            let insertion = current[j] + 1;
            current[j + 1] = substitution.min(deletion).min(insertion);
        }
        std::mem::swap(&mut previous, &mut current);
    }

    // After the final swap the last completed row lives in `previous`.
    let distance = previous[b.len()];
    let max_len = a.len().max(b.len());
    1.0 - (distance as f64 / max_len as f64)
}
/// Sets the minimum similarity for fuzzy matches, clamped to `0.0..=1.0`.
///
/// The result cache is cleared because cached results were computed with
/// the previous threshold.
pub fn set_fuzzy_threshold(&mut self, threshold: f64) {
    let clamped = threshold.clamp(0.0, 1.0);
    self.fuzzy_threshold = clamped;
    self.clear_cache();
}
pub fn get_fuzzy_threshold(&self) -> f64 {
self.fuzzy_threshold
}
/// Drops every memoized result.
///
/// This is best-effort: if the cache lock cannot be acquired, the call does
/// nothing.
pub fn clear_cache(&self) {
    match self.cache.lock() {
        Ok(mut entries) => entries.clear(),
        Err(_) => {
            // Lock unavailable: leave the cache as it is.
        }
    }
}
/// Returns a snapshot of the matcher's state: pattern count, number of
/// cached inputs, the fuzzy threshold, and how many patterns carry each tag.
///
/// # Errors
///
/// Returns a locking error when the cache lock cannot be acquired.
pub fn get_stats(&self) -> MemScopeResult<PatternMatcherStats> {
    let cache_guard = self.cache.lock().map_err(|e| {
        MemScopeError::system(
            crate::core::error::SystemErrorType::Locking,
            format!("Failed to acquire pattern cache lock: {}", e),
        )
    })?;

    let mut tag_distribution = HashMap::new();
    for tag in self.patterns.iter().flat_map(|pattern| pattern.tags.iter()) {
        *tag_distribution.entry(tag.clone()).or_insert(0) += 1;
    }

    Ok(PatternMatcherStats {
        total_patterns: self.patterns.len(),
        cached_inputs: cache_guard.len(),
        fuzzy_threshold: self.fuzzy_threshold,
        tag_distribution,
    })
}
/// Returns the ids of all registered patterns, in registration order.
pub fn get_pattern_ids(&self) -> Vec<String> {
    let mut ids = Vec::with_capacity(self.patterns.len());
    for pattern in &self.patterns {
        ids.push(pattern.id.clone());
    }
    ids
}
/// Returns the first registered pattern whose id equals `id`, if any.
pub fn get_pattern(&self, id: &str) -> Option<&CompiledPattern> {
    for pattern in &self.patterns {
        if pattern.id == id {
            return Some(pattern);
        }
    }
    None
}
/// Removes every pattern registered under `id`.
///
/// Returns `true` when at least one pattern was removed, in which case the
/// result cache is cleared as well.
pub fn remove_pattern(&mut self, id: &str) -> bool {
    let count_before = self.patterns.len();
    self.patterns.retain(|candidate| candidate.id != id);

    if self.patterns.len() == count_before {
        return false;
    }
    self.clear_cache();
    true
}
}
impl Default for PatternMatcher {
fn default() -> Self {
Self::new()
}
}
/// Point-in-time summary of a pattern matcher's state.
#[derive(Debug, Clone)]
pub struct PatternMatcherStats {
    /// Number of registered patterns.
    pub total_patterns: usize,
    /// Number of distinct inputs with memoized results.
    pub cached_inputs: usize,
    /// Fuzzy-match similarity threshold in effect.
    pub fuzzy_threshold: f64,
    /// For each tag, how many registered patterns carry it.
    pub tag_distribution: HashMap<String, usize>,
}
/// Fluent builder that configures a [`PatternMatcher`] step by step.
pub struct PatternMatcherBuilder {
    /// The matcher being configured; handed out by `build`.
    matcher: PatternMatcher,
}
impl PatternMatcherBuilder {
    /// Starts from an empty matcher with default settings.
    pub fn new() -> Self {
        let matcher = PatternMatcher::new();
        Self { matcher }
    }

    /// Registers the built-in patterns for common Rust types: primitives,
    /// strings, collections and smart pointers. Each pattern is tagged
    /// `"rust"` plus a category tag.
    ///
    /// # Errors
    ///
    /// Propagates the error of the first pattern that fails to register.
    pub fn with_rust_patterns(mut self) -> MemScopeResult<Self> {
        // (id, regex, weight, description, category tag)
        const RUST_PATTERNS: [(&str, &str, f64, &str, &str); 4] = [
            (
                "primitives",
                r"^(i8|i16|i32|i64|i128|isize|u8|u16|u32|u64|u128|usize|f32|f64|bool|char)$",
                1.0,
                "Rust primitive types",
                "primitive",
            ),
            (
                "strings",
                r"^(String|&str|str)$",
                1.0,
                "Rust string types",
                "string",
            ),
            (
                "collections",
                r"^(Vec|HashMap|BTreeMap|HashSet|BTreeSet|VecDeque|LinkedList)<",
                0.9,
                "Rust collection types",
                "collection",
            ),
            (
                "smart_pointers",
                r"^(Box|Arc|Rc|Weak)<",
                0.9,
                "Rust smart pointer types",
                "smart_pointer",
            ),
        ];

        for &(id, pattern, weight, description, category) in RUST_PATTERNS.iter() {
            let tags = vec!["rust".to_string(), category.to_string()];
            self.matcher
                .add_pattern_with_tags(id, pattern, weight, description, tags)?;
        }
        Ok(self)
    }

    /// Sets the fuzzy-match threshold on the matcher being built.
    pub fn fuzzy_threshold(self, threshold: f64) -> Self {
        let Self { mut matcher } = self;
        matcher.set_fuzzy_threshold(threshold);
        Self { matcher }
    }

    /// Finishes the builder and returns the configured matcher.
    pub fn build(self) -> PatternMatcher {
        let Self { matcher } = self;
        matcher
    }
}
impl Default for PatternMatcherBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An anchored pattern that covers only the start of the input is
    /// classified as a prefix match.
    #[test]
    fn test_prefix_match() {
        let mut matcher = PatternMatcher::new();
        matcher
            .add_pattern("vec", r"^Vec<", 1.0, "Vector pattern")
            .unwrap();
        let matches = matcher.find_matches("Vec<i32>");
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].match_type, MatchType::Prefix);
    }

    /// A pattern that covers the whole input is classified as exact.
    #[test]
    fn test_exact_match() {
        let mut matcher = PatternMatcher::new();
        matcher
            .add_pattern("int", r"^i32$", 1.0, "i32 pattern")
            .unwrap();
        let matches = matcher.find_matches("i32");
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].match_type, MatchType::Exact);
        assert_eq!(matches[0].position, (0, 3));
    }

    /// One substituted character still matches through the fuzzy fallback.
    #[test]
    fn test_fuzzy_matching() {
        let mut matcher = PatternMatcher::new();
        matcher.set_fuzzy_threshold(0.6);
        matcher
            .add_pattern("vector", r"Vector", 1.0, "Vector pattern")
            .unwrap();
        let matches = matcher.find_matches("Vektor");
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].match_type, MatchType::Fuzzy);
    }

    /// Tag filtering keeps matches of tagged patterns and drops the rest.
    #[test]
    fn test_pattern_with_tags() {
        let mut matcher = PatternMatcher::new();
        matcher
            .add_pattern_with_tags(
                "rust_vec",
                r"^Vec<",
                1.0,
                "Rust vector",
                vec!["rust".to_string(), "collection".to_string()],
            )
            .unwrap();
        let matches = matcher.find_matches_by_tag("Vec<i32>", "rust");
        assert_eq!(matches.len(), 1);
        let matches = matcher.find_matches_by_tag("Vec<i32>", "java");
        assert_eq!(matches.len(), 0);
    }

    /// The built-in Rust patterns recognize a collection and a primitive.
    #[test]
    fn test_builder_with_rust_patterns() {
        let matcher = PatternMatcherBuilder::new()
            .with_rust_patterns()
            .unwrap()
            .fuzzy_threshold(0.8)
            .build();
        let matches = matcher.find_matches("Vec<i32>");
        assert!(!matches.is_empty());
        let matches = matcher.find_matches("i32");
        assert!(!matches.is_empty());
    }

    /// Similarity is 1.0 for equal strings, 0.0 against an empty string,
    /// and strictly in between for near misses.
    #[test]
    fn test_similarity_calculation() {
        let matcher = PatternMatcher::new();
        assert_eq!(matcher.calculate_similarity("hello", "hello"), 1.0);
        assert_eq!(matcher.calculate_similarity("hello", ""), 0.0);
        assert_eq!(matcher.calculate_similarity("", "hello"), 0.0);
        assert_eq!(matcher.calculate_similarity("", ""), 1.0);
        let sim = matcher.calculate_similarity("hello", "hallo");
        assert!(sim > 0.5 && sim < 1.0);
    }

    /// Repeated lookups are served from one cache entry, and registering a
    /// new pattern invalidates the cache.
    #[test]
    fn test_cache_functionality() {
        let mut matcher = PatternMatcher::new();
        matcher
            .add_pattern("test", r"test", 1.0, "Test pattern")
            .unwrap();
        assert_eq!(matcher.get_stats().unwrap().cached_inputs, 0);

        let matches1 = matcher.find_matches("test");
        assert_eq!(matcher.get_stats().unwrap().cached_inputs, 1);

        let matches2 = matcher.find_matches("test");
        assert_eq!(matcher.get_stats().unwrap().cached_inputs, 1);
        assert_eq!(matches1.len(), matches2.len());
        assert_eq!(matches1[0].pattern_id, matches2[0].pattern_id);

        matcher
            .add_pattern("other", r"other", 1.0, "Other pattern")
            .unwrap();
        assert_eq!(matcher.get_stats().unwrap().cached_inputs, 0);
    }

    /// Patterns can be listed and removed by id; removing an unknown id
    /// reports `false`.
    #[test]
    fn test_pattern_management() {
        let mut matcher = PatternMatcher::new();
        matcher
            .add_pattern("test1", r"test1", 1.0, "Test pattern 1")
            .unwrap();
        matcher
            .add_pattern("test2", r"test2", 1.0, "Test pattern 2")
            .unwrap();
        assert_eq!(matcher.get_pattern_ids().len(), 2);
        assert!(matcher.remove_pattern("test1"));
        assert_eq!(matcher.get_pattern_ids().len(), 1);
        assert!(!matcher.remove_pattern("nonexistent"));
    }
}