use regex::Regex;
use std::collections::HashSet;
use std::path::Path;
use thiserror::Error;
use tracing::{debug, info, warn};
/// Errors that can be reported while interpreting a user-supplied pattern.
#[derive(Error, Debug)]
pub enum PatternError {
/// A `/.../`-delimited pattern did not compile; wraps the underlying
/// `regex::Error` (converted automatically through `?`).
#[error("Invalid regex pattern: {0}")]
InvalidRegex(#[from] regex::Error),
/// A content key was malformed; the payload is the message to display.
/// NOTE(review): not constructed by any code visible in this file.
#[error("Invalid content key format: {0}")]
InvalidContentKey(String),
/// An encoding key was malformed; the payload is the message to display.
/// NOTE(review): not constructed by any code visible in this file.
#[error("Invalid encoding key format: {0}")]
InvalidEncodingKey(String),
/// The pattern could not be classified; the payload is the message to display.
/// NOTE(review): not constructed by any code visible in this file.
#[error("Pattern type could not be determined: {0}")]
UnknownPattern(String),
}
/// The classification assigned to a pattern string by
/// `PatternExtractor::detect_pattern_type`.
#[derive(Debug, Clone)]
pub enum PatternType {
/// A pattern containing `*`, `?`, `[` or `{`, stored verbatim.
Glob(String),
/// A regex compiled from the text between a leading and trailing `/`.
Regex(Regex),
/// A 32-character hexadecimal string, stored lowercased.
ContentKey(String),
/// An 18-character hexadecimal string, stored lowercased.
EncodingKey(String),
/// Anything else: treated as a literal path, stored verbatim.
FilePath(String),
}
impl PartialEq for PatternType {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(PatternType::Glob(a), PatternType::Glob(b)) => a == b,
(PatternType::Regex(a), PatternType::Regex(b)) => a.as_str() == b.as_str(),
(PatternType::ContentKey(a), PatternType::ContentKey(b)) => a == b,
(PatternType::EncodingKey(a), PatternType::EncodingKey(b)) => a == b,
(PatternType::FilePath(a), PatternType::FilePath(b)) => a == b,
_ => false,
}
}
}
/// Options controlling how patterns are compiled and how matches are ranked.
#[derive(Debug, Clone)]
pub struct PatternConfig {
/// When `false`, regexes are compiled with `(?i)` and paths are lowercased
/// before comparison.
pub case_sensitive: bool,
/// Upper bound on the number of matches accepted per pattern in
/// `match_files`; `None` means unlimited.
pub max_matches_per_pattern: Option<usize>,
/// NOTE(review): not read by any code visible in this file.
pub include_directories: bool,
/// Extensions that earn a priority bonus in `calculate_priority`; compared
/// against the lowercased extension of each matched file.
pub priority_extensions: Vec<String>,
}
/// Default configuration: case-insensitive matching, no per-pattern match
/// limit, directories excluded, and `dbc`/`db2`/`lua` as priority extensions.
impl Default for PatternConfig {
    fn default() -> Self {
        let priority_extensions = ["dbc", "db2", "lua"]
            .iter()
            .map(|ext| String::from(*ext))
            .collect();
        Self {
            priority_extensions,
            include_directories: false,
            max_matches_per_pattern: None,
            case_sensitive: false,
        }
    }
}
/// A pattern after classification, together with the text it came from.
#[derive(Debug)]
pub struct CompiledPattern {
/// The detected kind of pattern and its parsed payload.
pub pattern_type: PatternType,
/// The pattern string exactly as the caller supplied it.
pub original: String,
/// Copy of the extractor's configuration taken when the pattern was added.
pub config: PatternConfig,
}
/// One file selected by a pattern.
#[derive(Debug, Clone)]
pub struct PatternMatch {
/// The matched path as it appeared in the input list (key patterns use a
/// synthesized name instead).
pub file_path: String,
/// The original text of the pattern that produced this match.
pub pattern: String,
/// Extra information gathered about the match, including its priority.
pub metadata: MatchMetadata,
}
/// Descriptive data attached to a [`PatternMatch`]; every field defaults to
/// `None` / `0`.
#[derive(Debug, Clone, Default)]
pub struct MatchMetadata {
/// NOTE(review): never populated by the code visible in this file.
pub file_size: Option<u64>,
/// Set for matches produced by a content-key pattern.
pub content_key: Option<String>,
/// Set for matches produced by an encoding-key pattern.
pub encoding_key: Option<String>,
/// Lowercased file extension, when the path has one.
pub file_type: Option<String>,
/// Ranking score; `match_files` sorts results by this value, highest first.
pub priority_score: u32,
}
/// Collects patterns and matches them against lists of file paths.
pub struct PatternExtractor {
// Configuration applied to every pattern added to this extractor.
config: PatternConfig,
// Patterns in the order they were added; earlier patterns claim files first.
compiled_patterns: Vec<CompiledPattern>,
}
impl PatternExtractor {
pub fn new() -> Self {
Self {
config: PatternConfig::default(),
compiled_patterns: Vec::new(),
}
}
/// Creates an extractor that uses `config` and starts with no patterns.
pub fn with_config(config: PatternConfig) -> Self {
    let compiled_patterns = Vec::new();
    Self {
        compiled_patterns,
        config,
    }
}
/// Classifies `pattern` and appends it to the extractor.
///
/// # Errors
///
/// Returns [`PatternError::InvalidRegex`] when `pattern` is a `/.../` regex
/// that fails to compile; nothing is added in that case.
pub fn add_pattern(&mut self, pattern: &str) -> Result<(), PatternError> {
    let detected = self.detect_pattern_type(pattern)?;
    info!("Added pattern: {} -> {:?}", pattern, detected);

    // Each compiled pattern keeps its own snapshot of the current config.
    self.compiled_patterns.push(CompiledPattern {
        pattern_type: detected,
        original: pattern.to_owned(),
        config: self.config.clone(),
    });
    Ok(())
}
/// Adds every pattern in `patterns`, in order.
///
/// # Errors
///
/// Stops at the first pattern that fails to compile and returns its error.
/// Patterns added before the failing one remain registered.
pub fn add_patterns(&mut self, patterns: &[String]) -> Result<(), PatternError> {
    patterns
        .iter()
        .try_for_each(|pattern| self.add_pattern(pattern))
}
/// Decides what kind of pattern `pattern` is.
///
/// Rules are tried in this order:
/// 1. `/body/` with a non-empty body: a regex (prefixed with `(?i)` unless the
///    config is case-sensitive).
/// 2. Exactly 32 hex digits: a content key (lowercased).
/// 3. Exactly 18 hex digits: an encoding key (lowercased).
/// 4. Contains `*`, `?`, `[` or `{`: a glob.
/// 5. Otherwise: a literal file path.
///
/// # Errors
///
/// Returns [`PatternError::InvalidRegex`] when a `/.../` body does not compile.
fn detect_pattern_type(&self, pattern: &str) -> Result<PatternType, PatternError> {
    // A regex needs both delimiters and at least one character between them.
    let delimited_body = pattern
        .strip_prefix('/')
        .and_then(|rest| rest.strip_suffix('/'))
        .filter(|body| !body.is_empty());
    if let Some(body) = delimited_body {
        let source = if self.config.case_sensitive {
            body.to_string()
        } else {
            format!("(?i){body}")
        };
        return Ok(PatternType::Regex(Regex::new(&source)?));
    }

    let is_hex = pattern.chars().all(|c| c.is_ascii_hexdigit());
    let has_glob_syntax = pattern.contains(|c: char| matches!(c, '*' | '?' | '[' | '{'));

    let detected = match pattern.len() {
        32 if is_hex => PatternType::ContentKey(pattern.to_lowercase()),
        18 if is_hex => PatternType::EncodingKey(pattern.to_lowercase()),
        _ if has_glob_syntax => PatternType::Glob(pattern.to_string()),
        _ => PatternType::FilePath(pattern.to_string()),
    };
    Ok(detected)
}
/// Runs every registered pattern over `file_paths` and returns the matches.
///
/// Patterns are applied in insertion order. A file is reported at most once,
/// credited to the first pattern that matched it. Each pattern contributes at
/// most `max_matches_per_pattern` new files when that limit is set. The
/// result is sorted by descending priority score; ties keep discovery order.
pub fn match_files(&self, file_paths: &[String]) -> Vec<PatternMatch> {
    info!(
        "Matching {} patterns against {} files",
        self.compiled_patterns.len(),
        file_paths.len()
    );

    let mut results: Vec<PatternMatch> = Vec::new();
    let mut claimed: HashSet<String> = HashSet::new();

    for compiled in &self.compiled_patterns {
        let candidates = self.match_pattern(compiled, file_paths);
        debug!(
            "Pattern '{}' matched {} files",
            compiled.original,
            candidates.len()
        );

        let cap = compiled.config.max_matches_per_pattern;
        let mut accepted: usize = 0;
        for mut candidate in candidates {
            // Files already claimed by an earlier pattern do not count
            // against this pattern's limit.
            if claimed.contains(&candidate.file_path) {
                continue;
            }
            match cap {
                Some(limit) if accepted >= limit => {
                    debug!(
                        "Reached limit of {} matches for pattern '{}'",
                        limit, compiled.original
                    );
                    break;
                }
                _ => {}
            }

            // The score is always recomputed here, replacing any value the
            // matcher put in the metadata.
            candidate.metadata.priority_score = self.calculate_priority(&candidate);
            claimed.insert(candidate.file_path.clone());
            results.push(candidate);
            accepted += 1;
        }
    }

    // Stable sort, highest score first.
    results.sort_by(|lhs, rhs| {
        rhs.metadata
            .priority_score
            .cmp(&lhs.metadata.priority_score)
    });
    info!("Total matches found: {}", results.len());
    results
}
fn match_pattern(
&self,
compiled_pattern: &CompiledPattern,
file_paths: &[String],
) -> Vec<PatternMatch> {
match &compiled_pattern.pattern_type {
PatternType::Glob(glob_pattern) => {
self.match_glob_pattern(glob_pattern, file_paths, &compiled_pattern.original)
}
PatternType::Regex(regex) => {
self.match_regex_pattern(regex, file_paths, &compiled_pattern.original)
}
PatternType::ContentKey(ckey) => {
self.match_content_key(ckey, &compiled_pattern.original)
}
PatternType::EncodingKey(ekey) => {
self.match_encoding_key(ekey, &compiled_pattern.original)
}
PatternType::FilePath(path) => {
self.match_file_path(path, file_paths, &compiled_pattern.original)
}
}
}
/// Returns a match for every entry of `file_paths` accepted by `glob_pattern`.
///
/// The glob is translated to a regex with `glob_to_regex`. If that regex does
/// not compile, a warning is logged and no matches are returned. `original`
/// is the user-supplied pattern text recorded on each match.
fn match_glob_pattern(
    &self,
    glob_pattern: &str,
    file_paths: &[String],
    original: &str,
) -> Vec<PatternMatch> {
    let regex_pattern = self.glob_to_regex(glob_pattern);
    let regex = match Regex::new(&regex_pattern) {
        Ok(regex) => regex,
        Err(e) => {
            warn!(
                "Failed to compile glob pattern '{}' to regex: {}",
                glob_pattern, e
            );
            return Vec::new();
        }
    };

    let case_sensitive = self.config.case_sensitive;
    file_paths
        .iter()
        .filter(|file_path| {
            if case_sensitive {
                // Test the path in place; no copy is needed.
                regex.is_match(file_path.as_str())
            } else {
                // Case-insensitive mode tests a lowercased copy of the path.
                regex.is_match(&file_path.to_lowercase())
            }
        })
        .map(|file_path| PatternMatch {
            file_path: file_path.clone(),
            pattern: original.to_string(),
            metadata: self.create_metadata_for_file(file_path),
        })
        .collect()
}
/// Returns a match for every entry of `file_paths` that `regex` matches.
///
/// Paths are tested exactly as given; any case-insensitivity is already part
/// of the compiled regex. `original` is recorded as the matching pattern.
fn match_regex_pattern(
    &self,
    regex: &Regex,
    file_paths: &[String],
    original: &str,
) -> Vec<PatternMatch> {
    file_paths
        .iter()
        .filter(|candidate| regex.is_match(candidate.as_str()))
        .map(|candidate| PatternMatch {
            file_path: candidate.clone(),
            pattern: original.to_string(),
            metadata: self.create_metadata_for_file(candidate),
        })
        .collect()
}
/// Produces the single match for a content-key pattern.
///
/// No file list is consulted: the match gets the synthesized path
/// `content_key_<ckey>.data` and carries the key in its metadata.
/// `original` is recorded as the matching pattern.
///
/// The `priority_score` set here is replaced when `match_files` recomputes
/// the score for the match.
fn match_content_key(&self, ckey: &str, original: &str) -> Vec<PatternMatch> {
    vec![PatternMatch {
        file_path: format!("content_key_{ckey}.data"),
        pattern: original.to_string(),
        metadata: MatchMetadata {
            content_key: Some(ckey.to_string()),
            priority_score: 100,
            ..Default::default()
        },
    }]
}
/// Produces the single match for an encoding-key pattern.
///
/// No file list is consulted: the match gets the synthesized path
/// `encoding_key_<ekey>.data` and carries the key in its metadata.
/// `original` is recorded as the matching pattern.
///
/// The `priority_score` set here is replaced when `match_files` recomputes
/// the score for the match.
fn match_encoding_key(&self, ekey: &str, original: &str) -> Vec<PatternMatch> {
    vec![PatternMatch {
        file_path: format!("encoding_key_{ekey}.data"),
        pattern: original.to_string(),
        metadata: MatchMetadata {
            encoding_key: Some(ekey.to_string()),
            priority_score: 90,
            ..Default::default()
        },
    }]
}
/// Returns a match for every entry of `file_paths` equal to `target_path`.
///
/// Both sides are passed through `normalize_path` before comparison, so
/// backslashes and (in case-insensitive mode) letter case do not matter.
/// The reported `file_path` is the entry as it appeared in the input.
fn match_file_path(
    &self,
    target_path: &str,
    file_paths: &[String],
    original: &str,
) -> Vec<PatternMatch> {
    let wanted = self.normalize_path(target_path);
    file_paths
        .iter()
        .filter(|candidate| self.normalize_path(candidate) == wanted)
        .map(|candidate| PatternMatch {
            file_path: candidate.clone(),
            pattern: original.to_string(),
            metadata: self.create_metadata_for_file(candidate),
        })
        .collect()
}
/// Translates a glob into the source text of an anchored regex.
///
/// Translation rules:
/// - `**/` becomes `(?:[^/]+/)*` (zero or more whole directories)
/// - `**` becomes `.*`
/// - `*` becomes `[^/]*` and `?` becomes `[^/]`
/// - `[...]` is copied through as a character class; a leading `!`
///   (glob negation) is rewritten to the regex form `^`
/// - `{a,b}` becomes `(a|b)`; wildcards and classes inside the braces are
///   translated as usual and braces may nest
/// - regex metacharacters that are literals in a glob are backslash-escaped
///
/// The result is wrapped in `^...$` and prefixed with `(?i)` unless the
/// config is case-sensitive. A malformed glob (for example an unterminated
/// `[`) can still yield text that fails to compile; callers must handle that.
fn glob_to_regex(&self, glob: &str) -> String {
    // Regex metacharacters that have no special meaning in a glob.
    const REGEX_META: &str = "^$()[]{}|+.\\";

    let mut regex = String::with_capacity(glob.len() * 2 + 8);
    if !self.config.case_sensitive {
        regex.push_str("(?i)");
    }
    regex.push('^');

    let mut chars = glob.chars().peekable();
    // Number of `{` groups currently open; `,` and `}` are only special
    // while this is non-zero.
    let mut brace_depth: usize = 0;

    while let Some(ch) = chars.next() {
        match ch {
            '*' => {
                if chars.next_if_eq(&'*').is_some() {
                    if chars.next_if_eq(&'/').is_some() {
                        regex.push_str("(?:[^/]+/)*");
                    } else {
                        regex.push_str(".*");
                    }
                } else {
                    regex.push_str("[^/]*");
                }
            }
            '?' => regex.push_str("[^/]"),
            '[' => {
                regex.push('[');
                // Globs negate a class with `!`; regex syntax uses `^`.
                if chars.next_if_eq(&'!').is_some() {
                    regex.push('^');
                }
                for class_ch in chars.by_ref() {
                    regex.push(class_ch);
                    if class_ch == ']' {
                        break;
                    }
                }
            }
            '{' => {
                brace_depth += 1;
                regex.push('(');
            }
            ',' if brace_depth > 0 => regex.push('|'),
            '}' if brace_depth > 0 => {
                brace_depth -= 1;
                regex.push(')');
            }
            ch if REGEX_META.contains(ch) => {
                regex.push('\\');
                regex.push(ch);
            }
            ch => regex.push(ch),
        }
    }

    // Close any group the glob left open so the parentheses stay balanced.
    for _ in 0..brace_depth {
        regex.push(')');
    }
    regex.push('$');
    regex
}
/// Returns `path` with backslashes replaced by `/`, lowercased as well when
/// the config is case-insensitive.
fn normalize_path(&self, path: &str) -> String {
    let unified = path.replace('\\', "/");
    if self.config.case_sensitive {
        unified
    } else {
        unified.to_lowercase()
    }
}
fn create_metadata_for_file(&self, file_path: &str) -> MatchMetadata {
let file_type = Path::new(file_path)
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| ext.to_lowercase());
MatchMetadata {
file_type,
..Default::default()
}
}
/// Computes the ranking score for a match.
///
/// The score is the sum of:
/// - a base of 10;
/// - 50 if the file type is listed in `priority_extensions`;
/// - a per-type bonus (`dbc`/`db2` 40, `lua`/`xml` 30, `m2`/`wmo` 25,
///   `ogg`/`mp3`/`blp`/`tga` 20);
/// - 100 if the match carries a content key;
/// - 90 if the match carries an encoding key.
fn calculate_priority(&self, pattern_match: &PatternMatch) -> u32 {
    let metadata = &pattern_match.metadata;

    let file_type_bonus: u32 = metadata.file_type.as_ref().map_or(0, |file_type| {
        let configured: u32 = if self.config.priority_extensions.contains(file_type) {
            50
        } else {
            0
        };
        let built_in: u32 = match file_type.as_str() {
            "dbc" | "db2" => 40,
            "lua" | "xml" => 30,
            "ogg" | "mp3" => 20,
            "blp" | "tga" => 20,
            "m2" | "wmo" => 25,
            _ => 0,
        };
        configured + built_in
    });
    let content_key_bonus: u32 = if metadata.content_key.is_some() { 100 } else { 0 };
    let encoding_key_bonus: u32 = if metadata.encoding_key.is_some() { 90 } else { 0 };

    10 + file_type_bonus + content_key_bonus + encoding_key_bonus
}
pub fn get_stats(&self) -> PatternStats {
let mut stats = PatternStats::default();
for pattern in &self.compiled_patterns {
match &pattern.pattern_type {
PatternType::Glob(_) => stats.glob_patterns += 1,
PatternType::Regex(_) => stats.regex_patterns += 1,
PatternType::ContentKey(_) => stats.content_keys += 1,
PatternType::EncodingKey(_) => stats.encoding_keys += 1,
PatternType::FilePath(_) => stats.file_paths += 1,
}
}
stats.total_patterns = self.compiled_patterns.len();
stats
}
}
impl Default for PatternExtractor {
fn default() -> Self {
Self::new()
}
}
/// Pattern counts reported by `PatternExtractor::get_stats`.
#[derive(Debug, Default)]
pub struct PatternStats {
/// Number of registered patterns of any kind.
pub total_patterns: usize,
/// Number of glob patterns.
pub glob_patterns: usize,
/// Number of regex patterns.
pub regex_patterns: usize,
/// Number of content-key patterns.
pub content_keys: usize,
/// Number of encoding-key patterns.
pub encoding_keys: usize,
/// Number of literal file-path patterns.
pub file_paths: usize,
}
#[cfg(test)]
mod tests {
use super::*;
/// Each kind of pattern string is classified as the expected `PatternType`.
#[test]
fn test_pattern_detection() {
    let extractor = PatternExtractor::new();
    let detect = |pattern: &str| extractor.detect_pattern_type(pattern).unwrap();

    // Wildcards make a glob.
    assert!(matches!(detect("*.dbc"), PatternType::Glob(_)));
    assert!(matches!(detect("interface/**/*.lua"), PatternType::Glob(_)));

    // Slash-delimited text is a regex.
    assert!(matches!(
        detect("/sound/.*\\.ogg$/"),
        PatternType::Regex(_)
    ));

    // 32 hex digits: content key; 18 hex digits: encoding key.
    assert!(matches!(
        detect("0123456789abcdef0123456789abcdef"),
        PatternType::ContentKey(_)
    ));
    assert!(matches!(
        detect("0123456789abcdef01"),
        PatternType::EncodingKey(_)
    ));

    // Anything else is a literal path.
    assert!(matches!(
        detect("world/maps/azeroth/azeroth.wdt"),
        PatternType::FilePath(_)
    ));
}
/// `*.dbc` selects exactly the two `.dbc` files from a mixed list.
#[test]
fn test_glob_matching() {
    let files: Vec<String> = [
        "achievement.dbc",
        "spell.dbc",
        "item.db2",
        "interface/framexml/uiparent.lua",
    ]
    .iter()
    .map(|path| String::from(*path))
    .collect();

    let mut extractor = PatternExtractor::new();
    extractor.add_pattern("*.dbc").unwrap();
    let matches = extractor.match_files(&files);

    assert_eq!(matches.len(), 2);
    for expected in ["achievement.dbc", "spell.dbc"].iter() {
        assert!(matches.iter().any(|m| m.file_path == *expected));
    }
}
/// A `/.../` regex pattern selects the two `.lua` files and skips the rest.
#[test]
fn test_regex_matching() {
    let files: Vec<String> = [
        "interface/framexml/uiparent.lua",
        "scripts/addon.lua",
        "spell.dbc",
    ]
    .iter()
    .map(|path| String::from(*path))
    .collect();

    let mut extractor = PatternExtractor::new();
    extractor.add_pattern("/.*\\.lua$/").unwrap();

    let matches = extractor.match_files(&files);
    assert_eq!(matches.len(), 2);
}
/// Pins the exact regex text generated for a few representative globs
/// under the default (case-insensitive) configuration.
#[test]
fn test_glob_to_regex_conversion() {
    let extractor = PatternExtractor::new();
    let cases = [
        ("*.dbc", "(?i)^[^/]*\\.dbc$"),
        ("test?.txt", "(?i)^test[^/]\\.txt$"),
        ("**/*.lua", "(?i)^(?:[^/]+/)*[^/]*\\.lua$"),
    ];
    for &(glob, expected) in &cases {
        assert_eq!(extractor.glob_to_regex(glob), expected);
    }
}
/// A `.dbc` match scores above 50 with the default configuration.
#[test]
fn test_priority_calculation() {
    let extractor = PatternExtractor::new();
    let metadata = MatchMetadata {
        file_type: Some(String::from("dbc")),
        ..MatchMetadata::default()
    };
    let dbc_match = PatternMatch {
        file_path: String::from("spell.dbc"),
        pattern: String::from("*.dbc"),
        metadata,
    };

    assert!(extractor.calculate_priority(&dbc_match) > 50);
}
}