1use regex::Regex;
4use std::collections::HashSet;
5use std::path::Path;
6use thiserror::Error;
7use tracing::{debug, info, warn};
8
9#[derive(Error, Debug)]
10pub enum PatternError {
11 #[error("Invalid regex pattern: {0}")]
12 InvalidRegex(#[from] regex::Error),
13 #[error("Invalid content key format: {0}")]
14 InvalidContentKey(String),
15 #[error("Invalid encoding key format: {0}")]
16 InvalidEncodingKey(String),
17 #[error("Pattern type could not be determined: {0}")]
18 UnknownPattern(String),
19}
20
21#[derive(Debug, Clone)]
23pub enum PatternType {
24 Glob(String),
26 Regex(Regex),
28 ContentKey(String),
30 EncodingKey(String),
32 FilePath(String),
34}
35
36impl PartialEq for PatternType {
37 fn eq(&self, other: &Self) -> bool {
38 match (self, other) {
39 (PatternType::Glob(a), PatternType::Glob(b)) => a == b,
40 (PatternType::Regex(a), PatternType::Regex(b)) => a.as_str() == b.as_str(),
41 (PatternType::ContentKey(a), PatternType::ContentKey(b)) => a == b,
42 (PatternType::EncodingKey(a), PatternType::EncodingKey(b)) => a == b,
43 (PatternType::FilePath(a), PatternType::FilePath(b)) => a == b,
44 _ => false,
45 }
46 }
47}
48
/// Options that control how patterns are compiled and how matches are ranked.
///
/// Derives `PartialEq`/`Eq` so that configurations can be compared directly
/// (for example in tests or when detecting configuration changes).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatternConfig {
    /// Whether matching distinguishes upper-case from lower-case characters.
    pub case_sensitive: bool,
    /// Upper bound on the number of matches accepted per pattern; `None`
    /// means unlimited.
    pub max_matches_per_pattern: Option<usize>,
    /// Whether directory entries should be included.
    // NOTE(review): not read by any code visible in this file — confirm usage.
    pub include_directories: bool,
    /// File extensions (without the leading dot) whose matches are ranked
    /// higher.
    pub priority_extensions: Vec<String>,
}
61
62impl Default for PatternConfig {
63 fn default() -> Self {
64 Self {
65 case_sensitive: false,
66 max_matches_per_pattern: None,
67 include_directories: false,
68 priority_extensions: vec!["dbc".to_string(), "db2".to_string(), "lua".to_string()],
69 }
70 }
71}
72
73#[derive(Debug)]
75pub struct CompiledPattern {
76 pub pattern_type: PatternType,
77 pub original: String,
78 pub config: PatternConfig,
79}
80
81#[derive(Debug, Clone)]
83pub struct PatternMatch {
84 pub file_path: String,
86 pub pattern: String,
88 pub metadata: MatchMetadata,
90}
91
/// Optional details attached to a [`PatternMatch`].
///
/// Every field defaults to "unknown" (`None`) or `0`. Derives
/// `PartialEq`/`Eq` so metadata values can be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MatchMetadata {
    /// Size of the file, when known.
    pub file_size: Option<u64>,
    /// Content key associated with the match, when known.
    pub content_key: Option<String>,
    /// Encoding key associated with the match, when known.
    pub encoding_key: Option<String>,
    /// File type of the match, when known.
    pub file_type: Option<String>,
    /// Ranking score; larger values are more important.
    pub priority_score: u32,
}
106
107pub struct PatternExtractor {
109 config: PatternConfig,
110 compiled_patterns: Vec<CompiledPattern>,
111}
112
113impl PatternExtractor {
114 pub fn new() -> Self {
116 Self {
117 config: PatternConfig::default(),
118 compiled_patterns: Vec::new(),
119 }
120 }
121
122 pub fn with_config(config: PatternConfig) -> Self {
124 Self {
125 config,
126 compiled_patterns: Vec::new(),
127 }
128 }
129
130 pub fn add_pattern(&mut self, pattern: &str) -> Result<(), PatternError> {
132 let pattern_type = self.detect_pattern_type(pattern)?;
133 let compiled = CompiledPattern {
134 pattern_type,
135 original: pattern.to_string(),
136 config: self.config.clone(),
137 };
138
139 info!("Added pattern: {} -> {:?}", pattern, compiled.pattern_type);
140 self.compiled_patterns.push(compiled);
141 Ok(())
142 }
143
144 pub fn add_patterns(&mut self, patterns: &[String]) -> Result<(), PatternError> {
146 for pattern in patterns {
147 self.add_pattern(pattern)?;
148 }
149 Ok(())
150 }
151
152 fn detect_pattern_type(&self, pattern: &str) -> Result<PatternType, PatternError> {
154 if pattern.starts_with('/') && pattern.ends_with('/') && pattern.len() > 2 {
156 let regex_str = &pattern[1..pattern.len() - 1];
157 let regex = if self.config.case_sensitive {
158 Regex::new(regex_str)?
159 } else {
160 Regex::new(&format!("(?i){regex_str}"))?
161 };
162 return Ok(PatternType::Regex(regex));
163 }
164
165 if pattern.len() == 32 && pattern.chars().all(|c| c.is_ascii_hexdigit()) {
167 return Ok(PatternType::ContentKey(pattern.to_lowercase()));
168 }
169
170 if pattern.len() == 18 && pattern.chars().all(|c| c.is_ascii_hexdigit()) {
172 return Ok(PatternType::EncodingKey(pattern.to_lowercase()));
173 }
174
175 if pattern.contains('*')
177 || pattern.contains('?')
178 || pattern.contains('[')
179 || pattern.contains('{')
180 {
181 return Ok(PatternType::Glob(pattern.to_string()));
182 }
183
184 Ok(PatternType::FilePath(pattern.to_string()))
186 }
187
188 pub fn match_files(&self, file_paths: &[String]) -> Vec<PatternMatch> {
190 let mut matches = Vec::new();
191 let mut seen_files = HashSet::new();
192
193 info!(
194 "Matching {} patterns against {} files",
195 self.compiled_patterns.len(),
196 file_paths.len()
197 );
198
199 for compiled_pattern in &self.compiled_patterns {
200 let pattern_matches = self.match_pattern(compiled_pattern, file_paths);
201
202 debug!(
203 "Pattern '{}' matched {} files",
204 compiled_pattern.original,
205 pattern_matches.len()
206 );
207
208 let mut added_for_pattern = 0;
210 for mut pattern_match in pattern_matches {
211 if seen_files.contains(&pattern_match.file_path) {
212 continue;
213 }
214
215 if let Some(limit) = compiled_pattern.config.max_matches_per_pattern {
217 if added_for_pattern >= limit {
218 debug!(
219 "Reached limit of {} matches for pattern '{}'",
220 limit, compiled_pattern.original
221 );
222 break;
223 }
224 }
225
226 pattern_match.metadata.priority_score = self.calculate_priority(&pattern_match);
228
229 seen_files.insert(pattern_match.file_path.clone());
230 matches.push(pattern_match);
231 added_for_pattern += 1;
232 }
233 }
234
235 matches.sort_by(|a, b| b.metadata.priority_score.cmp(&a.metadata.priority_score));
237
238 info!("Total matches found: {}", matches.len());
239 matches
240 }
241
242 fn match_pattern(
244 &self,
245 compiled_pattern: &CompiledPattern,
246 file_paths: &[String],
247 ) -> Vec<PatternMatch> {
248 match &compiled_pattern.pattern_type {
249 PatternType::Glob(glob_pattern) => {
250 self.match_glob_pattern(glob_pattern, file_paths, &compiled_pattern.original)
251 }
252 PatternType::Regex(regex) => {
253 self.match_regex_pattern(regex, file_paths, &compiled_pattern.original)
254 }
255 PatternType::ContentKey(ckey) => {
256 self.match_content_key(ckey, &compiled_pattern.original)
257 }
258 PatternType::EncodingKey(ekey) => {
259 self.match_encoding_key(ekey, &compiled_pattern.original)
260 }
261 PatternType::FilePath(path) => {
262 self.match_file_path(path, file_paths, &compiled_pattern.original)
263 }
264 }
265 }
266
267 fn match_glob_pattern(
269 &self,
270 glob_pattern: &str,
271 file_paths: &[String],
272 original: &str,
273 ) -> Vec<PatternMatch> {
274 let mut matches = Vec::new();
275
276 let regex_pattern = self.glob_to_regex(glob_pattern);
278 let regex = match Regex::new(®ex_pattern) {
279 Ok(r) => r,
280 Err(e) => {
281 warn!(
282 "Failed to compile glob pattern '{}' to regex: {}",
283 glob_pattern, e
284 );
285 return matches;
286 }
287 };
288
289 for file_path in file_paths {
290 let test_path = if self.config.case_sensitive {
291 file_path.clone()
292 } else {
293 file_path.to_lowercase()
294 };
295
296 if regex.is_match(&test_path) {
297 matches.push(PatternMatch {
298 file_path: file_path.clone(),
299 pattern: original.to_string(),
300 metadata: self.create_metadata_for_file(file_path),
301 });
302 }
303 }
304
305 matches
306 }
307
308 fn match_regex_pattern(
310 &self,
311 regex: &Regex,
312 file_paths: &[String],
313 original: &str,
314 ) -> Vec<PatternMatch> {
315 let mut matches = Vec::new();
316
317 for file_path in file_paths {
318 if regex.is_match(file_path) {
319 matches.push(PatternMatch {
320 file_path: file_path.clone(),
321 pattern: original.to_string(),
322 metadata: self.create_metadata_for_file(file_path),
323 });
324 }
325 }
326
327 matches
328 }
329
330 fn match_content_key(&self, _ckey: &str, original: &str) -> Vec<PatternMatch> {
332 vec![PatternMatch {
335 file_path: format!("content_key_{_ckey}.data"),
336 pattern: original.to_string(),
337 metadata: MatchMetadata {
338 content_key: Some(_ckey.to_string()),
339 priority_score: 100, ..Default::default()
341 },
342 }]
343 }
344
345 fn match_encoding_key(&self, _ekey: &str, original: &str) -> Vec<PatternMatch> {
347 vec![PatternMatch {
350 file_path: format!("encoding_key_{_ekey}.data"),
351 pattern: original.to_string(),
352 metadata: MatchMetadata {
353 encoding_key: Some(_ekey.to_string()),
354 priority_score: 90, ..Default::default()
356 },
357 }]
358 }
359
360 fn match_file_path(
362 &self,
363 target_path: &str,
364 file_paths: &[String],
365 original: &str,
366 ) -> Vec<PatternMatch> {
367 let mut matches = Vec::new();
368
369 let normalized_target = self.normalize_path(target_path);
370
371 for file_path in file_paths {
372 let normalized_file = self.normalize_path(file_path);
373
374 if normalized_target == normalized_file {
375 matches.push(PatternMatch {
376 file_path: file_path.clone(),
377 pattern: original.to_string(),
378 metadata: self.create_metadata_for_file(file_path),
379 });
380 }
381 }
382
383 matches
384 }
385
386 fn glob_to_regex(&self, glob: &str) -> String {
388 let mut regex = String::new();
389 let mut chars = glob.chars().peekable();
390
391 regex.push('^');
392
393 while let Some(ch) = chars.next() {
394 match ch {
395 '*' => {
396 if chars.peek() == Some(&'*') {
397 chars.next(); if chars.peek() == Some(&'/') {
399 chars.next(); regex.push_str("(?:[^/]+/)*"); } else {
402 regex.push_str(".*"); }
404 } else {
405 regex.push_str("[^/]*"); }
407 }
408 '?' => regex.push_str("[^/]"),
409 '[' => {
410 regex.push('[');
411 for ch in chars.by_ref() {
413 regex.push(ch);
414 if ch == ']' {
415 break;
416 }
417 }
418 }
419 '{' => {
420 regex.push('(');
422 for ch in chars.by_ref() {
423 if ch == '}' {
424 break;
425 } else if ch == ',' {
426 regex.push('|');
427 } else {
428 if "^$()[]{}|+.\\".contains(ch) {
429 regex.push('\\');
430 }
431 regex.push(ch);
432 }
433 }
434 regex.push(')');
435 }
436 ch if "^$()[]{}|+.\\".contains(ch) => {
438 regex.push('\\');
439 regex.push(ch);
440 }
441 ch => regex.push(ch),
442 }
443 }
444
445 regex.push('$');
446
447 if !self.config.case_sensitive {
448 format!("(?i){regex}")
449 } else {
450 regex
451 }
452 }
453
454 fn normalize_path(&self, path: &str) -> String {
456 let mut normalized = path.replace('\\', "/");
457 if !self.config.case_sensitive {
458 normalized = normalized.to_lowercase();
459 }
460 normalized
461 }
462
463 fn create_metadata_for_file(&self, file_path: &str) -> MatchMetadata {
465 let file_type = Path::new(file_path)
466 .extension()
467 .and_then(|ext| ext.to_str())
468 .map(|ext| ext.to_lowercase());
469
470 MatchMetadata {
471 file_type,
472 ..Default::default()
473 }
474 }
475
476 fn calculate_priority(&self, pattern_match: &PatternMatch) -> u32 {
478 let mut score = 10; if let Some(file_type) = &pattern_match.metadata.file_type {
482 if self.config.priority_extensions.contains(file_type) {
483 score += 50;
484 }
485
486 score += match file_type.as_str() {
488 "dbc" | "db2" => 40, "lua" | "xml" => 30, "ogg" | "mp3" => 20, "blp" | "tga" => 20, "m2" | "wmo" => 25, _ => 0,
494 };
495 }
496
497 if pattern_match.metadata.content_key.is_some() {
499 score += 100;
500 }
501 if pattern_match.metadata.encoding_key.is_some() {
502 score += 90;
503 }
504
505 score
506 }
507
508 pub fn get_stats(&self) -> PatternStats {
510 let mut stats = PatternStats::default();
511
512 for pattern in &self.compiled_patterns {
513 match &pattern.pattern_type {
514 PatternType::Glob(_) => stats.glob_patterns += 1,
515 PatternType::Regex(_) => stats.regex_patterns += 1,
516 PatternType::ContentKey(_) => stats.content_keys += 1,
517 PatternType::EncodingKey(_) => stats.encoding_keys += 1,
518 PatternType::FilePath(_) => stats.file_paths += 1,
519 }
520 }
521
522 stats.total_patterns = self.compiled_patterns.len();
523 stats
524 }
525}
526
527impl Default for PatternExtractor {
528 fn default() -> Self {
529 Self::new()
530 }
531}
532
/// Counts of registered patterns, in total and broken down by pattern type.
///
/// Derives `Clone`, `PartialEq` and `Eq` so that snapshots can be stored and
/// compared (for example with `assert_eq!`).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct PatternStats {
    /// Number of registered patterns of any type.
    pub total_patterns: usize,
    /// Number of glob patterns.
    pub glob_patterns: usize,
    /// Number of regex patterns.
    pub regex_patterns: usize,
    /// Number of content-key patterns.
    pub content_keys: usize,
    /// Number of encoding-key patterns.
    pub encoding_keys: usize,
    /// Number of literal file-path patterns.
    pub file_paths: usize,
}
543
#[cfg(test)]
mod tests {
    use super::*;

    /// Each pattern syntax is classified as the expected [`PatternType`] variant.
    #[test]
    fn test_pattern_detection() {
        let extractor = PatternExtractor::new();

        // Globs.
        assert!(matches!(
            extractor.detect_pattern_type("*.dbc").unwrap(),
            PatternType::Glob(_)
        ));
        assert!(matches!(
            extractor.detect_pattern_type("interface/**/*.lua").unwrap(),
            PatternType::Glob(_)
        ));

        // Regex delimited by slashes.
        assert!(matches!(
            extractor.detect_pattern_type("/sound/.*\\.ogg$/").unwrap(),
            PatternType::Regex(_)
        ));

        // 32 hex digits.
        assert!(matches!(
            extractor
                .detect_pattern_type("0123456789abcdef0123456789abcdef")
                .unwrap(),
            PatternType::ContentKey(_)
        ));

        // 18 hex digits.
        assert!(matches!(
            extractor.detect_pattern_type("0123456789abcdef01").unwrap(),
            PatternType::EncodingKey(_)
        ));

        // Plain path.
        assert!(matches!(
            extractor
                .detect_pattern_type("world/maps/azeroth/azeroth.wdt")
                .unwrap(),
            PatternType::FilePath(_)
        ));
    }

    /// `*.dbc` selects exactly the two `.dbc` files.
    #[test]
    fn test_glob_matching() {
        let mut extractor = PatternExtractor::new();
        extractor.add_pattern("*.dbc").unwrap();

        let files = vec![
            "achievement.dbc".to_string(),
            "spell.dbc".to_string(),
            "item.db2".to_string(),
            "interface/framexml/uiparent.lua".to_string(),
        ];

        let matches = extractor.match_files(&files);
        assert_eq!(matches.len(), 2);
        assert!(matches.iter().any(|m| m.file_path == "achievement.dbc"));
        assert!(matches.iter().any(|m| m.file_path == "spell.dbc"));
    }

    /// A `/regex/` pattern selects exactly the two `.lua` files.
    #[test]
    fn test_regex_matching() {
        let mut extractor = PatternExtractor::new();
        extractor.add_pattern("/.*\\.lua$/").unwrap();

        let files = vec![
            "interface/framexml/uiparent.lua".to_string(),
            "scripts/addon.lua".to_string(),
            "spell.dbc".to_string(),
        ];

        let matches = extractor.match_files(&files);
        assert_eq!(matches.len(), 2);
        // Checking the count alone would not notice the wrong files being selected.
        assert!(matches.iter().all(|m| m.file_path.ends_with(".lua")));
    }

    /// Glob wildcards translate to the expected anchored, case-insensitive regexes.
    #[test]
    fn test_glob_to_regex_conversion() {
        let extractor = PatternExtractor::new();

        assert_eq!(extractor.glob_to_regex("*.dbc"), "(?i)^[^/]*\\.dbc$");
        assert_eq!(extractor.glob_to_regex("test?.txt"), "(?i)^test[^/]\\.txt$");
        assert_eq!(
            extractor.glob_to_regex("**/*.lua"),
            "(?i)^(?:[^/]+/)*[^/]*\\.lua$"
        );
    }

    /// A `dbc` match receives the base score plus both extension bonuses.
    #[test]
    fn test_priority_calculation() {
        let extractor = PatternExtractor::new();

        let dbc_match = PatternMatch {
            file_path: "spell.dbc".to_string(),
            pattern: "*.dbc".to_string(),
            metadata: MatchMetadata {
                file_type: Some("dbc".to_string()),
                ..Default::default()
            },
        };

        // 10 (base) + 50 (default priority extension) + 40 (`dbc` type bonus).
        // An exact value is asserted because `score > 50` would still pass if
        // the type bonus were lost.
        assert_eq!(extractor.calculate_priority(&dbc_match), 100);
    }
}