threatflux_binary_analysis/utils/
patterns.rs

1//! Pattern matching utilities for binary analysis
2//!
3//! This module provides advanced pattern matching capabilities for identifying
4//! specific byte sequences, strings, and structural patterns in binary data.
5
6use crate::{BinaryError, Result};
7use std::collections::HashMap;
8
9/// Pattern matcher for binary data
10pub struct PatternMatcher {
11    patterns: Vec<Pattern>,
12    config: MatchConfig,
13}
14
/// Pattern matching configuration
///
/// Limits and switches read by [`PatternMatcher`] while it searches a buffer.
#[derive(Debug, Clone)]
pub struct MatchConfig {
    /// Case sensitive string matching; when `false`, string patterns match
    /// regardless of letter case
    pub case_sensitive: bool,
    /// Maximum number of matches to find; searching stops once the limit is hit
    pub max_matches: usize,
    /// Enable wildcard matching
    // NOTE(review): none of the search routines visible in this file read this
    // flag — confirm whether it is consumed elsewhere or still to be wired in.
    pub enable_wildcards: bool,
    /// Minimum pattern length; byte and string patterns shorter than this are
    /// skipped without producing matches
    pub min_pattern_length: usize,
}
27
28impl Default for MatchConfig {
29    fn default() -> Self {
30        Self {
31            case_sensitive: true,
32            max_matches: 1000,
33            enable_wildcards: true,
34            min_pattern_length: 3,
35        }
36    }
37}
38
/// A pattern to search for
///
/// `pattern_type` selects which search routine is used, while `data` carries
/// the payload that routine expects; a routine given a payload variant it does
/// not handle produces no matches.
#[derive(Debug, Clone)]
pub struct Pattern {
    /// Pattern name/identifier
    pub name: String,
    /// Pattern type (decides how `data` is interpreted during a search)
    pub pattern_type: PatternType,
    /// Pattern data (the bytes or text to look for)
    pub data: PatternData,
    /// Pattern category (used to group results)
    pub category: PatternCategory,
    /// Description (human-readable explanation of what the pattern indicates)
    pub description: String,
}
53
/// Types of patterns
///
/// Each variant is dispatched to its own search routine by the matcher.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternType {
    /// Exact byte sequence, matched anywhere in the data
    Bytes,
    /// String pattern, subject to the case-sensitivity setting
    String,
    /// Regular expression (the search routine in this file is a stub that
    /// returns no matches)
    Regex,
    /// Hex pattern with wildcards
    HexWildcard,
    /// Magic signature, only checked at the very start of the data
    Magic,
    /// Structural pattern (the search routine in this file is a stub that
    /// returns no matches)
    Structural,
}
70
/// Pattern data
///
/// Payload of a [`Pattern`]; the variant must suit the pattern's type for the
/// corresponding search routine to produce matches.
#[derive(Debug, Clone)]
pub enum PatternData {
    /// Raw bytes (used by byte-sequence and magic patterns)
    Bytes(Vec<u8>),
    /// String data
    String(String),
    /// Hex pattern with wildcards: two hex digits per byte, `??` for a byte
    /// that may take any value (e.g. `48 65 ?? 6c 6f`)
    HexWildcard(String),
    /// Regular expression pattern
    Regex(String),
}
83
/// Pattern categories
///
/// Used as the grouping key for search results. Built-in pattern sets exist
/// in this file for `FileFormat`, `Compiler`, `Packer`, `Crypto`, `Malware`
/// and `Api`; the remaining categories have no built-in patterns.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PatternCategory {
    /// File format signatures
    FileFormat,
    /// Compiler signatures
    Compiler,
    /// Packer signatures
    Packer,
    /// Cryptographic constants
    Crypto,
    /// Malware signatures
    Malware,
    /// API strings
    Api,
    /// Debug information
    Debug,
    /// Copyright/version strings
    Metadata,
    /// Network protocols
    Network,
    /// Custom patterns
    Custom,
}
108
/// Pattern match result
///
/// One occurrence of a [`Pattern`] inside the searched data.
#[derive(Debug, Clone)]
pub struct PatternMatch {
    /// Pattern that matched (an owned copy)
    pub pattern: Pattern,
    /// Offset where match was found, in bytes from the start of the data
    pub offset: usize,
    /// Length of the match in bytes
    pub length: usize,
    /// Matched data
    pub data: Vec<u8>,
    /// Confidence score (0.0 - 1.0); the search routines in this file always
    /// report 1.0
    pub confidence: f64,
}
123
/// Pattern search results
///
/// Aggregate outcome of one search over a buffer.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// All matches found, in the order they were produced
    pub matches: Vec<PatternMatch>,
    /// Matches grouped by category (each entry holds copies of the matches
    /// listed in `matches`)
    pub by_category: crate::types::PatternMatchMap,
    /// Total bytes searched (the length of the input buffer)
    pub bytes_searched: usize,
    /// Search duration in milliseconds
    pub duration_ms: u64,
}
136
137impl Default for PatternMatcher {
138    fn default() -> Self {
139        Self::new()
140    }
141}
142
143impl PatternMatcher {
144    /// Create a new pattern matcher
145    pub fn new() -> Self {
146        Self {
147            patterns: Vec::new(),
148            config: MatchConfig::default(),
149        }
150    }
151
152    /// Create pattern matcher with configuration
153    pub fn with_config(config: MatchConfig) -> Self {
154        Self {
155            patterns: Vec::new(),
156            config,
157        }
158    }
159
160    /// Add a pattern to search for
161    pub fn add_pattern(&mut self, pattern: Pattern) {
162        self.patterns.push(pattern);
163    }
164
165    /// Add multiple patterns
166    pub fn add_patterns(&mut self, patterns: Vec<Pattern>) {
167        self.patterns.extend(patterns);
168    }
169
170    /// Load built-in pattern sets
171    pub fn load_builtin_patterns(&mut self, categories: &[PatternCategory]) {
172        for category in categories {
173            let patterns = get_builtin_patterns(category);
174            self.add_patterns(patterns);
175        }
176    }
177
178    /// Search for all patterns in the given data
179    pub fn search(&self, data: &[u8]) -> Result<SearchResults> {
180        let start_time = std::time::Instant::now();
181        let mut matches = Vec::new();
182        let mut by_category: crate::types::PatternMatchMap = HashMap::new();
183
184        for pattern in &self.patterns {
185            let pattern_matches = self.search_pattern(data, pattern)?;
186
187            for pattern_match in pattern_matches {
188                by_category
189                    .entry(pattern_match.pattern.category.clone())
190                    .or_default()
191                    .push(pattern_match.clone());
192
193                matches.push(pattern_match);
194
195                if matches.len() >= self.config.max_matches {
196                    break;
197                }
198            }
199
200            if matches.len() >= self.config.max_matches {
201                break;
202            }
203        }
204
205        let duration = start_time.elapsed();
206
207        Ok(SearchResults {
208            matches,
209            by_category,
210            bytes_searched: data.len(),
211            duration_ms: duration.as_millis() as u64,
212        })
213    }
214
215    /// Search for a specific pattern in data
216    fn search_pattern(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
217        match &pattern.pattern_type {
218            PatternType::Bytes => self.search_bytes(data, pattern),
219            PatternType::String => self.search_string(data, pattern),
220            PatternType::HexWildcard => self.search_hex_wildcard(data, pattern),
221            PatternType::Magic => self.search_magic(data, pattern),
222            PatternType::Regex => self.search_regex(data, pattern),
223            PatternType::Structural => self.search_structural(data, pattern),
224        }
225    }
226
227    /// Search for exact byte sequences
228    fn search_bytes(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
229        let mut matches = Vec::new();
230
231        if let PatternData::Bytes(pattern_bytes) = &pattern.data {
232            if pattern_bytes.len() < self.config.min_pattern_length {
233                return Ok(matches);
234            }
235
236            let mut start = 0;
237            while start + pattern_bytes.len() <= data.len() {
238                if let Some(pos) = data[start..]
239                    .windows(pattern_bytes.len())
240                    .position(|window| window == pattern_bytes)
241                {
242                    let offset = start + pos;
243                    matches.push(PatternMatch {
244                        pattern: pattern.clone(),
245                        offset,
246                        length: pattern_bytes.len(),
247                        data: data[offset..offset + pattern_bytes.len()].to_vec(),
248                        confidence: 1.0,
249                    });
250
251                    start = offset + 1;
252
253                    if matches.len() >= self.config.max_matches {
254                        break;
255                    }
256                } else {
257                    break;
258                }
259            }
260        }
261
262        Ok(matches)
263    }
264
265    /// Search for string patterns
266    fn search_string(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
267        let mut matches = Vec::new();
268
269        if let PatternData::String(pattern_str) = &pattern.data {
270            if pattern_str.len() < self.config.min_pattern_length {
271                return Ok(matches);
272            }
273
274            let search_str = if self.config.case_sensitive {
275                pattern_str.clone()
276            } else {
277                pattern_str.to_lowercase()
278            };
279
280            let search_bytes = search_str.as_bytes();
281
282            // Convert data to string for searching
283            if let Ok(data_str) = String::from_utf8(data.to_vec()) {
284                let search_data = if self.config.case_sensitive {
285                    data_str
286                } else {
287                    data_str.to_lowercase()
288                };
289
290                let mut start = 0;
291                while let Some(pos) = search_data[start..].find(&search_str) {
292                    let offset = start + pos;
293                    matches.push(PatternMatch {
294                        pattern: pattern.clone(),
295                        offset,
296                        length: search_bytes.len(),
297                        data: data[offset..offset + search_bytes.len()].to_vec(),
298                        confidence: 1.0,
299                    });
300
301                    start = offset + 1;
302
303                    if matches.len() >= self.config.max_matches {
304                        break;
305                    }
306                }
307            }
308        }
309
310        Ok(matches)
311    }
312
313    /// Search for hex patterns with wildcards
314    fn search_hex_wildcard(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
315        let mut matches = Vec::new();
316
317        if let PatternData::HexWildcard(hex_pattern) = &pattern.data {
318            let compiled_pattern = compile_hex_wildcard(hex_pattern)?;
319
320            let mut start = 0;
321            while start + compiled_pattern.len() <= data.len() {
322                if hex_wildcard_matches(
323                    &data[start..start + compiled_pattern.len()],
324                    &compiled_pattern,
325                ) {
326                    matches.push(PatternMatch {
327                        pattern: pattern.clone(),
328                        offset: start,
329                        length: compiled_pattern.len(),
330                        data: data[start..start + compiled_pattern.len()].to_vec(),
331                        confidence: 1.0,
332                    });
333
334                    if matches.len() >= self.config.max_matches {
335                        break;
336                    }
337                }
338                start += 1;
339            }
340        }
341
342        Ok(matches)
343    }
344
345    /// Search for magic signatures
346    fn search_magic(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
347        // Magic signatures are typically at the beginning of files
348        let mut matches = Vec::new();
349
350        if let PatternData::Bytes(magic_bytes) = &pattern.data {
351            if data.len() >= magic_bytes.len() && &data[..magic_bytes.len()] == magic_bytes {
352                matches.push(PatternMatch {
353                    pattern: pattern.clone(),
354                    offset: 0,
355                    length: magic_bytes.len(),
356                    data: magic_bytes.clone(),
357                    confidence: 1.0,
358                });
359            }
360        }
361
362        Ok(matches)
363    }
364
365    /// Search using regular expressions
366    fn search_regex(&self, _data: &[u8], _pattern: &Pattern) -> Result<Vec<PatternMatch>> {
367        // Regex support would require the regex crate
368        // For now, return empty matches
369        Ok(Vec::new())
370    }
371
372    /// Search for structural patterns
373    fn search_structural(&self, _data: &[u8], _pattern: &Pattern) -> Result<Vec<PatternMatch>> {
374        // Structural pattern matching would be more complex
375        // For now, return empty matches
376        Ok(Vec::new())
377    }
378}
379
380/// Compile hex wildcard pattern
381fn compile_hex_wildcard(pattern: &str) -> crate::types::HexPatternResult {
382    let mut compiled = Vec::new();
383    let clean_pattern = pattern.replace(" ", "").replace("\n", "");
384
385    if clean_pattern.len() % 2 != 0 {
386        return Err(BinaryError::invalid_data(
387            "Hex pattern must have even length",
388        ));
389    }
390
391    for i in (0..clean_pattern.len()).step_by(2) {
392        let hex_byte = &clean_pattern[i..i + 2];
393
394        if hex_byte == "??" {
395            compiled.push(None); // Wildcard
396        } else {
397            let byte_value = u8::from_str_radix(hex_byte, 16).map_err(|_| {
398                BinaryError::invalid_data(format!("Invalid hex byte: {}", hex_byte))
399            })?;
400            compiled.push(Some(byte_value));
401        }
402    }
403
404    Ok(compiled)
405}
406
407/// Check if data matches hex wildcard pattern
408fn hex_wildcard_matches(data: &[u8], pattern: &crate::types::HexPattern) -> bool {
409    if data.len() != pattern.len() {
410        return false;
411    }
412
413    for (i, &byte) in data.iter().enumerate() {
414        match pattern[i] {
415            Some(expected) if expected != byte => return false,
416            None => continue, // Wildcard matches anything
417            _ => continue,
418        }
419    }
420
421    true
422}
423
424/// Get built-in patterns for a category
425fn get_builtin_patterns(category: &PatternCategory) -> Vec<Pattern> {
426    match category {
427        PatternCategory::FileFormat => get_file_format_patterns(),
428        PatternCategory::Compiler => get_compiler_patterns(),
429        PatternCategory::Packer => get_packer_patterns(),
430        PatternCategory::Crypto => get_crypto_patterns(),
431        PatternCategory::Malware => get_malware_patterns(),
432        PatternCategory::Api => get_api_patterns(),
433        _ => Vec::new(),
434    }
435}
436
437/// File format signature patterns
438fn get_file_format_patterns() -> Vec<Pattern> {
439    vec![
440        Pattern {
441            name: "PE_MZ".to_string(),
442            pattern_type: PatternType::Magic,
443            data: PatternData::Bytes(b"MZ".to_vec()),
444            category: PatternCategory::FileFormat,
445            description: "DOS/PE executable signature".to_string(),
446        },
447        Pattern {
448            name: "ELF".to_string(),
449            pattern_type: PatternType::Magic,
450            data: PatternData::Bytes(b"\x7fELF".to_vec()),
451            category: PatternCategory::FileFormat,
452            description: "ELF executable signature".to_string(),
453        },
454        Pattern {
455            name: "Mach_O_32".to_string(),
456            pattern_type: PatternType::Magic,
457            data: PatternData::Bytes(vec![0xfe, 0xed, 0xfa, 0xce]),
458            category: PatternCategory::FileFormat,
459            description: "Mach-O 32-bit signature".to_string(),
460        },
461        Pattern {
462            name: "Mach_O_64".to_string(),
463            pattern_type: PatternType::Magic,
464            data: PatternData::Bytes(vec![0xfe, 0xed, 0xfa, 0xcf]),
465            category: PatternCategory::FileFormat,
466            description: "Mach-O 64-bit signature".to_string(),
467        },
468    ]
469}
470
471/// Compiler signature patterns
472fn get_compiler_patterns() -> Vec<Pattern> {
473    vec![
474        Pattern {
475            name: "GCC".to_string(),
476            pattern_type: PatternType::String,
477            data: PatternData::String("GCC:".to_string()),
478            category: PatternCategory::Compiler,
479            description: "GCC compiler signature".to_string(),
480        },
481        Pattern {
482            name: "MSVC".to_string(),
483            pattern_type: PatternType::String,
484            data: PatternData::String("Microsoft C/C++".to_string()),
485            category: PatternCategory::Compiler,
486            description: "Microsoft Visual C++ signature".to_string(),
487        },
488    ]
489}
490
491/// Packer signature patterns
492fn get_packer_patterns() -> Vec<Pattern> {
493    vec![Pattern {
494        name: "UPX".to_string(),
495        pattern_type: PatternType::String,
496        data: PatternData::String("UPX!".to_string()),
497        category: PatternCategory::Packer,
498        description: "UPX packer signature".to_string(),
499    }]
500}
501
502/// Cryptographic constants patterns
503fn get_crypto_patterns() -> Vec<Pattern> {
504    vec![Pattern {
505        name: "MD5_Init".to_string(),
506        pattern_type: PatternType::Bytes,
507        data: PatternData::Bytes(vec![0x01, 0x23, 0x45, 0x67]), // MD5 initial value
508        category: PatternCategory::Crypto,
509        description: "MD5 initialization constants".to_string(),
510    }]
511}
512
513/// Malware signature patterns
514fn get_malware_patterns() -> Vec<Pattern> {
515    vec![Pattern {
516        name: "Suspicious_API".to_string(),
517        pattern_type: PatternType::String,
518        data: PatternData::String("VirtualAllocEx".to_string()),
519        category: PatternCategory::Malware,
520        description: "Suspicious Windows API call".to_string(),
521    }]
522}
523
524/// API string patterns
525fn get_api_patterns() -> Vec<Pattern> {
526    vec![Pattern {
527        name: "CreateProcess".to_string(),
528        pattern_type: PatternType::String,
529        data: PatternData::String("CreateProcessA".to_string()),
530        category: PatternCategory::Api,
531        description: "Windows CreateProcess API".to_string(),
532    }]
533}
534
535#[cfg(test)]
536mod tests {
537    use super::*;
538
539    // ==============================
540    // Pattern Matcher Creation Tests
541    // ==============================
542
543    #[test]
544    fn test_pattern_matcher_creation() {
545        let matcher = PatternMatcher::new();
546        assert_eq!(matcher.patterns.len(), 0);
547        assert!(matcher.config.case_sensitive);
548        assert_eq!(matcher.config.max_matches, 1000);
549        assert!(matcher.config.enable_wildcards);
550        assert_eq!(matcher.config.min_pattern_length, 3);
551    }
552
553    #[test]
554    fn test_pattern_matcher_default() {
555        let matcher = PatternMatcher::default();
556        assert_eq!(matcher.patterns.len(), 0);
557    }
558
559    #[test]
560    fn test_pattern_matcher_with_config() {
561        let config = MatchConfig {
562            case_sensitive: false,
563            max_matches: 500,
564            enable_wildcards: false,
565            min_pattern_length: 5,
566        };
567        let matcher = PatternMatcher::with_config(config.clone());
568        assert!(!matcher.config.case_sensitive);
569        assert_eq!(matcher.config.max_matches, 500);
570        assert!(!matcher.config.enable_wildcards);
571        assert_eq!(matcher.config.min_pattern_length, 5);
572    }
573
574    #[test]
575    fn test_match_config_default() {
576        let config = MatchConfig::default();
577        assert!(config.case_sensitive);
578        assert_eq!(config.max_matches, 1000);
579        assert!(config.enable_wildcards);
580        assert_eq!(config.min_pattern_length, 3);
581    }
582
583    // ==============================
584    // Pattern Addition Tests
585    // ==============================
586
587    #[test]
588    fn test_add_single_pattern() {
589        let mut matcher = PatternMatcher::new();
590        let pattern = create_test_pattern(
591            "test",
592            PatternType::Bytes,
593            PatternData::Bytes(b"test".to_vec()),
594        );
595
596        matcher.add_pattern(pattern);
597        assert_eq!(matcher.patterns.len(), 1);
598        assert_eq!(matcher.patterns[0].name, "test");
599    }
600
601    #[test]
602    fn test_add_multiple_patterns() {
603        let mut matcher = PatternMatcher::new();
604        let patterns = vec![
605            create_test_pattern(
606                "test1",
607                PatternType::Bytes,
608                PatternData::Bytes(b"test1".to_vec()),
609            ),
610            create_test_pattern(
611                "test2",
612                PatternType::String,
613                PatternData::String("test2".to_string()),
614            ),
615        ];
616
617        matcher.add_patterns(patterns);
618        assert_eq!(matcher.patterns.len(), 2);
619    }
620
621    // ==============================
622    // Hex Wildcard Compilation Tests
623    // ==============================
624
625    #[test]
626    fn test_hex_wildcard_compilation() {
627        let pattern = "48 65 ?? 6c 6f";
628        let compiled = compile_hex_wildcard(pattern).unwrap();
629
630        assert_eq!(compiled.len(), 5);
631        assert_eq!(compiled[0], Some(0x48));
632        assert_eq!(compiled[1], Some(0x65));
633        assert_eq!(compiled[2], None);
634        assert_eq!(compiled[3], Some(0x6c));
635        assert_eq!(compiled[4], Some(0x6f));
636    }
637
638    #[test]
639    fn test_hex_wildcard_compilation_no_spaces() {
640        let pattern = "48656c6f";
641        let compiled = compile_hex_wildcard(pattern).unwrap();
642
643        assert_eq!(compiled.len(), 4);
644        assert_eq!(compiled[0], Some(0x48));
645        assert_eq!(compiled[1], Some(0x65));
646        assert_eq!(compiled[2], Some(0x6c));
647        assert_eq!(compiled[3], Some(0x6f));
648    }
649
650    #[test]
651    fn test_hex_wildcard_compilation_with_newlines() {
652        let pattern = "48 65\n?? 6c\n6f";
653        let compiled = compile_hex_wildcard(pattern).unwrap();
654
655        assert_eq!(compiled.len(), 5);
656        assert_eq!(compiled[2], None);
657    }
658
659    #[test]
660    fn test_hex_wildcard_compilation_error_odd_length() {
661        let pattern = "48 65 6";
662        let result = compile_hex_wildcard(pattern);
663        assert!(result.is_err());
664    }
665
666    #[test]
667    fn test_hex_wildcard_compilation_error_invalid_hex() {
668        let pattern = "48 65 XY 6c";
669        let result = compile_hex_wildcard(pattern);
670        assert!(result.is_err());
671    }
672
673    #[test]
674    fn test_hex_wildcard_compilation_all_wildcards() {
675        let pattern = "?? ?? ??";
676        let compiled = compile_hex_wildcard(pattern).unwrap();
677
678        assert_eq!(compiled.len(), 3);
679        assert_eq!(compiled[0], None);
680        assert_eq!(compiled[1], None);
681        assert_eq!(compiled[2], None);
682    }
683
684    #[test]
685    fn test_hex_wildcard_matching() {
686        let data = &[0x48, 0x65, 0x78, 0x6c, 0x6f]; // "Hexlo"
687        let pattern = vec![Some(0x48), Some(0x65), None, Some(0x6c), Some(0x6f)];
688
689        assert!(hex_wildcard_matches(data, &pattern));
690
691        let wrong_pattern = vec![Some(0x48), Some(0x65), None, Some(0x6c), Some(0x70)];
692        assert!(!hex_wildcard_matches(data, &wrong_pattern));
693    }
694
695    #[test]
696    fn test_hex_wildcard_matching_length_mismatch() {
697        let data = &[0x48, 0x65, 0x78];
698        let pattern = vec![Some(0x48), Some(0x65), None, Some(0x6c)];
699
700        assert!(!hex_wildcard_matches(data, &pattern));
701    }
702
703    #[test]
704    fn test_hex_wildcard_matching_empty() {
705        let data = &[];
706        let pattern = vec![];
707
708        assert!(hex_wildcard_matches(data, &pattern));
709    }
710
711    // ==============================
712    // Byte Pattern Search Tests
713    // ==============================
714
715    #[test]
716    fn test_byte_pattern_search() {
717        let mut matcher = PatternMatcher::new();
718
719        let pattern = Pattern {
720            name: "test".to_string(),
721            pattern_type: PatternType::Bytes,
722            data: PatternData::Bytes(b"hello".to_vec()),
723            category: PatternCategory::Custom,
724            description: "Test pattern".to_string(),
725        };
726
727        matcher.add_pattern(pattern);
728
729        let data = b"This is a hello world test";
730        let results = matcher.search(data).unwrap();
731
732        assert_eq!(results.matches.len(), 1);
733        assert_eq!(results.matches[0].offset, 10);
734        assert_eq!(results.matches[0].length, 5);
735        assert_eq!(results.matches[0].data, b"hello");
736        assert_eq!(results.matches[0].confidence, 1.0);
737    }
738
739    #[test]
740    fn test_byte_pattern_search_multiple_matches() {
741        let mut matcher = PatternMatcher::new();
742        let pattern = create_test_pattern(
743            "test",
744            PatternType::Bytes,
745            PatternData::Bytes(b"abc".to_vec()),
746        );
747        matcher.add_pattern(pattern);
748
749        let data = b"abcabcabc";
750        let results = matcher.search(data).unwrap();
751
752        assert_eq!(results.matches.len(), 3); // Non-overlapping matches
753    }
754
755    #[test]
756    fn test_byte_pattern_search_overlapping_matches() {
757        let mut matcher = PatternMatcher::new();
758        let pattern = create_test_pattern(
759            "test",
760            PatternType::Bytes,
761            PatternData::Bytes(b"aaa".to_vec()),
762        );
763        matcher.add_pattern(pattern);
764
765        let data = b"aaaaa";
766        let results = matcher.search(data).unwrap();
767
768        assert_eq!(results.matches.len(), 3); // Overlapping matches at positions 0, 1, 2
769    }
770
771    #[test]
772    fn test_byte_pattern_search_no_match() {
773        let mut matcher = PatternMatcher::new();
774        let pattern = create_test_pattern(
775            "test",
776            PatternType::Bytes,
777            PatternData::Bytes(b"xyz".to_vec()),
778        );
779        matcher.add_pattern(pattern);
780
781        let data = b"hello world";
782        let results = matcher.search(data).unwrap();
783
784        assert_eq!(results.matches.len(), 0);
785    }
786
787    #[test]
788    fn test_byte_pattern_search_too_short() {
789        let mut matcher = PatternMatcher::new();
790        let pattern = create_test_pattern(
791            "test",
792            PatternType::Bytes,
793            PatternData::Bytes(b"ab".to_vec()),
794        );
795        matcher.add_pattern(pattern);
796
797        let data = b"hello world";
798        let results = matcher.search(data).unwrap();
799
800        assert_eq!(results.matches.len(), 0); // Pattern too short (< min_pattern_length)
801    }
802
803    #[test]
804    fn test_byte_pattern_search_max_matches_limit() {
805        let config = MatchConfig {
806            max_matches: 2,
807            ..Default::default()
808        };
809        let mut matcher = PatternMatcher::with_config(config);
810        let pattern = create_test_pattern(
811            "test",
812            PatternType::Bytes,
813            PatternData::Bytes(b"test".to_vec()),
814        );
815        matcher.add_pattern(pattern);
816
817        let data = b"test test test test";
818        let results = matcher.search(data).unwrap();
819
820        assert_eq!(results.matches.len(), 2); // Limited by max_matches
821    }
822
823    // ==============================
824    // String Pattern Search Tests
825    // ==============================
826
827    #[test]
828    fn test_string_pattern_search_case_sensitive() {
829        let mut matcher = PatternMatcher::new();
830        let pattern = create_test_pattern(
831            "test",
832            PatternType::String,
833            PatternData::String("Hello".to_string()),
834        );
835        matcher.add_pattern(pattern);
836
837        let data = b"Say Hello to the world";
838        let results = matcher.search(data).unwrap();
839
840        assert_eq!(results.matches.len(), 1);
841        assert_eq!(results.matches[0].offset, 4);
842    }
843
844    #[test]
845    fn test_string_pattern_search_case_insensitive() {
846        let config = MatchConfig {
847            case_sensitive: false,
848            ..Default::default()
849        };
850        let mut matcher = PatternMatcher::with_config(config);
851        let pattern = create_test_pattern(
852            "test",
853            PatternType::String,
854            PatternData::String("HELLO".to_string()),
855        );
856        matcher.add_pattern(pattern);
857
858        let data = b"Say hello to the world";
859        let results = matcher.search(data).unwrap();
860
861        assert_eq!(results.matches.len(), 1);
862        assert_eq!(results.matches[0].offset, 4);
863    }
864
865    #[test]
866    fn test_string_pattern_search_invalid_utf8() {
867        let mut matcher = PatternMatcher::new();
868        let pattern = create_test_pattern(
869            "test",
870            PatternType::String,
871            PatternData::String("test".to_string()),
872        );
873        matcher.add_pattern(pattern);
874
875        let data = &[0xFF, 0xFE, 0xFD, 0xFC]; // Invalid UTF-8
876        let results = matcher.search(data).unwrap();
877
878        assert_eq!(results.matches.len(), 0); // No matches for invalid UTF-8
879    }
880
881    #[test]
882    fn test_string_pattern_search_too_short() {
883        let mut matcher = PatternMatcher::new();
884        let pattern = create_test_pattern(
885            "test",
886            PatternType::String,
887            PatternData::String("ab".to_string()),
888        );
889        matcher.add_pattern(pattern);
890
891        let data = b"hello ab world";
892        let results = matcher.search(data).unwrap();
893
894        assert_eq!(results.matches.len(), 0); // Pattern too short
895    }
896
897    // ==============================
898    // Hex Wildcard Pattern Search Tests
899    // ==============================
900
901    #[test]
902    fn test_hex_wildcard_pattern_search() {
903        let mut matcher = PatternMatcher::new();
904        let pattern = create_test_pattern(
905            "test",
906            PatternType::HexWildcard,
907            PatternData::HexWildcard("48 65 ?? 6c 6f".to_string()),
908        );
909        matcher.add_pattern(pattern);
910
911        let data = b"Hello"; // 48 65 6c 6c 6f
912        let results = matcher.search(data).unwrap();
913
914        assert_eq!(results.matches.len(), 1);
915        assert_eq!(results.matches[0].offset, 0);
916        assert_eq!(results.matches[0].length, 5);
917    }
918
919    #[test]
920    fn test_hex_wildcard_pattern_search_invalid_pattern() {
921        let mut matcher = PatternMatcher::new();
922        let pattern = create_test_pattern(
923            "test",
924            PatternType::HexWildcard,
925            PatternData::HexWildcard("48 65 X".to_string()),
926        );
927        matcher.add_pattern(pattern);
928
929        let data = b"Hello";
930        let results = matcher.search(data);
931
932        assert!(results.is_err()); // Invalid hex pattern should error
933    }
934
935    // ==============================
936    // Magic Pattern Search Tests
937    // ==============================
938
939    #[test]
940    fn test_magic_pattern_search_match() {
941        let mut matcher = PatternMatcher::new();
942        let pattern =
943            create_test_pattern("PE", PatternType::Magic, PatternData::Bytes(b"MZ".to_vec()));
944        matcher.add_pattern(pattern);
945
946        let data = b"MZ\x90\x00\x03\x00"; // PE header start
947        let results = matcher.search(data).unwrap();
948
949        assert_eq!(results.matches.len(), 1);
950        assert_eq!(results.matches[0].offset, 0);
951        assert_eq!(results.matches[0].length, 2);
952    }
953
954    #[test]
955    fn test_magic_pattern_search_no_match_wrong_position() {
956        let mut matcher = PatternMatcher::new();
957        let pattern =
958            create_test_pattern("PE", PatternType::Magic, PatternData::Bytes(b"MZ".to_vec()));
959        matcher.add_pattern(pattern);
960
961        let data = b"XXMZ"; // Magic not at beginning
962        let results = matcher.search(data).unwrap();
963
964        assert_eq!(results.matches.len(), 0); // Magic patterns only match at offset 0
965    }
966
967    #[test]
968    fn test_magic_pattern_search_too_short() {
969        let mut matcher = PatternMatcher::new();
970        let pattern =
971            create_test_pattern("PE", PatternType::Magic, PatternData::Bytes(b"MZ".to_vec()));
972        matcher.add_pattern(pattern);
973
974        let data = b"M"; // Too short
975        let results = matcher.search(data).unwrap();
976
977        assert_eq!(results.matches.len(), 0);
978    }
979
980    // ==============================
981    // Regex and Structural Pattern Tests (Empty implementations)
982    // ==============================
983
984    #[test]
985    fn test_regex_pattern_search_returns_empty() {
986        let mut matcher = PatternMatcher::new();
987        let pattern = create_test_pattern(
988            "test",
989            PatternType::Regex,
990            PatternData::Regex("test.*".to_string()),
991        );
992        matcher.add_pattern(pattern);
993
994        let data = b"test pattern";
995        let results = matcher.search(data).unwrap();
996
997        assert_eq!(results.matches.len(), 0); // Regex not implemented yet
998    }
999
1000    #[test]
1001    fn test_structural_pattern_search_returns_empty() {
1002        let mut matcher = PatternMatcher::new();
1003        let pattern = create_test_pattern(
1004            "test",
1005            PatternType::Structural,
1006            PatternData::Bytes(b"test".to_vec()),
1007        );
1008        matcher.add_pattern(pattern);
1009
1010        let data = b"test pattern";
1011        let results = matcher.search(data).unwrap();
1012
1013        assert_eq!(results.matches.len(), 0); // Structural not implemented yet
1014    }
1015
1016    // ==============================
1017    // Built-in Pattern Tests
1018    // ==============================
1019
1020    #[test]
1021    fn test_builtin_patterns() {
1022        let patterns = get_file_format_patterns();
1023        assert!(!patterns.is_empty());
1024
1025        // Check for PE signature
1026        let pe_pattern = patterns.iter().find(|p| p.name == "PE_MZ");
1027        assert!(pe_pattern.is_some());
1028    }
1029
1030    #[test]
1031    fn test_file_format_patterns() {
1032        let patterns = get_file_format_patterns();
1033        assert!(patterns.len() >= 4);
1034
1035        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
1036        assert!(names.contains(&"PE_MZ"));
1037        assert!(names.contains(&"ELF"));
1038        assert!(names.contains(&"Mach_O_32"));
1039        assert!(names.contains(&"Mach_O_64"));
1040    }
1041
1042    #[test]
1043    fn test_compiler_patterns() {
1044        let patterns = get_compiler_patterns();
1045        assert!(!patterns.is_empty());
1046
1047        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
1048        assert!(names.contains(&"GCC"));
1049        assert!(names.contains(&"MSVC"));
1050    }
1051
1052    #[test]
1053    fn test_packer_patterns() {
1054        let patterns = get_packer_patterns();
1055        assert!(!patterns.is_empty());
1056
1057        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
1058        assert!(names.contains(&"UPX"));
1059    }
1060
1061    #[test]
1062    fn test_crypto_patterns() {
1063        let patterns = get_crypto_patterns();
1064        assert!(!patterns.is_empty());
1065
1066        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
1067        assert!(names.contains(&"MD5_Init"));
1068    }
1069
1070    #[test]
1071    fn test_malware_patterns() {
1072        let patterns = get_malware_patterns();
1073        assert!(!patterns.is_empty());
1074
1075        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
1076        assert!(names.contains(&"Suspicious_API"));
1077    }
1078
1079    #[test]
1080    fn test_api_patterns() {
1081        let patterns = get_api_patterns();
1082        assert!(!patterns.is_empty());
1083
1084        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
1085        assert!(names.contains(&"CreateProcess"));
1086    }
1087
1088    #[test]
1089    fn test_get_builtin_patterns_unknown_category() {
1090        let patterns = get_builtin_patterns(&PatternCategory::Debug);
1091        assert!(patterns.is_empty()); // Debug not implemented
1092
1093        let patterns = get_builtin_patterns(&PatternCategory::Network);
1094        assert!(patterns.is_empty()); // Network not implemented
1095    }
1096
1097    #[test]
1098    fn test_load_builtin_patterns() {
1099        let mut matcher = PatternMatcher::new();
1100        let categories = vec![
1101            PatternCategory::FileFormat,
1102            PatternCategory::Compiler,
1103            PatternCategory::Packer,
1104        ];
1105
1106        matcher.load_builtin_patterns(&categories);
1107        assert!(!matcher.patterns.is_empty());
1108
1109        // Verify patterns from all categories are loaded
1110        let format_count = matcher
1111            .patterns
1112            .iter()
1113            .filter(|p| p.category == PatternCategory::FileFormat)
1114            .count();
1115        let compiler_count = matcher
1116            .patterns
1117            .iter()
1118            .filter(|p| p.category == PatternCategory::Compiler)
1119            .count();
1120        let packer_count = matcher
1121            .patterns
1122            .iter()
1123            .filter(|p| p.category == PatternCategory::Packer)
1124            .count();
1125
1126        assert!(format_count > 0);
1127        assert!(compiler_count > 0);
1128        assert!(packer_count > 0);
1129    }
1130
1131    // ==============================
1132    // Search Results Tests
1133    // ==============================
1134
1135    #[test]
1136    fn test_search_results_structure() {
1137        let mut matcher = PatternMatcher::new();
1138        let pattern1 = create_test_pattern(
1139            "test1",
1140            PatternType::Bytes,
1141            PatternData::Bytes(b"test".to_vec()),
1142        );
1143        let pattern2 = create_test_pattern(
1144            "test2",
1145            PatternType::String,
1146            PatternData::String("hello".to_string()),
1147        );
1148
1149        matcher.add_pattern(pattern1);
1150        matcher.add_pattern(pattern2);
1151
1152        let data = b"This is a test and hello world";
1153        let results = matcher.search(data).unwrap();
1154
1155        assert_eq!(results.matches.len(), 2);
1156        assert_eq!(results.bytes_searched, data.len());
1157        assert!(results.duration_ms < 10000); // Should complete quickly in tests
1158        assert_eq!(results.by_category.len(), 1); // Both patterns are Custom category
1159        assert_eq!(results.by_category[&PatternCategory::Custom].len(), 2);
1160    }
1161
1162    #[test]
1163    fn test_search_results_empty() {
1164        let matcher = PatternMatcher::new();
1165        let data = b"test data";
1166        let results = matcher.search(data).unwrap();
1167
1168        assert_eq!(results.matches.len(), 0);
1169        assert_eq!(results.bytes_searched, data.len());
1170        assert!(results.by_category.is_empty());
1171    }
1172
1173    #[test]
1174    fn test_search_results_category_grouping() {
1175        let mut matcher = PatternMatcher::new();
1176        let pattern1 = Pattern {
1177            name: "pe".to_string(),
1178            pattern_type: PatternType::Magic,
1179            data: PatternData::Bytes(b"MZ".to_vec()),
1180            category: PatternCategory::FileFormat,
1181            description: "PE header".to_string(),
1182        };
1183        let pattern2 = Pattern {
1184            name: "gcc".to_string(),
1185            pattern_type: PatternType::String,
1186            data: PatternData::String("GCC".to_string()),
1187            category: PatternCategory::Compiler,
1188            description: "GCC compiler".to_string(),
1189        };
1190
1191        matcher.add_pattern(pattern1);
1192        matcher.add_pattern(pattern2);
1193
1194        let data = b"MZ This binary was compiled with GCC";
1195        let results = matcher.search(data).unwrap();
1196
1197        assert_eq!(results.matches.len(), 2);
1198        assert_eq!(results.by_category.len(), 2);
1199        assert!(results
1200            .by_category
1201            .contains_key(&PatternCategory::FileFormat));
1202        assert!(results.by_category.contains_key(&PatternCategory::Compiler));
1203    }
1204
1205    // ==============================
1206    // Pattern Data Type Tests
1207    // ==============================
1208
1209    #[test]
1210    fn test_pattern_types_equality() {
1211        assert_eq!(PatternType::Bytes, PatternType::Bytes);
1212        assert_ne!(PatternType::Bytes, PatternType::String);
1213    }
1214
1215    #[test]
1216    fn test_pattern_categories_equality() {
1217        assert_eq!(PatternCategory::FileFormat, PatternCategory::FileFormat);
1218        assert_ne!(PatternCategory::FileFormat, PatternCategory::Compiler);
1219    }
1220
1221    // ==============================
1222    // Edge Cases and Error Handling Tests
1223    // ==============================
1224
1225    #[test]
1226    fn test_search_empty_data() {
1227        let mut matcher = PatternMatcher::new();
1228        let pattern = create_test_pattern(
1229            "test",
1230            PatternType::Bytes,
1231            PatternData::Bytes(b"test".to_vec()),
1232        );
1233        matcher.add_pattern(pattern);
1234
1235        let data = b"";
1236        let results = matcher.search(data).unwrap();
1237
1238        assert_eq!(results.matches.len(), 0);
1239        assert_eq!(results.bytes_searched, 0);
1240    }
1241
1242    #[test]
1243    fn test_search_large_data() {
1244        let mut matcher = PatternMatcher::new();
1245        let pattern = create_test_pattern(
1246            "test",
1247            PatternType::Bytes,
1248            PatternData::Bytes(b"needle".to_vec()),
1249        );
1250        matcher.add_pattern(pattern);
1251
1252        let mut data = vec![b'X'; 100000];
1253        data.extend_from_slice(b"needle");
1254        data.extend_from_slice(&vec![b'Y'; 100000]);
1255
1256        let results = matcher.search(&data).unwrap();
1257
1258        assert_eq!(results.matches.len(), 1);
1259        assert_eq!(results.matches[0].offset, 100000);
1260    }
1261
1262    #[test]
1263    fn test_pattern_match_structure() {
1264        let mut matcher = PatternMatcher::new();
1265        let pattern = create_test_pattern(
1266            "test",
1267            PatternType::Bytes,
1268            PatternData::Bytes(b"test".to_vec()),
1269        );
1270        matcher.add_pattern(pattern.clone());
1271
1272        let data = b"find test here";
1273        let results = matcher.search(data).unwrap();
1274
1275        assert_eq!(results.matches.len(), 1);
1276        let m = &results.matches[0];
1277        assert_eq!(m.pattern.name, pattern.name);
1278        assert_eq!(m.offset, 5);
1279        assert_eq!(m.length, 4);
1280        assert_eq!(m.data, b"test");
1281        assert_eq!(m.confidence, 1.0);
1282    }
1283
1284    #[test]
1285    fn test_multiple_pattern_types_search() {
1286        let mut matcher = PatternMatcher::new();
1287
1288        // Add different pattern types
1289        matcher.add_pattern(create_test_pattern(
1290            "bytes",
1291            PatternType::Bytes,
1292            PatternData::Bytes(b"test".to_vec()),
1293        ));
1294        matcher.add_pattern(create_test_pattern(
1295            "string",
1296            PatternType::String,
1297            PatternData::String("hello".to_string()),
1298        ));
1299        matcher.add_pattern(create_test_pattern(
1300            "magic",
1301            PatternType::Magic,
1302            PatternData::Bytes(b"MZ".to_vec()),
1303        ));
1304        matcher.add_pattern(create_test_pattern(
1305            "hex",
1306            PatternType::HexWildcard,
1307            PatternData::HexWildcard("77 6F ?? 6C 64".to_string()),
1308        ));
1309
1310        let data = b"MZ test hello world";
1311        let results = matcher.search(data).unwrap();
1312
1313        assert!(results.matches.len() >= 3); // At least bytes, string, magic should match
1314    }
1315
1316    // ==============================
1317    // Helper Functions
1318    // ==============================
1319
1320    fn create_test_pattern(name: &str, pattern_type: PatternType, data: PatternData) -> Pattern {
1321        Pattern {
1322            name: name.to_string(),
1323            pattern_type,
1324            data,
1325            category: PatternCategory::Custom,
1326            description: format!("Test pattern: {}", name),
1327        }
1328    }
1329}