1use crate::{BinaryError, Result};
7use std::collections::HashMap;
8
9pub struct PatternMatcher {
11 patterns: Vec<Pattern>,
12 config: MatchConfig,
13}
14
/// Tunable settings for a [`PatternMatcher`].
#[derive(Debug, Clone)]
pub struct MatchConfig {
    /// When `false`, string patterns are compared without regard to letter case.
    pub case_sensitive: bool,
    /// Upper bound on the number of matches collected by a search.
    pub max_matches: usize,
    /// Wildcard toggle.
    /// NOTE(review): nothing in this file reads this flag — confirm its intended use.
    pub enable_wildcards: bool,
    /// Byte and string patterns shorter than this many bytes are skipped.
    pub min_pattern_length: usize,
}
27
28impl Default for MatchConfig {
29 fn default() -> Self {
30 Self {
31 case_sensitive: true,
32 max_matches: 1000,
33 enable_wildcards: true,
34 min_pattern_length: 3,
35 }
36 }
37}
38
/// A named signature that a [`PatternMatcher`] can look for.
#[derive(Debug, Clone)]
pub struct Pattern {
    /// Identifier of the pattern (e.g. `"PE_MZ"`).
    pub name: String,
    /// Selects which search routine is used for this pattern.
    pub pattern_type: PatternType,
    /// The payload to search for; its variant must suit `pattern_type`,
    /// otherwise the pattern produces no matches.
    pub data: PatternData,
    /// Category under which matches are grouped in the search results.
    pub category: PatternCategory,
    /// Human-readable explanation of what the pattern indicates.
    pub description: String,
}
53
/// The matching strategy applied to a [`Pattern`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternType {
    /// Exact byte sequence, matched at any offset.
    Bytes,
    /// Text, matched at any offset.
    String,
    /// Regular expression; the search routine in this file is a stub that
    /// returns no matches.
    Regex,
    /// Hex text in which `??` stands for any single byte.
    HexWildcard,
    /// Byte sequence that must appear at the very start of the data.
    Magic,
    /// Structural pattern; the search routine in this file is a stub that
    /// returns no matches.
    Structural,
}
70
/// The payload carried by a [`Pattern`].
#[derive(Debug, Clone)]
pub enum PatternData {
    /// Raw bytes; read by the `Bytes` and `Magic` search routines.
    Bytes(Vec<u8>),
    /// Text; read by the `String` search routine.
    String(String),
    /// Hex text such as `"48 65 ?? 6c 6f"`; read by the `HexWildcard` search routine.
    HexWildcard(String),
    /// Regular-expression source text; not evaluated by any code in this file.
    Regex(String),
}
83
/// Grouping key for patterns and for the matches they produce.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PatternCategory {
    /// File format signatures (built-ins: PE/MZ, ELF, Mach-O magic numbers).
    FileFormat,
    /// Compiler signatures (built-ins: GCC, MSVC strings).
    Compiler,
    /// Packer signatures (built-in: UPX string).
    Packer,
    /// Cryptographic constants (built-in: MD5 initialization bytes).
    Crypto,
    /// Suspicious indicators (built-in: `VirtualAllocEx` string).
    Malware,
    /// API names (built-in: `CreateProcessA` string).
    Api,
    /// No built-in patterns are provided for this category.
    Debug,
    /// No built-in patterns are provided for this category.
    Metadata,
    /// No built-in patterns are provided for this category.
    Network,
    /// No built-in patterns are provided for this category.
    Custom,
}
108
/// A single occurrence of a [`Pattern`] inside the searched data.
#[derive(Debug, Clone)]
pub struct PatternMatch {
    /// Copy of the pattern that matched.
    pub pattern: Pattern,
    /// Byte offset of the match from the start of the searched data.
    pub offset: usize,
    /// Length of the match in bytes.
    pub length: usize,
    /// Copy of the matched bytes.
    pub data: Vec<u8>,
    /// Confidence score; every search routine in this file sets it to `1.0`.
    pub confidence: f64,
}
123
/// Outcome of one [`PatternMatcher::search`] call.
#[derive(Debug, Clone)]
pub struct SearchResults {
    /// All matches, in the order they were found.
    pub matches: Vec<PatternMatch>,
    /// The same matches, grouped by the category of the matching pattern.
    pub by_category: crate::types::PatternMatchMap,
    /// Length in bytes of the searched data.
    pub bytes_searched: usize,
    /// Time spent in the search, in milliseconds.
    pub duration_ms: u64,
}
136
137impl Default for PatternMatcher {
138 fn default() -> Self {
139 Self::new()
140 }
141}
142
143impl PatternMatcher {
144 pub fn new() -> Self {
146 Self {
147 patterns: Vec::new(),
148 config: MatchConfig::default(),
149 }
150 }
151
152 pub fn with_config(config: MatchConfig) -> Self {
154 Self {
155 patterns: Vec::new(),
156 config,
157 }
158 }
159
160 pub fn add_pattern(&mut self, pattern: Pattern) {
162 self.patterns.push(pattern);
163 }
164
165 pub fn add_patterns(&mut self, patterns: Vec<Pattern>) {
167 self.patterns.extend(patterns);
168 }
169
170 pub fn load_builtin_patterns(&mut self, categories: &[PatternCategory]) {
172 for category in categories {
173 let patterns = get_builtin_patterns(category);
174 self.add_patterns(patterns);
175 }
176 }
177
178 pub fn search(&self, data: &[u8]) -> Result<SearchResults> {
180 let start_time = std::time::Instant::now();
181 let mut matches = Vec::new();
182 let mut by_category: crate::types::PatternMatchMap = HashMap::new();
183
184 for pattern in &self.patterns {
185 let pattern_matches = self.search_pattern(data, pattern)?;
186
187 for pattern_match in pattern_matches {
188 by_category
189 .entry(pattern_match.pattern.category.clone())
190 .or_default()
191 .push(pattern_match.clone());
192
193 matches.push(pattern_match);
194
195 if matches.len() >= self.config.max_matches {
196 break;
197 }
198 }
199
200 if matches.len() >= self.config.max_matches {
201 break;
202 }
203 }
204
205 let duration = start_time.elapsed();
206
207 Ok(SearchResults {
208 matches,
209 by_category,
210 bytes_searched: data.len(),
211 duration_ms: duration.as_millis() as u64,
212 })
213 }
214
215 fn search_pattern(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
217 match &pattern.pattern_type {
218 PatternType::Bytes => self.search_bytes(data, pattern),
219 PatternType::String => self.search_string(data, pattern),
220 PatternType::HexWildcard => self.search_hex_wildcard(data, pattern),
221 PatternType::Magic => self.search_magic(data, pattern),
222 PatternType::Regex => self.search_regex(data, pattern),
223 PatternType::Structural => self.search_structural(data, pattern),
224 }
225 }
226
227 fn search_bytes(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
229 let mut matches = Vec::new();
230
231 if let PatternData::Bytes(pattern_bytes) = &pattern.data {
232 if pattern_bytes.len() < self.config.min_pattern_length {
233 return Ok(matches);
234 }
235
236 let mut start = 0;
237 while start + pattern_bytes.len() <= data.len() {
238 if let Some(pos) = data[start..]
239 .windows(pattern_bytes.len())
240 .position(|window| window == pattern_bytes)
241 {
242 let offset = start + pos;
243 matches.push(PatternMatch {
244 pattern: pattern.clone(),
245 offset,
246 length: pattern_bytes.len(),
247 data: data[offset..offset + pattern_bytes.len()].to_vec(),
248 confidence: 1.0,
249 });
250
251 start = offset + 1;
252
253 if matches.len() >= self.config.max_matches {
254 break;
255 }
256 } else {
257 break;
258 }
259 }
260 }
261
262 Ok(matches)
263 }
264
265 fn search_string(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
267 let mut matches = Vec::new();
268
269 if let PatternData::String(pattern_str) = &pattern.data {
270 if pattern_str.len() < self.config.min_pattern_length {
271 return Ok(matches);
272 }
273
274 let search_str = if self.config.case_sensitive {
275 pattern_str.clone()
276 } else {
277 pattern_str.to_lowercase()
278 };
279
280 let search_bytes = search_str.as_bytes();
281
282 if let Ok(data_str) = String::from_utf8(data.to_vec()) {
284 let search_data = if self.config.case_sensitive {
285 data_str
286 } else {
287 data_str.to_lowercase()
288 };
289
290 let mut start = 0;
291 while let Some(pos) = search_data[start..].find(&search_str) {
292 let offset = start + pos;
293 matches.push(PatternMatch {
294 pattern: pattern.clone(),
295 offset,
296 length: search_bytes.len(),
297 data: data[offset..offset + search_bytes.len()].to_vec(),
298 confidence: 1.0,
299 });
300
301 start = offset + 1;
302
303 if matches.len() >= self.config.max_matches {
304 break;
305 }
306 }
307 }
308 }
309
310 Ok(matches)
311 }
312
313 fn search_hex_wildcard(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
315 let mut matches = Vec::new();
316
317 if let PatternData::HexWildcard(hex_pattern) = &pattern.data {
318 let compiled_pattern = compile_hex_wildcard(hex_pattern)?;
319
320 let mut start = 0;
321 while start + compiled_pattern.len() <= data.len() {
322 if hex_wildcard_matches(
323 &data[start..start + compiled_pattern.len()],
324 &compiled_pattern,
325 ) {
326 matches.push(PatternMatch {
327 pattern: pattern.clone(),
328 offset: start,
329 length: compiled_pattern.len(),
330 data: data[start..start + compiled_pattern.len()].to_vec(),
331 confidence: 1.0,
332 });
333
334 if matches.len() >= self.config.max_matches {
335 break;
336 }
337 }
338 start += 1;
339 }
340 }
341
342 Ok(matches)
343 }
344
345 fn search_magic(&self, data: &[u8], pattern: &Pattern) -> Result<Vec<PatternMatch>> {
347 let mut matches = Vec::new();
349
350 if let PatternData::Bytes(magic_bytes) = &pattern.data {
351 if data.len() >= magic_bytes.len() && &data[..magic_bytes.len()] == magic_bytes {
352 matches.push(PatternMatch {
353 pattern: pattern.clone(),
354 offset: 0,
355 length: magic_bytes.len(),
356 data: magic_bytes.clone(),
357 confidence: 1.0,
358 });
359 }
360 }
361
362 Ok(matches)
363 }
364
365 fn search_regex(&self, _data: &[u8], _pattern: &Pattern) -> Result<Vec<PatternMatch>> {
367 Ok(Vec::new())
370 }
371
372 fn search_structural(&self, _data: &[u8], _pattern: &Pattern) -> Result<Vec<PatternMatch>> {
374 Ok(Vec::new())
377 }
378}
379
380fn compile_hex_wildcard(pattern: &str) -> crate::types::HexPatternResult {
382 let mut compiled = Vec::new();
383 let clean_pattern = pattern.replace(" ", "").replace("\n", "");
384
385 if clean_pattern.len() % 2 != 0 {
386 return Err(BinaryError::invalid_data(
387 "Hex pattern must have even length",
388 ));
389 }
390
391 for i in (0..clean_pattern.len()).step_by(2) {
392 let hex_byte = &clean_pattern[i..i + 2];
393
394 if hex_byte == "??" {
395 compiled.push(None); } else {
397 let byte_value = u8::from_str_radix(hex_byte, 16).map_err(|_| {
398 BinaryError::invalid_data(format!("Invalid hex byte: {}", hex_byte))
399 })?;
400 compiled.push(Some(byte_value));
401 }
402 }
403
404 Ok(compiled)
405}
406
407fn hex_wildcard_matches(data: &[u8], pattern: &crate::types::HexPattern) -> bool {
409 if data.len() != pattern.len() {
410 return false;
411 }
412
413 for (i, &byte) in data.iter().enumerate() {
414 match pattern[i] {
415 Some(expected) if expected != byte => return false,
416 None => continue, _ => continue,
418 }
419 }
420
421 true
422}
423
424fn get_builtin_patterns(category: &PatternCategory) -> Vec<Pattern> {
426 match category {
427 PatternCategory::FileFormat => get_file_format_patterns(),
428 PatternCategory::Compiler => get_compiler_patterns(),
429 PatternCategory::Packer => get_packer_patterns(),
430 PatternCategory::Crypto => get_crypto_patterns(),
431 PatternCategory::Malware => get_malware_patterns(),
432 PatternCategory::Api => get_api_patterns(),
433 _ => Vec::new(),
434 }
435}
436
437fn get_file_format_patterns() -> Vec<Pattern> {
439 vec![
440 Pattern {
441 name: "PE_MZ".to_string(),
442 pattern_type: PatternType::Magic,
443 data: PatternData::Bytes(b"MZ".to_vec()),
444 category: PatternCategory::FileFormat,
445 description: "DOS/PE executable signature".to_string(),
446 },
447 Pattern {
448 name: "ELF".to_string(),
449 pattern_type: PatternType::Magic,
450 data: PatternData::Bytes(b"\x7fELF".to_vec()),
451 category: PatternCategory::FileFormat,
452 description: "ELF executable signature".to_string(),
453 },
454 Pattern {
455 name: "Mach_O_32".to_string(),
456 pattern_type: PatternType::Magic,
457 data: PatternData::Bytes(vec![0xfe, 0xed, 0xfa, 0xce]),
458 category: PatternCategory::FileFormat,
459 description: "Mach-O 32-bit signature".to_string(),
460 },
461 Pattern {
462 name: "Mach_O_64".to_string(),
463 pattern_type: PatternType::Magic,
464 data: PatternData::Bytes(vec![0xfe, 0xed, 0xfa, 0xcf]),
465 category: PatternCategory::FileFormat,
466 description: "Mach-O 64-bit signature".to_string(),
467 },
468 ]
469}
470
471fn get_compiler_patterns() -> Vec<Pattern> {
473 vec![
474 Pattern {
475 name: "GCC".to_string(),
476 pattern_type: PatternType::String,
477 data: PatternData::String("GCC:".to_string()),
478 category: PatternCategory::Compiler,
479 description: "GCC compiler signature".to_string(),
480 },
481 Pattern {
482 name: "MSVC".to_string(),
483 pattern_type: PatternType::String,
484 data: PatternData::String("Microsoft C/C++".to_string()),
485 category: PatternCategory::Compiler,
486 description: "Microsoft Visual C++ signature".to_string(),
487 },
488 ]
489}
490
491fn get_packer_patterns() -> Vec<Pattern> {
493 vec![Pattern {
494 name: "UPX".to_string(),
495 pattern_type: PatternType::String,
496 data: PatternData::String("UPX!".to_string()),
497 category: PatternCategory::Packer,
498 description: "UPX packer signature".to_string(),
499 }]
500}
501
502fn get_crypto_patterns() -> Vec<Pattern> {
504 vec![Pattern {
505 name: "MD5_Init".to_string(),
506 pattern_type: PatternType::Bytes,
507 data: PatternData::Bytes(vec![0x01, 0x23, 0x45, 0x67]), category: PatternCategory::Crypto,
509 description: "MD5 initialization constants".to_string(),
510 }]
511}
512
513fn get_malware_patterns() -> Vec<Pattern> {
515 vec![Pattern {
516 name: "Suspicious_API".to_string(),
517 pattern_type: PatternType::String,
518 data: PatternData::String("VirtualAllocEx".to_string()),
519 category: PatternCategory::Malware,
520 description: "Suspicious Windows API call".to_string(),
521 }]
522}
523
524fn get_api_patterns() -> Vec<Pattern> {
526 vec![Pattern {
527 name: "CreateProcess".to_string(),
528 pattern_type: PatternType::String,
529 data: PatternData::String("CreateProcessA".to_string()),
530 category: PatternCategory::Api,
531 description: "Windows CreateProcess API".to_string(),
532 }]
533}
534
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Construction and configuration ----

    #[test]
    fn test_pattern_matcher_creation() {
        let matcher = PatternMatcher::new();
        assert_eq!(matcher.patterns.len(), 0);
        assert!(matcher.config.case_sensitive);
        assert_eq!(matcher.config.max_matches, 1000);
        assert!(matcher.config.enable_wildcards);
        assert_eq!(matcher.config.min_pattern_length, 3);
    }

    #[test]
    fn test_pattern_matcher_default() {
        let matcher = PatternMatcher::default();
        assert_eq!(matcher.patterns.len(), 0);
    }

    #[test]
    fn test_pattern_matcher_with_config() {
        let config = MatchConfig {
            case_sensitive: false,
            max_matches: 500,
            enable_wildcards: false,
            min_pattern_length: 5,
        };
        let matcher = PatternMatcher::with_config(config);
        assert!(!matcher.config.case_sensitive);
        assert_eq!(matcher.config.max_matches, 500);
        assert!(!matcher.config.enable_wildcards);
        assert_eq!(matcher.config.min_pattern_length, 5);
    }

    #[test]
    fn test_match_config_default() {
        let config = MatchConfig::default();
        assert!(config.case_sensitive);
        assert_eq!(config.max_matches, 1000);
        assert!(config.enable_wildcards);
        assert_eq!(config.min_pattern_length, 3);
    }

    // ---- Pattern registration ----

    #[test]
    fn test_add_single_pattern() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"test".to_vec()),
        );

        matcher.add_pattern(pattern);
        assert_eq!(matcher.patterns.len(), 1);
        assert_eq!(matcher.patterns[0].name, "test");
    }

    #[test]
    fn test_add_multiple_patterns() {
        let mut matcher = PatternMatcher::new();
        let patterns = vec![
            create_test_pattern(
                "test1",
                PatternType::Bytes,
                PatternData::Bytes(b"test1".to_vec()),
            ),
            create_test_pattern(
                "test2",
                PatternType::String,
                PatternData::String("test2".to_string()),
            ),
        ];

        matcher.add_patterns(patterns);
        assert_eq!(matcher.patterns.len(), 2);
    }

    // ---- Hex wildcard compilation and matching ----

    #[test]
    fn test_hex_wildcard_compilation() {
        let pattern = "48 65 ?? 6c 6f";
        let compiled = compile_hex_wildcard(pattern).unwrap();

        assert_eq!(compiled.len(), 5);
        assert_eq!(compiled[0], Some(0x48));
        assert_eq!(compiled[1], Some(0x65));
        assert_eq!(compiled[2], None);
        assert_eq!(compiled[3], Some(0x6c));
        assert_eq!(compiled[4], Some(0x6f));
    }

    #[test]
    fn test_hex_wildcard_compilation_no_spaces() {
        let pattern = "48656c6f";
        let compiled = compile_hex_wildcard(pattern).unwrap();

        assert_eq!(compiled.len(), 4);
        assert_eq!(compiled[0], Some(0x48));
        assert_eq!(compiled[1], Some(0x65));
        assert_eq!(compiled[2], Some(0x6c));
        assert_eq!(compiled[3], Some(0x6f));
    }

    #[test]
    fn test_hex_wildcard_compilation_with_newlines() {
        let pattern = "48 65\n?? 6c\n6f";
        let compiled = compile_hex_wildcard(pattern).unwrap();

        assert_eq!(compiled.len(), 5);
        assert_eq!(compiled[2], None);
    }

    #[test]
    fn test_hex_wildcard_compilation_error_odd_length() {
        let pattern = "48 65 6";
        let result = compile_hex_wildcard(pattern);
        assert!(result.is_err());
    }

    #[test]
    fn test_hex_wildcard_compilation_error_invalid_hex() {
        let pattern = "48 65 XY 6c";
        let result = compile_hex_wildcard(pattern);
        assert!(result.is_err());
    }

    #[test]
    fn test_hex_wildcard_compilation_all_wildcards() {
        let pattern = "?? ?? ??";
        let compiled = compile_hex_wildcard(pattern).unwrap();

        assert_eq!(compiled.len(), 3);
        assert_eq!(compiled[0], None);
        assert_eq!(compiled[1], None);
        assert_eq!(compiled[2], None);
    }

    #[test]
    fn test_hex_wildcard_matching() {
        let data = &[0x48, 0x65, 0x78, 0x6c, 0x6f];
        let pattern = vec![Some(0x48), Some(0x65), None, Some(0x6c), Some(0x6f)];

        assert!(hex_wildcard_matches(data, &pattern));

        // Differs from the data in the last byte only.
        let wrong_pattern = vec![Some(0x48), Some(0x65), None, Some(0x6c), Some(0x70)];
        assert!(!hex_wildcard_matches(data, &wrong_pattern));
    }

    #[test]
    fn test_hex_wildcard_matching_length_mismatch() {
        let data = &[0x48, 0x65, 0x78];
        let pattern = vec![Some(0x48), Some(0x65), None, Some(0x6c)];

        assert!(!hex_wildcard_matches(data, &pattern));
    }

    #[test]
    fn test_hex_wildcard_matching_empty() {
        let data = &[];
        let pattern = vec![];

        assert!(hex_wildcard_matches(data, &pattern));
    }

    // ---- Byte pattern search ----

    #[test]
    fn test_byte_pattern_search() {
        let mut matcher = PatternMatcher::new();

        let pattern = Pattern {
            name: "test".to_string(),
            pattern_type: PatternType::Bytes,
            data: PatternData::Bytes(b"hello".to_vec()),
            category: PatternCategory::Custom,
            description: "Test pattern".to_string(),
        };

        matcher.add_pattern(pattern);

        let data = b"This is a hello world test";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 1);
        assert_eq!(results.matches[0].offset, 10);
        assert_eq!(results.matches[0].length, 5);
        assert_eq!(results.matches[0].data, b"hello");
        assert_eq!(results.matches[0].confidence, 1.0);
    }

    #[test]
    fn test_byte_pattern_search_multiple_matches() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"abc".to_vec()),
        );
        matcher.add_pattern(pattern);

        let data = b"abcabcabc";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 3);
    }

    #[test]
    fn test_byte_pattern_search_overlapping_matches() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"aaa".to_vec()),
        );
        matcher.add_pattern(pattern);

        let data = b"aaaaa";
        let results = matcher.search(data).unwrap();

        // Overlapping occurrences at offsets 0, 1 and 2 are all reported.
        assert_eq!(results.matches.len(), 3);
    }

    #[test]
    fn test_byte_pattern_search_no_match() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"xyz".to_vec()),
        );
        matcher.add_pattern(pattern);

        let data = b"hello world";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
    }

    #[test]
    fn test_byte_pattern_search_too_short() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"ab".to_vec()),
        );
        matcher.add_pattern(pattern);

        let data = b"hello world";
        let results = matcher.search(data).unwrap();

        // The pattern is below the default minimum length of three bytes.
        assert_eq!(results.matches.len(), 0);
    }

    #[test]
    fn test_byte_pattern_search_max_matches_limit() {
        let config = MatchConfig {
            max_matches: 2,
            ..Default::default()
        };
        let mut matcher = PatternMatcher::with_config(config);
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"test".to_vec()),
        );
        matcher.add_pattern(pattern);

        let data = b"test test test test";
        let results = matcher.search(data).unwrap();

        // Four occurrences exist, but the cap is two.
        assert_eq!(results.matches.len(), 2);
    }

    // ---- String pattern search ----

    #[test]
    fn test_string_pattern_search_case_sensitive() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::String,
            PatternData::String("Hello".to_string()),
        );
        matcher.add_pattern(pattern);

        let data = b"Say Hello to the world";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 1);
        assert_eq!(results.matches[0].offset, 4);
    }

    #[test]
    fn test_string_pattern_search_case_insensitive() {
        let config = MatchConfig {
            case_sensitive: false,
            ..Default::default()
        };
        let mut matcher = PatternMatcher::with_config(config);
        let pattern = create_test_pattern(
            "test",
            PatternType::String,
            PatternData::String("HELLO".to_string()),
        );
        matcher.add_pattern(pattern);

        let data = b"Say hello to the world";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 1);
        assert_eq!(results.matches[0].offset, 4);
    }

    #[test]
    fn test_string_pattern_search_invalid_utf8() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::String,
            PatternData::String("test".to_string()),
        );
        matcher.add_pattern(pattern);

        // Not valid UTF-8; the search must neither fail nor match.
        let data = &[0xFF, 0xFE, 0xFD, 0xFC];
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
    }

    #[test]
    fn test_string_pattern_search_too_short() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::String,
            PatternData::String("ab".to_string()),
        );
        matcher.add_pattern(pattern);

        let data = b"hello ab world";
        let results = matcher.search(data).unwrap();

        // The pattern is below the default minimum length of three bytes.
        assert_eq!(results.matches.len(), 0);
    }

    // ---- Hex wildcard pattern search ----

    #[test]
    fn test_hex_wildcard_pattern_search() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::HexWildcard,
            PatternData::HexWildcard("48 65 ?? 6c 6f".to_string()),
        );
        matcher.add_pattern(pattern);

        let data = b"Hello";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 1);
        assert_eq!(results.matches[0].offset, 0);
        assert_eq!(results.matches[0].length, 5);
    }

    #[test]
    fn test_hex_wildcard_pattern_search_invalid_pattern() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::HexWildcard,
            PatternData::HexWildcard("48 65 X".to_string()),
        );
        matcher.add_pattern(pattern);

        let data = b"Hello";
        let results = matcher.search(data);

        // A malformed hex pattern surfaces as an error from `search`.
        assert!(results.is_err());
    }

    // ---- Magic pattern search ----

    #[test]
    fn test_magic_pattern_search_match() {
        let mut matcher = PatternMatcher::new();
        let pattern =
            create_test_pattern("PE", PatternType::Magic, PatternData::Bytes(b"MZ".to_vec()));
        matcher.add_pattern(pattern);

        let data = b"MZ\x90\x00\x03\x00";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 1);
        assert_eq!(results.matches[0].offset, 0);
        assert_eq!(results.matches[0].length, 2);
    }

    #[test]
    fn test_magic_pattern_search_no_match_wrong_position() {
        let mut matcher = PatternMatcher::new();
        let pattern =
            create_test_pattern("PE", PatternType::Magic, PatternData::Bytes(b"MZ".to_vec()));
        matcher.add_pattern(pattern);

        // The magic bytes are present, but not at offset zero.
        let data = b"XXMZ";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
    }

    #[test]
    fn test_magic_pattern_search_too_short() {
        let mut matcher = PatternMatcher::new();
        let pattern =
            create_test_pattern("PE", PatternType::Magic, PatternData::Bytes(b"MZ".to_vec()));
        matcher.add_pattern(pattern);

        // The data is shorter than the magic bytes.
        let data = b"M";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
    }

    // ---- Stubbed pattern types ----

    #[test]
    fn test_regex_pattern_search_returns_empty() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Regex,
            PatternData::Regex("test.*".to_string()),
        );
        matcher.add_pattern(pattern);

        let data = b"test pattern";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
    }

    #[test]
    fn test_structural_pattern_search_returns_empty() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Structural,
            PatternData::Bytes(b"test".to_vec()),
        );
        matcher.add_pattern(pattern);

        let data = b"test pattern";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
    }

    // ---- Built-in patterns ----

    #[test]
    fn test_builtin_patterns() {
        let patterns = get_file_format_patterns();
        assert!(!patterns.is_empty());

        let pe_pattern = patterns.iter().find(|p| p.name == "PE_MZ");
        assert!(pe_pattern.is_some());
    }

    #[test]
    fn test_file_format_patterns() {
        let patterns = get_file_format_patterns();
        assert!(patterns.len() >= 4);

        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
        assert!(names.contains(&"PE_MZ"));
        assert!(names.contains(&"ELF"));
        assert!(names.contains(&"Mach_O_32"));
        assert!(names.contains(&"Mach_O_64"));
    }

    #[test]
    fn test_compiler_patterns() {
        let patterns = get_compiler_patterns();
        assert!(!patterns.is_empty());

        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
        assert!(names.contains(&"GCC"));
        assert!(names.contains(&"MSVC"));
    }

    #[test]
    fn test_packer_patterns() {
        let patterns = get_packer_patterns();
        assert!(!patterns.is_empty());

        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
        assert!(names.contains(&"UPX"));
    }

    #[test]
    fn test_crypto_patterns() {
        let patterns = get_crypto_patterns();
        assert!(!patterns.is_empty());

        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
        assert!(names.contains(&"MD5_Init"));
    }

    #[test]
    fn test_malware_patterns() {
        let patterns = get_malware_patterns();
        assert!(!patterns.is_empty());

        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
        assert!(names.contains(&"Suspicious_API"));
    }

    #[test]
    fn test_api_patterns() {
        let patterns = get_api_patterns();
        assert!(!patterns.is_empty());

        let names: Vec<&str> = patterns.iter().map(|p| p.name.as_str()).collect();
        assert!(names.contains(&"CreateProcess"));
    }

    #[test]
    fn test_get_builtin_patterns_unknown_category() {
        let patterns = get_builtin_patterns(&PatternCategory::Debug);
        assert!(patterns.is_empty());

        let patterns = get_builtin_patterns(&PatternCategory::Network);
        assert!(patterns.is_empty());
    }

    #[test]
    fn test_load_builtin_patterns() {
        let mut matcher = PatternMatcher::new();
        let categories = vec![
            PatternCategory::FileFormat,
            PatternCategory::Compiler,
            PatternCategory::Packer,
        ];

        matcher.load_builtin_patterns(&categories);
        assert!(!matcher.patterns.is_empty());

        let format_count = matcher
            .patterns
            .iter()
            .filter(|p| p.category == PatternCategory::FileFormat)
            .count();
        let compiler_count = matcher
            .patterns
            .iter()
            .filter(|p| p.category == PatternCategory::Compiler)
            .count();
        let packer_count = matcher
            .patterns
            .iter()
            .filter(|p| p.category == PatternCategory::Packer)
            .count();

        assert!(format_count > 0);
        assert!(compiler_count > 0);
        assert!(packer_count > 0);
    }

    // ---- Search results ----

    #[test]
    fn test_search_results_structure() {
        let mut matcher = PatternMatcher::new();
        let pattern1 = create_test_pattern(
            "test1",
            PatternType::Bytes,
            PatternData::Bytes(b"test".to_vec()),
        );
        let pattern2 = create_test_pattern(
            "test2",
            PatternType::String,
            PatternData::String("hello".to_string()),
        );

        matcher.add_pattern(pattern1);
        matcher.add_pattern(pattern2);

        let data = b"This is a test and hello world";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 2);
        assert_eq!(results.bytes_searched, data.len());
        assert!(results.duration_ms < 10000);
        // Both test patterns use the Custom category.
        assert_eq!(results.by_category.len(), 1);
        assert_eq!(results.by_category[&PatternCategory::Custom].len(), 2);
    }

    #[test]
    fn test_search_results_empty() {
        let matcher = PatternMatcher::new();
        let data = b"test data";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
        assert_eq!(results.bytes_searched, data.len());
        assert!(results.by_category.is_empty());
    }

    #[test]
    fn test_search_results_category_grouping() {
        let mut matcher = PatternMatcher::new();
        let pattern1 = Pattern {
            name: "pe".to_string(),
            pattern_type: PatternType::Magic,
            data: PatternData::Bytes(b"MZ".to_vec()),
            category: PatternCategory::FileFormat,
            description: "PE header".to_string(),
        };
        let pattern2 = Pattern {
            name: "gcc".to_string(),
            pattern_type: PatternType::String,
            data: PatternData::String("GCC".to_string()),
            category: PatternCategory::Compiler,
            description: "GCC compiler".to_string(),
        };

        matcher.add_pattern(pattern1);
        matcher.add_pattern(pattern2);

        let data = b"MZ This binary was compiled with GCC";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 2);
        assert_eq!(results.by_category.len(), 2);
        assert!(results
            .by_category
            .contains_key(&PatternCategory::FileFormat));
        assert!(results.by_category.contains_key(&PatternCategory::Compiler));
    }

    // ---- Enum equality ----

    #[test]
    fn test_pattern_types_equality() {
        assert_eq!(PatternType::Bytes, PatternType::Bytes);
        assert_ne!(PatternType::Bytes, PatternType::String);
    }

    #[test]
    fn test_pattern_categories_equality() {
        assert_eq!(PatternCategory::FileFormat, PatternCategory::FileFormat);
        assert_ne!(PatternCategory::FileFormat, PatternCategory::Compiler);
    }

    // ---- Edge cases and integration ----

    #[test]
    fn test_search_empty_data() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"test".to_vec()),
        );
        matcher.add_pattern(pattern);

        let data = b"";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 0);
        assert_eq!(results.bytes_searched, 0);
    }

    #[test]
    fn test_search_large_data() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"needle".to_vec()),
        );
        matcher.add_pattern(pattern);

        // 100000 filler bytes, the needle, then another 100000 filler bytes.
        let mut data = vec![b'X'; 100000];
        data.extend_from_slice(b"needle");
        data.resize(data.len() + 100000, b'Y');

        let results = matcher.search(&data).unwrap();

        assert_eq!(results.matches.len(), 1);
        assert_eq!(results.matches[0].offset, 100000);
    }

    #[test]
    fn test_pattern_match_structure() {
        let mut matcher = PatternMatcher::new();
        let pattern = create_test_pattern(
            "test",
            PatternType::Bytes,
            PatternData::Bytes(b"test".to_vec()),
        );
        matcher.add_pattern(pattern.clone());

        let data = b"find test here";
        let results = matcher.search(data).unwrap();

        assert_eq!(results.matches.len(), 1);
        let m = &results.matches[0];
        assert_eq!(m.pattern.name, pattern.name);
        assert_eq!(m.offset, 5);
        assert_eq!(m.length, 4);
        assert_eq!(m.data, b"test");
        assert_eq!(m.confidence, 1.0);
    }

    #[test]
    fn test_multiple_pattern_types_search() {
        let mut matcher = PatternMatcher::new();

        matcher.add_pattern(create_test_pattern(
            "bytes",
            PatternType::Bytes,
            PatternData::Bytes(b"test".to_vec()),
        ));
        matcher.add_pattern(create_test_pattern(
            "string",
            PatternType::String,
            PatternData::String("hello".to_string()),
        ));
        matcher.add_pattern(create_test_pattern(
            "magic",
            PatternType::Magic,
            PatternData::Bytes(b"MZ".to_vec()),
        ));
        matcher.add_pattern(create_test_pattern(
            "hex",
            PatternType::HexWildcard,
            PatternData::HexWildcard("77 6F ?? 6C 64".to_string()),
        ));

        let data = b"MZ test hello world";
        let results = matcher.search(data).unwrap();

        assert!(results.matches.len() >= 3);
    }

    /// Builds a pattern in the `Custom` category with a generated description.
    fn create_test_pattern(name: &str, pattern_type: PatternType, data: PatternData) -> Pattern {
        Pattern {
            name: name.to_string(),
            pattern_type,
            data,
            category: PatternCategory::Custom,
            description: format!("Test pattern: {}", name),
        }
    }
}