wraith/util/
pattern.rs

1//! Pattern scanning for memory and modules
2//!
3//! Supports multiple pattern formats:
4//! - IDA-style: `"48 8B 05 ?? ?? ?? ?? 48 89"` (wildcards as `??` or `?`)
//! - Code-style: bytes + mask (`b"\x48\x8B\x05\x00"` with `"xx??"`, one mask character per byte)
6//! - Raw bytes with mask array
7
8#[cfg(all(not(feature = "std"), feature = "alloc"))]
9use alloc::{format, string::String, vec, vec::Vec};
10
11#[cfg(feature = "std")]
12use std::{format, string::String, vec, vec::Vec};
13
14use crate::error::{Result, WraithError};
15
16/// parsed byte pattern with wildcard mask
17#[derive(Debug, Clone)]
18pub struct Pattern {
19    pub(crate) bytes: Vec<u8>,
20    pub(crate) mask: Vec<bool>, // true = wildcard (match any)
21}
22
23impl Pattern {
24    /// create pattern from bytes and mask arrays
25    pub fn from_bytes_mask(bytes: &[u8], mask: &[bool]) -> Result<Self> {
26        if bytes.len() != mask.len() {
27            return Err(WraithError::PatternParseFailed {
28                reason: "bytes and mask length mismatch".into(),
29            });
30        }
31        if bytes.is_empty() {
32            return Err(WraithError::PatternParseFailed {
33                reason: "empty pattern".into(),
34            });
35        }
36        Ok(Self {
37            bytes: bytes.to_vec(),
38            mask: mask.to_vec(),
39        })
40    }
41
42    /// create pattern from bytes and string mask
43    ///
44    /// mask format: `x` = exact match, `?` = wildcard
45    /// example: `"xx????xx"` for 8 bytes with 4 wildcards in middle
46    pub fn from_code(bytes: &[u8], mask: &str) -> Result<Self> {
47        if bytes.len() != mask.len() {
48            return Err(WraithError::PatternParseFailed {
49                reason: format!(
50                    "bytes length ({}) != mask length ({})",
51                    bytes.len(),
52                    mask.len()
53                ),
54            });
55        }
56        if bytes.is_empty() {
57            return Err(WraithError::PatternParseFailed {
58                reason: "empty pattern".into(),
59            });
60        }
61
62        let mask_bits: Vec<bool> = mask
63            .chars()
64            .map(|c| c == '?' || c == '.')
65            .collect();
66
67        Ok(Self {
68            bytes: bytes.to_vec(),
69            mask: mask_bits,
70        })
71    }
72
73    /// parse IDA-style pattern string
74    ///
75    /// format: space-separated hex bytes, `?` or `??` for wildcards
76    /// examples:
77    /// - `"48 8B 05 ?? ?? ?? ?? 48 89"`
78    /// - `"48 8B 05 ? ? ? ? 48 89"`
79    /// - `"E8 ?? ?? ?? ?? 90 90"`
80    pub fn from_ida(pattern: &str) -> Result<Self> {
81        Self::parse(pattern)
82    }
83
84    /// auto-detect and parse pattern format
85    ///
86    /// handles both IDA-style (`"48 8B ??"`) and code-style input
87    pub fn parse(pattern: &str) -> Result<Self> {
88        let trimmed = pattern.trim();
89        if trimmed.is_empty() {
90            return Err(WraithError::PatternParseFailed {
91                reason: "empty pattern".into(),
92            });
93        }
94
95        let parts: Vec<&str> = trimmed.split_whitespace().collect();
96        if parts.is_empty() {
97            return Err(WraithError::PatternParseFailed {
98                reason: "empty pattern".into(),
99            });
100        }
101
102        let mut bytes = Vec::with_capacity(parts.len());
103        let mut mask = Vec::with_capacity(parts.len());
104
105        for part in parts {
106            if part == "?" || part == "??" || part == "*" || part == "**" {
107                bytes.push(0);
108                mask.push(true);
109            } else {
110                let byte = u8::from_str_radix(part, 16).map_err(|_| {
111                    WraithError::PatternParseFailed {
112                        reason: format!("invalid hex byte: '{}'", part),
113                    }
114                })?;
115                bytes.push(byte);
116                mask.push(false);
117            }
118        }
119
120        Ok(Self { bytes, mask })
121    }
122
123    /// get pattern length
124    pub fn len(&self) -> usize {
125        self.bytes.len()
126    }
127
128    /// check if pattern is empty
129    pub fn is_empty(&self) -> bool {
130        self.bytes.is_empty()
131    }
132
133    /// check if data matches this pattern at given offset
134    #[inline]
135    fn matches_at(&self, data: &[u8], offset: usize) -> bool {
136        if offset + self.bytes.len() > data.len() {
137            return false;
138        }
139
140        self.bytes
141            .iter()
142            .zip(self.mask.iter())
143            .enumerate()
144            .all(|(i, (&pattern_byte, &is_wildcard))| {
145                is_wildcard || data[offset + i] == pattern_byte
146            })
147    }
148}
149
/// a single pattern hit plus optional information about the owning module
#[derive(Debug, Clone)]
pub struct ScanMatch {
    /// absolute address at which the pattern matched
    pub address: usize,
    /// distance of the match from the start of the scan (for slice scans)
    pub offset: usize,
    /// name of the module containing the address, when one was attached
    pub module_name: Option<String>,
    /// base address of that module, when one was attached
    pub module_base: Option<usize>,
}

impl ScanMatch {
    /// build a match with no module information attached
    fn new(address: usize, offset: usize) -> Self {
        let (module_name, module_base) = (None, None);
        Self {
            address,
            offset,
            module_name,
            module_base,
        }
    }

    /// consume the match and return it tagged with a module name and base
    fn with_module(self, name: String, base: usize) -> Self {
        Self {
            module_name: Some(name),
            module_base: Some(base),
            ..self
        }
    }
}
179
180/// configurable pattern scanner
181///
182/// uses SIMD acceleration (AVX2/SSE2) when available and alignment is 1
183pub struct Scanner {
184    pattern: Pattern,
185    alignment: usize,
186    max_results: Option<usize>,
187    /// cached SIMD scanner for acceleration
188    simd_scanner: Option<super::simd::SimdScanner>,
189}
190
191impl Scanner {
192    /// create scanner from pattern string (auto-detect format)
193    pub fn new(pattern: &str) -> Result<Self> {
194        let parsed = Pattern::parse(pattern)?;
195        let simd_scanner = Some(super::simd::SimdScanner::new(
196            parsed.bytes.clone(),
197            parsed.mask.clone(),
198        ));
199        Ok(Self {
200            pattern: parsed,
201            alignment: 1,
202            max_results: None,
203            simd_scanner,
204        })
205    }
206
207    /// create scanner from pre-parsed pattern
208    pub fn from_pattern(pattern: Pattern) -> Self {
209        let simd_scanner = Some(super::simd::SimdScanner::new(
210            pattern.bytes.clone(),
211            pattern.mask.clone(),
212        ));
213        Self {
214            pattern,
215            alignment: 1,
216            max_results: None,
217            simd_scanner,
218        }
219    }
220
221    /// set scan alignment (1, 2, 4, 8, 16)
222    ///
223    /// patterns will only be checked at addresses aligned to this value
224    /// note: SIMD acceleration is disabled when alignment > 1
225    pub fn alignment(mut self, align: usize) -> Self {
226        self.alignment = align.max(1);
227        // disable SIMD when alignment > 1 (SIMD assumes byte alignment)
228        if self.alignment > 1 {
229            self.simd_scanner = None;
230        }
231        self
232    }
233
234    /// limit maximum number of results
235    pub fn max_results(mut self, max: usize) -> Self {
236        self.max_results = Some(max);
237        self
238    }
239
240    /// scan byte slice for pattern, returning offsets
241    pub fn scan_slice(&self, data: &[u8]) -> Vec<usize> {
242        let pattern_len = self.pattern.len();
243
244        if pattern_len > data.len() {
245            return Vec::new();
246        }
247
248        // use SIMD when alignment is 1 and no max_results limit
249        // (SIMD finds all matches, then we'd have to truncate anyway)
250        if self.alignment == 1 {
251            if let Some(ref simd) = self.simd_scanner {
252                let results = simd.scan(data);
253                // apply max_results limit if set
254                if let Some(max) = self.max_results {
255                    return results.into_iter().take(max).collect();
256                }
257                return results;
258            }
259        }
260
261        // fallback to scalar implementation
262        self.scan_slice_scalar(data)
263    }
264
265    /// scalar fallback for aligned scans
266    fn scan_slice_scalar(&self, data: &[u8]) -> Vec<usize> {
267        let mut results = Vec::new();
268        let pattern_len = self.pattern.len();
269        let max_offset = data.len() - pattern_len;
270        let mut offset = 0;
271
272        while offset <= max_offset {
273            if self.pattern.matches_at(data, offset) {
274                results.push(offset);
275                if let Some(max) = self.max_results {
276                    if results.len() >= max {
277                        break;
278                    }
279                }
280            }
281            offset += self.alignment;
282        }
283
284        results
285    }
286
287    /// scan byte slice, returning first match offset
288    pub fn scan_slice_first(&self, data: &[u8]) -> Option<usize> {
289        let pattern_len = self.pattern.len();
290
291        if pattern_len > data.len() {
292            return None;
293        }
294
295        // use SIMD for first match when alignment is 1
296        if self.alignment == 1 {
297            if let Some(ref simd) = self.simd_scanner {
298                return simd.scan_first(data);
299            }
300        }
301
302        // fallback to scalar
303        let max_offset = data.len() - pattern_len;
304        let mut offset = 0;
305
306        while offset <= max_offset {
307            if self.pattern.matches_at(data, offset) {
308                return Some(offset);
309            }
310            offset += self.alignment;
311        }
312
313        None
314    }
315
316    /// scan memory range for pattern
317    ///
318    /// # Safety
319    /// caller must ensure the memory range [start, start+size) is readable
320    pub unsafe fn scan_range(&self, start: usize, size: usize) -> Result<Vec<ScanMatch>> {
321        if start == 0 {
322            return Err(WraithError::NullPointer {
323                context: "scan_range start",
324            });
325        }
326        if size == 0 || size < self.pattern.len() {
327            return Ok(Vec::new());
328        }
329
330        // SAFETY: caller guarantees this memory is readable
331        let data = unsafe { core::slice::from_raw_parts(start as *const u8, size) };
332        let offsets = self.scan_slice(data);
333
334        Ok(offsets
335            .into_iter()
336            .map(|offset| ScanMatch::new(start + offset, offset))
337            .collect())
338    }
339
340    /// scan memory range, returning first match
341    ///
342    /// # Safety
343    /// caller must ensure the memory range [start, start+size) is readable
344    pub unsafe fn scan_range_first(&self, start: usize, size: usize) -> Result<Option<ScanMatch>> {
345        if start == 0 {
346            return Err(WraithError::NullPointer {
347                context: "scan_range_first start",
348            });
349        }
350        if size == 0 || size < self.pattern.len() {
351            return Ok(None);
352        }
353
354        // SAFETY: caller guarantees this memory is readable
355        let data = unsafe { core::slice::from_raw_parts(start as *const u8, size) };
356
357        Ok(self
358            .scan_slice_first(data)
359            .map(|offset| ScanMatch::new(start + offset, offset)))
360    }
361}
362
363/// pattern scanner for byte slices (backward compatible API)
364pub struct PatternScanner<'a> {
365    data: &'a [u8],
366}
367
368impl<'a> PatternScanner<'a> {
369    pub fn new(data: &'a [u8]) -> Self {
370        Self { data }
371    }
372
373    /// scan for pattern with wildcards
374    ///
375    /// pattern format: `"48 8B ? ? 90"` where `?` is wildcard
376    pub fn find(&self, pattern: &str) -> Option<usize> {
377        let parsed = Pattern::parse(pattern).ok()?;
378        let scanner = Scanner::from_pattern(parsed);
379        scanner.scan_slice_first(self.data)
380    }
381
382    /// find all occurrences of pattern
383    pub fn find_all(&self, pattern: &str) -> Vec<usize> {
384        match Pattern::parse(pattern) {
385            Ok(parsed) => {
386                let scanner = Scanner::from_pattern(parsed);
387                scanner.scan_slice(self.data)
388            }
389            Err(_) => vec![],
390        }
391    }
392}
393
394// ============================================================================
395// Module and Memory Region Scanning (requires navigation feature)
396// ============================================================================
397
#[cfg(feature = "navigation")]
mod navigation_scan {
    use super::*;
    use crate::navigation::{MemoryRegion, MemoryRegionIterator, Module, ModuleQuery};
    use crate::structures::Peb;

    impl Scanner {
        /// scan loaded module for pattern
        ///
        /// every match is tagged with the module's name and base address
        pub fn scan_module(&self, module: &Module) -> Result<Vec<ScanMatch>> {
            let base = module.base();
            let size = module.size();
            let name = module.name();

            // SAFETY: module memory is mapped and readable for loaded modules
            let matches = unsafe { self.scan_range(base, size)? };

            Ok(matches
                .into_iter()
                .map(|m| m.with_module(name.clone(), base))
                .collect())
        }

        /// scan module, returning first match
        ///
        /// the match, if any, is tagged with the module's name and base
        pub fn scan_module_first(&self, module: &Module) -> Result<Option<ScanMatch>> {
            let base = module.base();
            let size = module.size();
            let name = module.name();

            // SAFETY: module memory is mapped and readable
            let result = unsafe { self.scan_range_first(base, size)? };

            Ok(result.map(|m| m.with_module(name, base)))
        }

        /// scan memory region for pattern
        ///
        /// regions that are not both committed and readable produce an
        /// empty result instead of being read
        pub fn scan_region(&self, region: &MemoryRegion) -> Result<Vec<ScanMatch>> {
            if !region.is_committed() || !region.is_readable() {
                return Ok(Vec::new());
            }

            // SAFETY: region is committed and readable
            unsafe { self.scan_range(region.base_address, region.region_size) }
        }

        /// scan all executable memory regions
        ///
        /// only committed, readable and executable regions are scanned; the
        /// combined result is capped at `max_results` when a limit is set
        pub fn scan_executable_regions(&self) -> Result<Vec<ScanMatch>> {
            self.scan_committed_regions(true)
        }

        /// scan all committed memory regions (more thorough, slower)
        ///
        /// every committed, readable region is scanned; the combined result
        /// is capped at `max_results` when a limit is set
        pub fn scan_all_regions(&self) -> Result<Vec<ScanMatch>> {
            self.scan_committed_regions(false)
        }

        /// shared walk over the regions yielded by `MemoryRegionIterator`
        ///
        /// skips regions that are not committed and readable, and, when
        /// `executable_only` is set, regions that are not executable;
        /// stops early once `max_results` matches were gathered
        fn scan_committed_regions(&self, executable_only: bool) -> Result<Vec<ScanMatch>> {
            let mut all_matches = Vec::new();

            for region in MemoryRegionIterator::new() {
                if !region.is_committed() || !region.is_readable() {
                    continue;
                }
                if executable_only && !region.is_executable() {
                    continue;
                }

                // SAFETY: region is committed and readable
                let matches = unsafe { self.scan_range(region.base_address, region.region_size)? };
                all_matches.extend(matches);

                // each region scan is limited on its own, so the running
                // total can overshoot and has to be cut back here
                if let Some(max) = self.max_results {
                    if all_matches.len() >= max {
                        all_matches.truncate(max);
                        break;
                    }
                }
            }

            Ok(all_matches)
        }
    }

    /// find pattern in module by name
    ///
    /// convenience function that looks up the module and scans it
    pub fn find_pattern_in_module(module_name: &str, pattern: &str) -> Result<Vec<ScanMatch>> {
        let peb = Peb::current()?;
        let query = ModuleQuery::new(&peb);
        let module = query.find_by_name(module_name)?;
        Scanner::new(pattern)?.scan_module(&module)
    }

    /// find first pattern match in module
    ///
    /// same lookup as `find_pattern_in_module`, but stops at the first hit
    pub fn find_pattern_in_module_first(
        module_name: &str,
        pattern: &str,
    ) -> Result<Option<ScanMatch>> {
        let peb = Peb::current()?;
        let query = ModuleQuery::new(&peb);
        let module = query.find_by_name(module_name)?;
        Scanner::new(pattern)?.scan_module_first(&module)
    }

    /// scan all loaded modules for pattern
    ///
    /// modules whose scan returns an error are skipped (best effort)
    pub fn find_pattern_all_modules(pattern: &str) -> Result<Vec<ScanMatch>> {
        let peb = Peb::current()?;
        let scanner = Scanner::new(pattern)?;
        let mut all_matches = Vec::new();

        for module in crate::navigation::InLoadOrderIter::new(&peb)? {
            // skip modules we can't scan
            if let Ok(matches) = scanner.scan_module(&module) {
                all_matches.extend(matches);
            }
        }

        Ok(all_matches)
    }

    /// scan all executable regions for pattern
    pub fn find_pattern_executable(pattern: &str) -> Result<Vec<ScanMatch>> {
        Scanner::new(pattern)?.scan_executable_regions()
    }

    /// find pattern in specific memory region
    pub fn find_pattern_in_region(region: &MemoryRegion, pattern: &str) -> Result<Vec<ScanMatch>> {
        Scanner::new(pattern)?.scan_region(region)
    }
}
538
539#[cfg(feature = "navigation")]
540pub use navigation_scan::*;
541
#[cfg(test)]
mod tests {
    use super::*;

    // plain hex tokens become exact-match bytes
    #[test]
    fn test_pattern_parse_ida() {
        let Pattern { bytes, mask } = Pattern::parse("48 8B 05").unwrap();
        assert_eq!(bytes, vec![0x48, 0x8B, 0x05]);
        assert_eq!(mask, vec![false; 3]);
    }

    // `??` tokens become wildcard positions with a zero placeholder byte
    #[test]
    fn test_pattern_parse_wildcards() {
        let Pattern { bytes, mask } = Pattern::parse("48 8B ?? ?? 90").unwrap();
        assert_eq!(bytes, vec![0x48, 0x8B, 0, 0, 0x90]);
        assert_eq!(mask, vec![false, false, true, true, false]);
    }

    // a single `?` is accepted as a wildcard as well
    #[test]
    fn test_pattern_parse_single_wildcard() {
        let Pattern { bytes, mask } = Pattern::parse("48 ? 05").unwrap();
        assert_eq!(bytes, vec![0x48, 0, 0x05]);
        assert_eq!(mask, vec![false, true, false]);
    }

    // code-style construction keeps the bytes and maps `?` to wildcards
    #[test]
    fn test_pattern_from_code() {
        let raw = [0x48, 0x8B, 0x05, 0x00];
        let built = Pattern::from_code(&raw, "xx??").unwrap();
        assert_eq!(built.bytes, vec![0x48, 0x8B, 0x05, 0x00]);
        assert_eq!(built.mask, vec![false, false, true, true]);
    }

    // first-match lookup through the backward compatible wrapper
    #[test]
    fn test_scanner_find() {
        let haystack = [0x48, 0x8B, 0x05, 0x12, 0x34, 0x56, 0x78, 0x90];
        let finder = PatternScanner::new(&haystack);

        let cases = [
            ("48 8B 05", Some(0)),
            ("48 8B ? ? 34", Some(0)),
            ("FF FF", None),
        ];
        for &(pattern, expected) in cases.iter() {
            assert_eq!(finder.find(pattern), expected, "pattern {:?}", pattern);
        }
    }

    // every occurrence is reported, in order
    #[test]
    fn test_scanner_find_all() {
        let haystack = [0x48, 0x8B, 0x48, 0x8B, 0x48, 0x8B];
        let finder = PatternScanner::new(&haystack);

        assert_eq!(finder.find_all("48 8B"), vec![0, 2, 4]);
    }

    // with alignment 4 only offsets 0 and 4 are candidates
    #[test]
    fn test_scanner_alignment() {
        let haystack = [0x48, 0x8B, 0x48, 0x8B, 0x48, 0x8B, 0x48, 0x8B];
        let aligned = Scanner::new("48 8B").unwrap().alignment(4);

        let hits = aligned.scan_slice(&haystack);
        assert_eq!(hits, vec![0, 4]);
    }

    // the result limit caps the number of reported offsets
    #[test]
    fn test_scanner_max_results() {
        let haystack = [0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48, 0x48];
        let limited = Scanner::new("48").unwrap().max_results(3);

        let hits = limited.scan_slice(&haystack);
        assert_eq!(hits.len(), 3);
    }

    // empty and whitespace-only strings are rejected
    #[test]
    fn test_empty_pattern_error() {
        for input in ["", "   "].iter() {
            assert!(Pattern::parse(input).is_err(), "input {:?}", input);
        }
    }

    // tokens that are not hex bytes are rejected
    #[test]
    fn test_invalid_hex_error() {
        for input in ["48 ZZ 05", "GG"].iter() {
            assert!(Pattern::parse(input).is_err(), "input {:?}", input);
        }
    }

    // both constructors refuse bytes and mask of different lengths
    #[test]
    fn test_pattern_length_mismatch() {
        let from_code = Pattern::from_code(&[0x48, 0x8B], "x");
        let from_mask = Pattern::from_bytes_mask(&[0x48], &[false, true]);
        assert!(from_code.is_err());
        assert!(from_mask.is_err());
    }
}