wraith/util/
pattern.rs

//! Pattern scanning for memory and modules
//!
//! Supports multiple pattern formats:
//! - IDA-style: `"48 8B 05 ?? ?? ?? ?? 48 89"` (wildcards as `??` or `?`)
//! - Code-style: bytes + mask (`"\x48\x8B\x00\x00"` with `"xx??"`; one mask character per byte)
//! - Raw bytes with mask array
7
8use crate::error::{Result, WraithError};
9
10/// parsed byte pattern with wildcard mask
11#[derive(Debug, Clone)]
12pub struct Pattern {
13    pub(crate) bytes: Vec<u8>,
14    pub(crate) mask: Vec<bool>, // true = wildcard (match any)
15}
16
17impl Pattern {
18    /// create pattern from bytes and mask arrays
19    pub fn from_bytes_mask(bytes: &[u8], mask: &[bool]) -> Result<Self> {
20        if bytes.len() != mask.len() {
21            return Err(WraithError::PatternParseFailed {
22                reason: "bytes and mask length mismatch".into(),
23            });
24        }
25        if bytes.is_empty() {
26            return Err(WraithError::PatternParseFailed {
27                reason: "empty pattern".into(),
28            });
29        }
30        Ok(Self {
31            bytes: bytes.to_vec(),
32            mask: mask.to_vec(),
33        })
34    }
35
36    /// create pattern from bytes and string mask
37    ///
38    /// mask format: `x` = exact match, `?` = wildcard
39    /// example: `"xx????xx"` for 8 bytes with 4 wildcards in middle
40    pub fn from_code(bytes: &[u8], mask: &str) -> Result<Self> {
41        if bytes.len() != mask.len() {
42            return Err(WraithError::PatternParseFailed {
43                reason: format!(
44                    "bytes length ({}) != mask length ({})",
45                    bytes.len(),
46                    mask.len()
47                ),
48            });
49        }
50        if bytes.is_empty() {
51            return Err(WraithError::PatternParseFailed {
52                reason: "empty pattern".into(),
53            });
54        }
55
56        let mask_bits: Vec<bool> = mask
57            .chars()
58            .map(|c| c == '?' || c == '.')
59            .collect();
60
61        Ok(Self {
62            bytes: bytes.to_vec(),
63            mask: mask_bits,
64        })
65    }
66
67    /// parse IDA-style pattern string
68    ///
69    /// format: space-separated hex bytes, `?` or `??` for wildcards
70    /// examples:
71    /// - `"48 8B 05 ?? ?? ?? ?? 48 89"`
72    /// - `"48 8B 05 ? ? ? ? 48 89"`
73    /// - `"E8 ?? ?? ?? ?? 90 90"`
74    pub fn from_ida(pattern: &str) -> Result<Self> {
75        Self::parse(pattern)
76    }
77
78    /// auto-detect and parse pattern format
79    ///
80    /// handles both IDA-style (`"48 8B ??"`) and code-style input
81    pub fn parse(pattern: &str) -> Result<Self> {
82        let trimmed = pattern.trim();
83        if trimmed.is_empty() {
84            return Err(WraithError::PatternParseFailed {
85                reason: "empty pattern".into(),
86            });
87        }
88
89        let parts: Vec<&str> = trimmed.split_whitespace().collect();
90        if parts.is_empty() {
91            return Err(WraithError::PatternParseFailed {
92                reason: "empty pattern".into(),
93            });
94        }
95
96        let mut bytes = Vec::with_capacity(parts.len());
97        let mut mask = Vec::with_capacity(parts.len());
98
99        for part in parts {
100            if part == "?" || part == "??" || part == "*" || part == "**" {
101                bytes.push(0);
102                mask.push(true);
103            } else {
104                let byte = u8::from_str_radix(part, 16).map_err(|_| {
105                    WraithError::PatternParseFailed {
106                        reason: format!("invalid hex byte: '{}'", part),
107                    }
108                })?;
109                bytes.push(byte);
110                mask.push(false);
111            }
112        }
113
114        Ok(Self { bytes, mask })
115    }
116
117    /// get pattern length
118    pub fn len(&self) -> usize {
119        self.bytes.len()
120    }
121
122    /// check if pattern is empty
123    pub fn is_empty(&self) -> bool {
124        self.bytes.is_empty()
125    }
126
127    /// check if data matches this pattern at given offset
128    #[inline]
129    fn matches_at(&self, data: &[u8], offset: usize) -> bool {
130        if offset + self.bytes.len() > data.len() {
131            return false;
132        }
133
134        self.bytes
135            .iter()
136            .zip(self.mask.iter())
137            .enumerate()
138            .all(|(i, (&pattern_byte, &is_wildcard))| {
139                is_wildcard || data[offset + i] == pattern_byte
140            })
141    }
142}
143
/// one pattern hit, optionally tagged with the module it was found in
#[derive(Debug, Clone)]
pub struct ScanMatch {
    /// absolute address of the match
    pub address: usize,
    /// position of the match relative to the start of the scanned data
    pub offset: usize,
    /// name of the module the match was attributed to, if any
    pub module_name: Option<String>,
    /// base address of that module, if any
    pub module_base: Option<usize>,
}

impl ScanMatch {
    /// build a match with no module information attached
    fn new(address: usize, offset: usize) -> Self {
        ScanMatch {
            module_name: None,
            module_base: None,
            address,
            offset,
        }
    }

    /// attach module name and base, keeping address and offset as they are
    fn with_module(self, name: String, base: usize) -> Self {
        ScanMatch {
            module_name: Some(name),
            module_base: Some(base),
            ..self
        }
    }
}
173
174/// configurable pattern scanner
175///
176/// uses SIMD acceleration (AVX2/SSE2) when available and alignment is 1
177pub struct Scanner {
178    pattern: Pattern,
179    alignment: usize,
180    max_results: Option<usize>,
181    /// cached SIMD scanner for acceleration
182    simd_scanner: Option<super::simd::SimdScanner>,
183}
184
185impl Scanner {
186    /// create scanner from pattern string (auto-detect format)
187    pub fn new(pattern: &str) -> Result<Self> {
188        let parsed = Pattern::parse(pattern)?;
189        let simd_scanner = Some(super::simd::SimdScanner::new(
190            parsed.bytes.clone(),
191            parsed.mask.clone(),
192        ));
193        Ok(Self {
194            pattern: parsed,
195            alignment: 1,
196            max_results: None,
197            simd_scanner,
198        })
199    }
200
201    /// create scanner from pre-parsed pattern
202    pub fn from_pattern(pattern: Pattern) -> Self {
203        let simd_scanner = Some(super::simd::SimdScanner::new(
204            pattern.bytes.clone(),
205            pattern.mask.clone(),
206        ));
207        Self {
208            pattern,
209            alignment: 1,
210            max_results: None,
211            simd_scanner,
212        }
213    }
214
215    /// set scan alignment (1, 2, 4, 8, 16)
216    ///
217    /// patterns will only be checked at addresses aligned to this value
218    /// note: SIMD acceleration is disabled when alignment > 1
219    pub fn alignment(mut self, align: usize) -> Self {
220        self.alignment = align.max(1);
221        // disable SIMD when alignment > 1 (SIMD assumes byte alignment)
222        if self.alignment > 1 {
223            self.simd_scanner = None;
224        }
225        self
226    }
227
228    /// limit maximum number of results
229    pub fn max_results(mut self, max: usize) -> Self {
230        self.max_results = Some(max);
231        self
232    }
233
234    /// scan byte slice for pattern, returning offsets
235    pub fn scan_slice(&self, data: &[u8]) -> Vec<usize> {
236        let pattern_len = self.pattern.len();
237
238        if pattern_len > data.len() {
239            return Vec::new();
240        }
241
242        // use SIMD when alignment is 1 and no max_results limit
243        // (SIMD finds all matches, then we'd have to truncate anyway)
244        if self.alignment == 1 {
245            if let Some(ref simd) = self.simd_scanner {
246                let results = simd.scan(data);
247                // apply max_results limit if set
248                if let Some(max) = self.max_results {
249                    return results.into_iter().take(max).collect();
250                }
251                return results;
252            }
253        }
254
255        // fallback to scalar implementation
256        self.scan_slice_scalar(data)
257    }
258
259    /// scalar fallback for aligned scans
260    fn scan_slice_scalar(&self, data: &[u8]) -> Vec<usize> {
261        let mut results = Vec::new();
262        let pattern_len = self.pattern.len();
263        let max_offset = data.len() - pattern_len;
264        let mut offset = 0;
265
266        while offset <= max_offset {
267            if self.pattern.matches_at(data, offset) {
268                results.push(offset);
269                if let Some(max) = self.max_results {
270                    if results.len() >= max {
271                        break;
272                    }
273                }
274            }
275            offset += self.alignment;
276        }
277
278        results
279    }
280
281    /// scan byte slice, returning first match offset
282    pub fn scan_slice_first(&self, data: &[u8]) -> Option<usize> {
283        let pattern_len = self.pattern.len();
284
285        if pattern_len > data.len() {
286            return None;
287        }
288
289        // use SIMD for first match when alignment is 1
290        if self.alignment == 1 {
291            if let Some(ref simd) = self.simd_scanner {
292                return simd.scan_first(data);
293            }
294        }
295
296        // fallback to scalar
297        let max_offset = data.len() - pattern_len;
298        let mut offset = 0;
299
300        while offset <= max_offset {
301            if self.pattern.matches_at(data, offset) {
302                return Some(offset);
303            }
304            offset += self.alignment;
305        }
306
307        None
308    }
309
310    /// scan memory range for pattern
311    ///
312    /// # Safety
313    /// caller must ensure the memory range [start, start+size) is readable
314    pub unsafe fn scan_range(&self, start: usize, size: usize) -> Result<Vec<ScanMatch>> {
315        if start == 0 {
316            return Err(WraithError::NullPointer {
317                context: "scan_range start",
318            });
319        }
320        if size == 0 || size < self.pattern.len() {
321            return Ok(Vec::new());
322        }
323
324        // SAFETY: caller guarantees this memory is readable
325        let data = unsafe { core::slice::from_raw_parts(start as *const u8, size) };
326        let offsets = self.scan_slice(data);
327
328        Ok(offsets
329            .into_iter()
330            .map(|offset| ScanMatch::new(start + offset, offset))
331            .collect())
332    }
333
334    /// scan memory range, returning first match
335    ///
336    /// # Safety
337    /// caller must ensure the memory range [start, start+size) is readable
338    pub unsafe fn scan_range_first(&self, start: usize, size: usize) -> Result<Option<ScanMatch>> {
339        if start == 0 {
340            return Err(WraithError::NullPointer {
341                context: "scan_range_first start",
342            });
343        }
344        if size == 0 || size < self.pattern.len() {
345            return Ok(None);
346        }
347
348        // SAFETY: caller guarantees this memory is readable
349        let data = unsafe { core::slice::from_raw_parts(start as *const u8, size) };
350
351        Ok(self
352            .scan_slice_first(data)
353            .map(|offset| ScanMatch::new(start + offset, offset)))
354    }
355}
356
357/// pattern scanner for byte slices (backward compatible API)
358pub struct PatternScanner<'a> {
359    data: &'a [u8],
360}
361
362impl<'a> PatternScanner<'a> {
363    pub fn new(data: &'a [u8]) -> Self {
364        Self { data }
365    }
366
367    /// scan for pattern with wildcards
368    ///
369    /// pattern format: `"48 8B ? ? 90"` where `?` is wildcard
370    pub fn find(&self, pattern: &str) -> Option<usize> {
371        let parsed = Pattern::parse(pattern).ok()?;
372        let scanner = Scanner::from_pattern(parsed);
373        scanner.scan_slice_first(self.data)
374    }
375
376    /// find all occurrences of pattern
377    pub fn find_all(&self, pattern: &str) -> Vec<usize> {
378        match Pattern::parse(pattern) {
379            Ok(parsed) => {
380                let scanner = Scanner::from_pattern(parsed);
381                scanner.scan_slice(self.data)
382            }
383            Err(_) => vec![],
384        }
385    }
386}
387
388// ============================================================================
389// Module and Memory Region Scanning (requires navigation feature)
390// ============================================================================
391
392#[cfg(feature = "navigation")]
393mod navigation_scan {
394    use super::*;
395    use crate::navigation::{MemoryRegion, MemoryRegionIterator, Module, ModuleQuery};
396    use crate::structures::Peb;
397
398    impl Scanner {
399        /// scan loaded module for pattern
400        pub fn scan_module(&self, module: &Module) -> Result<Vec<ScanMatch>> {
401            let base = module.base();
402            let size = module.size();
403            let name = module.name();
404
405            // SAFETY: module memory is mapped and readable for loaded modules
406            let matches = unsafe { self.scan_range(base, size)? };
407
408            Ok(matches
409                .into_iter()
410                .map(|m| m.with_module(name.clone(), base))
411                .collect())
412        }
413
414        /// scan module, returning first match
415        pub fn scan_module_first(&self, module: &Module) -> Result<Option<ScanMatch>> {
416            let base = module.base();
417            let size = module.size();
418            let name = module.name();
419
420            // SAFETY: module memory is mapped and readable
421            let result = unsafe { self.scan_range_first(base, size)? };
422
423            Ok(result.map(|m| m.with_module(name, base)))
424        }
425
426        /// scan memory region for pattern
427        pub fn scan_region(&self, region: &MemoryRegion) -> Result<Vec<ScanMatch>> {
428            if !region.is_committed() || !region.is_readable() {
429                return Ok(Vec::new());
430            }
431
432            // SAFETY: region is committed and readable
433            unsafe { self.scan_range(region.base_address, region.region_size) }
434        }
435
436        /// scan all executable memory regions
437        pub fn scan_executable_regions(&self) -> Result<Vec<ScanMatch>> {
438            let mut all_matches = Vec::new();
439
440            for region in MemoryRegionIterator::new() {
441                if !region.is_committed() || !region.is_readable() || !region.is_executable() {
442                    continue;
443                }
444
445                // SAFETY: region is committed and readable
446                let matches = unsafe { self.scan_range(region.base_address, region.region_size)? };
447                all_matches.extend(matches);
448
449                if let Some(max) = self.max_results {
450                    if all_matches.len() >= max {
451                        all_matches.truncate(max);
452                        break;
453                    }
454                }
455            }
456
457            Ok(all_matches)
458        }
459
460        /// scan all committed memory regions (more thorough, slower)
461        pub fn scan_all_regions(&self) -> Result<Vec<ScanMatch>> {
462            let mut all_matches = Vec::new();
463
464            for region in MemoryRegionIterator::new() {
465                if !region.is_committed() || !region.is_readable() {
466                    continue;
467                }
468
469                // SAFETY: region is committed and readable
470                let matches = unsafe { self.scan_range(region.base_address, region.region_size)? };
471                all_matches.extend(matches);
472
473                if let Some(max) = self.max_results {
474                    if all_matches.len() >= max {
475                        all_matches.truncate(max);
476                        break;
477                    }
478                }
479            }
480
481            Ok(all_matches)
482        }
483    }
484
485    /// find pattern in module by name
486    ///
487    /// convenience function that looks up the module and scans it
488    pub fn find_pattern_in_module(module_name: &str, pattern: &str) -> Result<Vec<ScanMatch>> {
489        let peb = Peb::current()?;
490        let query = ModuleQuery::new(&peb);
491        let module = query.find_by_name(module_name)?;
492        Scanner::new(pattern)?.scan_module(&module)
493    }
494
495    /// find first pattern match in module
496    pub fn find_pattern_in_module_first(
497        module_name: &str,
498        pattern: &str,
499    ) -> Result<Option<ScanMatch>> {
500        let peb = Peb::current()?;
501        let query = ModuleQuery::new(&peb);
502        let module = query.find_by_name(module_name)?;
503        Scanner::new(pattern)?.scan_module_first(&module)
504    }
505
506    /// scan all loaded modules for pattern
507    pub fn find_pattern_all_modules(pattern: &str) -> Result<Vec<ScanMatch>> {
508        let peb = Peb::current()?;
509        let scanner = Scanner::new(pattern)?;
510        let mut all_matches = Vec::new();
511
512        for module in crate::navigation::InLoadOrderIter::new(&peb)? {
513            match scanner.scan_module(&module) {
514                Ok(matches) => all_matches.extend(matches),
515                Err(_) => continue, // skip modules we can't scan
516            }
517        }
518
519        Ok(all_matches)
520    }
521
522    /// scan all executable regions for pattern
523    pub fn find_pattern_executable(pattern: &str) -> Result<Vec<ScanMatch>> {
524        Scanner::new(pattern)?.scan_executable_regions()
525    }
526
527    /// find pattern in specific memory region
528    pub fn find_pattern_in_region(region: &MemoryRegion, pattern: &str) -> Result<Vec<ScanMatch>> {
529        Scanner::new(pattern)?.scan_region(region)
530    }
531}
532
533#[cfg(feature = "navigation")]
534pub use navigation_scan::*;
535
#[cfg(test)]
mod tests {
    use super::*;

    /// plain hex tokens become exact-match bytes
    #[test]
    fn test_pattern_parse_ida() {
        let parsed = Pattern::parse("48 8B 05").unwrap();
        assert_eq!(parsed.bytes, [0x48, 0x8B, 0x05]);
        assert_eq!(parsed.mask, [false; 3]);
    }

    /// `??` tokens become wildcard positions with a zero placeholder byte
    #[test]
    fn test_pattern_parse_wildcards() {
        let parsed = Pattern::parse("48 8B ?? ?? 90").unwrap();
        assert_eq!(parsed.bytes, [0x48, 0x8B, 0x00, 0x00, 0x90]);
        assert_eq!(parsed.mask, [false, false, true, true, false]);
    }

    /// a single `?` is accepted as a wildcard too
    #[test]
    fn test_pattern_parse_single_wildcard() {
        let parsed = Pattern::parse("48 ? 05").unwrap();
        assert_eq!(parsed.bytes, [0x48, 0x00, 0x05]);
        assert_eq!(parsed.mask, [false, true, false]);
    }

    /// code-style input keeps the bytes and converts the mask string
    #[test]
    fn test_pattern_from_code() {
        let raw = [0x48, 0x8B, 0x05, 0x00];
        let built = Pattern::from_code(&raw, "xx??").unwrap();
        assert_eq!(built.bytes, raw);
        assert_eq!(built.mask, [false, false, true, true]);
    }

    /// first-match lookup through the compatibility wrapper
    #[test]
    fn test_scanner_find() {
        let haystack = [0x48, 0x8B, 0x05, 0x12, 0x34, 0x56, 0x78, 0x90];
        let finder = PatternScanner::new(&haystack);

        assert_eq!(finder.find("48 8B 05"), Some(0));
        assert_eq!(finder.find("48 8B ? ? 34"), Some(0));
        assert!(finder.find("FF FF").is_none());
    }

    /// every occurrence is reported, in ascending order
    #[test]
    fn test_scanner_find_all() {
        let haystack = [0x48, 0x8B, 0x48, 0x8B, 0x48, 0x8B];
        let finder = PatternScanner::new(&haystack);

        assert_eq!(finder.find_all("48 8B"), [0, 2, 4]);
    }

    /// with alignment 4 only offsets 0 and 4 are candidates
    #[test]
    fn test_scanner_alignment() {
        let haystack = [0x48, 0x8B, 0x48, 0x8B, 0x48, 0x8B, 0x48, 0x8B];
        let aligned = Scanner::new("48 8B").unwrap().alignment(4);

        assert_eq!(aligned.scan_slice(&haystack), [0, 4]);
    }

    /// the result limit caps how many offsets come back
    #[test]
    fn test_scanner_max_results() {
        let haystack = [0x48; 8];
        let limited = Scanner::new("48").unwrap().max_results(3);

        let hits = limited.scan_slice(&haystack);
        assert_eq!(hits.len(), 3);
    }

    /// empty and whitespace-only input is rejected
    #[test]
    fn test_empty_pattern_error() {
        for input in ["", "   "] {
            assert!(Pattern::parse(input).is_err());
        }
    }

    /// tokens that are not hex bytes are rejected
    #[test]
    fn test_invalid_hex_error() {
        for input in ["48 ZZ 05", "GG"] {
            assert!(Pattern::parse(input).is_err());
        }
    }

    /// bytes and mask must have the same length in both constructors
    #[test]
    fn test_pattern_length_mismatch() {
        let from_code = Pattern::from_code(&[0x48, 0x8B], "x");
        let from_mask = Pattern::from_bytes_mask(&[0x48], &[false, true]);
        assert!(from_code.is_err());
        assert!(from_mask.is_err());
    }
}