// procmod_scan/pattern.rs

use crate::error::{Error, Result};
/// One position of a scan pattern.
///
/// A token either pins the byte at its position to a specific value or
/// accepts any byte there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
    /// Requires the byte at this position to equal the contained value.
    Exact(u8),
    /// Accepts any byte at this position.
    Wildcard,
}
12/// A byte pattern used for scanning memory regions.
13///
14/// Patterns consist of exact byte matches and wildcard positions. They can be
15/// constructed from IDA-style signature strings or code-style byte/mask pairs.
16#[derive(Debug, Clone)]
17pub struct Pattern {
18    tokens: Vec<Token>,
19}
21impl Pattern {
22    /// Creates a pattern from an IDA-style signature string.
23    ///
24    /// Each token is separated by whitespace. Exact bytes are specified as
25    /// two-character hex values. Wildcards are represented by `?` or `??`.
26    ///
27    /// ```
28    /// use procmod_scan::Pattern;
29    ///
30    /// let pattern = Pattern::from_ida("48 8B ?? 89 ? 0F").unwrap();
31    /// ```
32    pub fn from_ida(signature: &str) -> Result<Self> {
33        let tokens = signature
34            .split_whitespace()
35            .map(|tok| match tok {
36                "?" | "??" => Ok(Token::Wildcard),
37                hex => {
38                    if hex.len() != 2 {
39                        return Err(Error::InvalidPattern(format!(
40                            "expected 2-character hex token, got '{hex}'"
41                        )));
42                    }
43                    u8::from_str_radix(hex, 16)
44                        .map(Token::Exact)
45                        .map_err(|_| Error::InvalidPattern(format!("invalid hex byte '{hex}'")))
46                }
47            })
48            .collect::<Result<Vec<_>>>()?;
49
50        if tokens.is_empty() {
51            return Err(Error::InvalidPattern("pattern is empty".into()));
52        }
53
54        Ok(Self { tokens })
55    }
56
57    /// Creates a pattern from a code-style byte/mask pair.
58    ///
59    /// The mask string uses `x` for exact byte matches and `?` for wildcards,
60    /// one character per byte. The mask length must equal the bytes length.
61    ///
62    /// ```
63    /// use procmod_scan::Pattern;
64    ///
65    /// let pattern = Pattern::from_code(b"\x48\x8B\x00\x89", "xx?x").unwrap();
66    /// ```
67    pub fn from_code(bytes: &[u8], mask: &str) -> Result<Self> {
68        if bytes.len() != mask.len() {
69            return Err(Error::InvalidPattern(format!(
70                "bytes length ({}) does not match mask length ({})",
71                bytes.len(),
72                mask.len()
73            )));
74        }
75
76        if bytes.is_empty() {
77            return Err(Error::InvalidPattern("pattern is empty".into()));
78        }
79
80        let tokens = bytes
81            .iter()
82            .zip(mask.chars())
83            .map(|(&byte, m)| match m {
84                'x' => Ok(Token::Exact(byte)),
85                '?' => Ok(Token::Wildcard),
86                other => Err(Error::InvalidPattern(format!(
87                    "invalid mask character '{other}', expected 'x' or '?'"
88                ))),
89            })
90            .collect::<Result<Vec<_>>>()?;
91
92        Ok(Self { tokens })
93    }
94
95    /// Creates a pattern from raw tokens.
96    ///
97    /// Returns an error if the token list is empty.
98    pub fn from_tokens(tokens: Vec<Token>) -> Result<Self> {
99        if tokens.is_empty() {
100            return Err(Error::InvalidPattern("pattern is empty".into()));
101        }
102        Ok(Self { tokens })
103    }
104
105    /// Returns the tokens in this pattern.
106    pub fn tokens(&self) -> &[Token] {
107        &self.tokens
108    }
109
110    /// Returns the number of tokens in this pattern.
111    pub fn len(&self) -> usize {
112        self.tokens.len()
113    }
114
115    /// Returns true if the pattern has no tokens.
116    pub fn is_empty(&self) -> bool {
117        self.tokens.is_empty()
118    }
119
120    /// Finds all offsets in `data` where this pattern matches.
121    ///
122    /// Returns an empty vec if no matches are found. Matches may overlap.
123    pub fn scan(&self, data: &[u8]) -> Vec<usize> {
124        if data.len() < self.tokens.len() {
125            return Vec::new();
126        }
127
128        let prefix = exact_prefix(&self.tokens);
129
130        if prefix.len() >= 2 {
131            scan_prefix_filtered(data, &self.tokens, &prefix)
132        } else {
133            scan_naive(data, &self.tokens)
134        }
135    }
136
137    /// Finds the first offset in `data` where this pattern matches.
138    ///
139    /// Returns `None` if no match is found.
140    pub fn scan_first(&self, data: &[u8]) -> Option<usize> {
141        if data.len() < self.tokens.len() {
142            return None;
143        }
144
145        let prefix = exact_prefix(&self.tokens);
146
147        if prefix.len() >= 2 {
148            scan_first_prefix_filtered(data, &self.tokens, &prefix)
149        } else {
150            scan_first_naive(data, &self.tokens)
151        }
152    }
153}
155fn exact_prefix(tokens: &[Token]) -> Vec<u8> {
156    tokens
157        .iter()
158        .take_while(|t| matches!(t, Token::Exact(_)))
159        .map(|t| match t {
160            Token::Exact(b) => *b,
161            _ => unreachable!(),
162        })
163        .collect()
164}
166fn matches_at(data: &[u8], offset: usize, tokens: &[Token], skip: usize) -> bool {
167    if offset + tokens.len() > data.len() {
168        return false;
169    }
170    tokens[skip..].iter().enumerate().all(|(i, tok)| match tok {
171        Token::Wildcard => true,
172        Token::Exact(b) => data[offset + skip + i] == *b,
173    })
174}
176fn scan_naive(data: &[u8], tokens: &[Token]) -> Vec<usize> {
177    let end = data.len() - tokens.len() + 1;
178    (0..end)
179        .filter(|&i| matches_at(data, i, tokens, 0))
180        .collect()
181}
183fn scan_first_naive(data: &[u8], tokens: &[Token]) -> Option<usize> {
184    let end = data.len() - tokens.len() + 1;
185    (0..end).find(|&i| matches_at(data, i, tokens, 0))
186}
188fn scan_prefix_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Vec<usize> {
189    let end = data.len() - tokens.len() + 1;
190    let first = prefix[0];
191    let skip = prefix.len();
192    let mut results = Vec::new();
193
194    let mut i = 0;
195    while i < end {
196        if let Some(pos) = memchr_single(first, &data[i..end]) {
197            let abs = i + pos;
198            if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens, skip) {
199                results.push(abs);
200            }
201            i = abs + 1;
202        } else {
203            break;
204        }
205    }
206
207    results
208}
210fn scan_first_prefix_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Option<usize> {
211    let end = data.len() - tokens.len() + 1;
212    let first = prefix[0];
213    let skip = prefix.len();
214
215    let mut i = 0;
216    while i < end {
217        if let Some(pos) = memchr_single(first, &data[i..end]) {
218            let abs = i + pos;
219            if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens, skip) {
220                return Some(abs);
221            }
222            i = abs + 1;
223        } else {
224            break;
225        }
226    }
227
228    None
229}
/// Returns the index of the first occurrence of `needle` in `haystack`, or
/// `None` if the byte does not occur.
fn memchr_single(needle: u8, haystack: &[u8]) -> Option<usize> {
    haystack.iter().position(|&byte| byte == needle)
}
// Unit tests covering pattern construction (IDA-style, code-style, raw
// tokens) and both scanning strategies.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn ida_basic() {
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.len(), 3);
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x48), Token::Exact(0x8B), Token::Exact(0x05)]
        );
    }

    #[test]
    fn ida_wildcards() {
        let p = Pattern::from_ida("48 ? ?? 89").unwrap();
        assert_eq!(p.len(), 4);
        assert_eq!(p.tokens()[1], Token::Wildcard);
        assert_eq!(p.tokens()[2], Token::Wildcard);
    }

    #[test]
    fn ida_invalid_hex() {
        assert!(Pattern::from_ida("ZZ").is_err());
    }

    #[test]
    fn ida_invalid_length() {
        assert!(Pattern::from_ida("ABC").is_err());
    }

    #[test]
    fn ida_empty() {
        assert!(Pattern::from_ida("").is_err());
    }

    #[test]
    fn code_basic() {
        let p = Pattern::from_code(b"\x48\x8B\x00\x89", "xx?x").unwrap();
        assert_eq!(p.len(), 4);
        assert_eq!(p.tokens()[2], Token::Wildcard);
        assert_eq!(p.tokens()[3], Token::Exact(0x89));
    }

    #[test]
    fn code_length_mismatch() {
        assert!(Pattern::from_code(b"\x48\x8B", "x").is_err());
    }

    #[test]
    fn code_invalid_mask() {
        assert!(Pattern::from_code(b"\x48", "z").is_err());
    }

    #[test]
    fn code_empty() {
        assert!(Pattern::from_code(b"", "").is_err());
    }

    #[test]
    fn scan_exact_match() {
        let data = b"\x00\x48\x8B\x05\x00\x00";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![1]);
    }

    #[test]
    fn scan_with_wildcards() {
        let data = b"\x48\x8B\xFF\x89\x00\x48\x8B\xAA\x89\x00";
        let p = Pattern::from_ida("48 8B ? 89").unwrap();
        assert_eq!(p.scan(data), vec![0, 5]);
    }

    #[test]
    fn scan_no_match() {
        let data = b"\x00\x00\x00\x00";
        let p = Pattern::from_ida("FF FF").unwrap();
        assert!(p.scan(data).is_empty());
    }

    #[test]
    fn scan_data_shorter_than_pattern() {
        let data = b"\x48";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert!(p.scan(data).is_empty());
    }

    #[test]
    fn scan_data_equals_pattern_length() {
        let data = b"\x48\x8B\x05";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![0]);
    }

    #[test]
    fn scan_first_found() {
        let data = b"\x00\x48\x8B\x05\x00\x48\x8B\x05";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan_first(data), Some(1));
    }

    #[test]
    fn scan_first_not_found() {
        let data = b"\x00\x00\x00";
        let p = Pattern::from_ida("FF").unwrap();
        assert_eq!(p.scan_first(data), None);
    }

    #[test]
    fn scan_overlapping() {
        let data = b"\xAA\xAA\xAA";
        let p = Pattern::from_ida("AA AA").unwrap();
        assert_eq!(p.scan(data), vec![0, 1]);
    }

    #[test]
    fn scan_all_wildcards() {
        let data = b"\x00\x01\x02\x03";
        let p = Pattern::from_ida("? ?").unwrap();
        assert_eq!(p.scan(data), vec![0, 1, 2]);
    }

    #[test]
    fn scan_single_byte_pattern() {
        let data = b"\x00\x90\x00\x90";
        let p = Pattern::from_ida("90").unwrap();
        assert_eq!(p.scan(data), vec![1, 3]);
    }

    #[test]
    fn scan_at_end_of_data() {
        let data = b"\x00\x00\x48\x8B";
        let p = Pattern::from_ida("48 8B").unwrap();
        assert_eq!(p.scan(data), vec![2]);
    }

    #[test]
    fn scan_empty_data() {
        let data: &[u8] = &[];
        let p = Pattern::from_ida("48").unwrap();
        assert!(p.scan(data).is_empty());
        assert_eq!(p.scan_first(data), None);
    }

    #[test]
    fn scan_long_prefix_uses_fast_path() {
        // a pattern with a 4-byte exact prefix takes the prefix-filtered path
        // (chosen whenever at least two leading tokens are exact)
        let mut data = vec![0u8; 4096];
        data[2000] = 0x48;
        data[2001] = 0x8B;
        data[2002] = 0x05;
        data[2003] = 0x10;
        data[2004] = 0xFF; // wildcard position

        let p = Pattern::from_ida("48 8B 05 10 ?").unwrap();
        assert_eq!(p.scan(&data), vec![2000]);
    }

    #[test]
    fn code_style_scan() {
        let data = b"\x00\x55\x48\x89\xE5\x00";
        let p = Pattern::from_code(b"\x55\x48\x00\xE5", "xx?x").unwrap();
        assert_eq!(p.scan(data), vec![1]);
    }

    #[test]
    fn from_tokens_works() {
        let p = Pattern::from_tokens(vec![
            Token::Exact(0x90),
            Token::Wildcard,
            Token::Exact(0xCC),
        ])
        .unwrap();
        let data = b"\x90\x00\xCC\x90\xFF\xCC";
        assert_eq!(p.scan(data), vec![0, 3]);
    }

    #[test]
    fn from_tokens_empty() {
        assert!(Pattern::from_tokens(vec![]).is_err());
    }

    #[test]
    fn ida_lowercase_hex() {
        let p = Pattern::from_ida("4a 8b ff").unwrap();
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x4A), Token::Exact(0x8B), Token::Exact(0xFF)]
        );
    }

    #[test]
    fn ida_mixed_case_hex() {
        let p = Pattern::from_ida("4A 8b Ff").unwrap();
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x4A), Token::Exact(0x8B), Token::Exact(0xFF)]
        );
    }

    #[test]
    fn scan_first_exact_length_match() {
        let data = b"\x48\x8B\x05";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan_first(data), Some(0));
    }

    #[test]
    fn scan_first_exact_length_no_match() {
        let data = b"\x48\x8B\x06";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan_first(data), None);
    }

    #[test]
    fn scan_wildcard_leading() {
        let data = b"\x00\x48\x8B\x00\x49\x8B";
        let p = Pattern::from_ida("? 8B").unwrap();
        assert_eq!(p.scan(data), vec![1, 4]);
    }

    #[test]
    fn scan_prefix_multiple_first_byte_one_full_match() {
        let data = b"\x48\x00\x00\x48\x8B\x05\x48\x00\x00";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![3]);
    }

    #[test]
    fn scan_prefix_all_candidates_match() {
        let data = b"\x48\x8B\x05\x00\x48\x8B\x05\x00";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![0, 4]);
    }

    #[test]
    fn ida_extra_whitespace() {
        let p = Pattern::from_ida("  48   8B   05  ").unwrap();
        assert_eq!(p.len(), 3);
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x48), Token::Exact(0x8B), Token::Exact(0x05)]
        );
    }
}