// procmod_scan/pattern.rs

1use crate::error::{Error, Result};
2
/// One position of a scan pattern: either a fixed byte or a "don't care" slot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
    /// The byte at this position must equal the contained value.
    Exact(u8),
    /// The byte at this position may hold any value.
    Wildcard,
}
11
12/// A byte pattern used for scanning memory regions.
13///
14/// Patterns consist of exact byte matches and wildcard positions. They can be
15/// constructed from IDA-style signature strings or code-style byte/mask pairs.
16#[derive(Debug, Clone)]
17pub struct Pattern {
18    tokens: Vec<Token>,
19}
20
21impl Pattern {
22    /// Creates a pattern from an IDA-style signature string.
23    ///
24    /// Each token is separated by whitespace. Exact bytes are specified as
25    /// two-character hex values. Wildcards are represented by `?` or `??`.
26    ///
27    /// ```
28    /// use procmod_scan::Pattern;
29    ///
30    /// let pattern = Pattern::from_ida("48 8B ?? 89 ? 0F").unwrap();
31    /// ```
32    pub fn from_ida(signature: &str) -> Result<Self> {
33        let tokens = signature
34            .split_whitespace()
35            .map(|tok| match tok {
36                "?" | "??" => Ok(Token::Wildcard),
37                hex => {
38                    if hex.len() != 2 {
39                        return Err(Error::InvalidPattern(format!(
40                            "expected 2-character hex token, got '{hex}'"
41                        )));
42                    }
43                    u8::from_str_radix(hex, 16)
44                        .map(Token::Exact)
45                        .map_err(|_| Error::InvalidPattern(format!("invalid hex byte '{hex}'")))
46                }
47            })
48            .collect::<Result<Vec<_>>>()?;
49
50        if tokens.is_empty() {
51            return Err(Error::InvalidPattern("pattern is empty".into()));
52        }
53
54        Ok(Self { tokens })
55    }
56
57    /// Creates a pattern from a code-style byte/mask pair.
58    ///
59    /// The mask string uses `x` for exact byte matches and `?` for wildcards,
60    /// one character per byte. The mask length must equal the bytes length.
61    ///
62    /// ```
63    /// use procmod_scan::Pattern;
64    ///
65    /// let pattern = Pattern::from_code(b"\x48\x8B\x00\x89", "xx?x").unwrap();
66    /// ```
67    pub fn from_code(bytes: &[u8], mask: &str) -> Result<Self> {
68        if bytes.len() != mask.len() {
69            return Err(Error::InvalidPattern(format!(
70                "bytes length ({}) does not match mask length ({})",
71                bytes.len(),
72                mask.len()
73            )));
74        }
75
76        if bytes.is_empty() {
77            return Err(Error::InvalidPattern("pattern is empty".into()));
78        }
79
80        let tokens = bytes
81            .iter()
82            .zip(mask.chars())
83            .map(|(&byte, m)| match m {
84                'x' => Ok(Token::Exact(byte)),
85                '?' => Ok(Token::Wildcard),
86                other => Err(Error::InvalidPattern(format!(
87                    "invalid mask character '{other}', expected 'x' or '?'"
88                ))),
89            })
90            .collect::<Result<Vec<_>>>()?;
91
92        Ok(Self { tokens })
93    }
94
95    /// Creates a pattern from raw tokens.
96    ///
97    /// Returns an error if the token list is empty.
98    pub fn from_tokens(tokens: Vec<Token>) -> Result<Self> {
99        if tokens.is_empty() {
100            return Err(Error::InvalidPattern("pattern is empty".into()));
101        }
102        Ok(Self { tokens })
103    }
104
105    /// Returns the tokens in this pattern.
106    pub fn tokens(&self) -> &[Token] {
107        &self.tokens
108    }
109
110    /// Returns the number of tokens in this pattern.
111    pub fn len(&self) -> usize {
112        self.tokens.len()
113    }
114
115    /// Returns true if the pattern has no tokens.
116    pub fn is_empty(&self) -> bool {
117        self.tokens.is_empty()
118    }
119
120    /// Finds all offsets in `data` where this pattern matches.
121    ///
122    /// Returns an empty vec if no matches are found. Matches may overlap.
123    pub fn scan(&self, data: &[u8]) -> Vec<usize> {
124        if data.len() < self.tokens.len() {
125            return Vec::new();
126        }
127
128        let prefix = exact_prefix(&self.tokens);
129
130        if prefix.len() >= 2 {
131            scan_simd_filtered(data, &self.tokens, &prefix)
132        } else {
133            scan_naive(data, &self.tokens)
134        }
135    }
136
137    /// Finds the first offset in `data` where this pattern matches.
138    ///
139    /// Returns `None` if no match is found.
140    pub fn scan_first(&self, data: &[u8]) -> Option<usize> {
141        if data.len() < self.tokens.len() {
142            return None;
143        }
144
145        let prefix = exact_prefix(&self.tokens);
146
147        if prefix.len() >= 2 {
148            scan_first_simd_filtered(data, &self.tokens, &prefix)
149        } else {
150            scan_first_naive(data, &self.tokens)
151        }
152    }
153}
154
155fn exact_prefix(tokens: &[Token]) -> Vec<u8> {
156    tokens
157        .iter()
158        .take_while(|t| matches!(t, Token::Exact(_)))
159        .map(|t| match t {
160            Token::Exact(b) => *b,
161            _ => unreachable!(),
162        })
163        .collect()
164}
165
166fn matches_at(data: &[u8], offset: usize, tokens: &[Token]) -> bool {
167    if offset + tokens.len() > data.len() {
168        return false;
169    }
170    tokens.iter().enumerate().all(|(i, tok)| match tok {
171        Token::Wildcard => true,
172        Token::Exact(b) => data[offset + i] == *b,
173    })
174}
175
176fn scan_naive(data: &[u8], tokens: &[Token]) -> Vec<usize> {
177    let end = data.len() - tokens.len() + 1;
178    (0..end).filter(|&i| matches_at(data, i, tokens)).collect()
179}
180
181fn scan_first_naive(data: &[u8], tokens: &[Token]) -> Option<usize> {
182    let end = data.len() - tokens.len() + 1;
183    (0..end).find(|&i| matches_at(data, i, tokens))
184}
185
186// simd-accelerated scan: use the exact prefix as a fast first-byte filter
187// then verify the full pattern only at candidate positions
188fn scan_simd_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Vec<usize> {
189    let end = data.len() - tokens.len() + 1;
190    let first = prefix[0];
191    let mut results = Vec::new();
192
193    let mut i = 0;
194    while i < end {
195        if let Some(pos) = memchr_naive(first, &data[i..end]) {
196            let abs = i + pos;
197            if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens) {
198                results.push(abs);
199            }
200            i = abs + 1;
201        } else {
202            break;
203        }
204    }
205
206    results
207}
208
209fn scan_first_simd_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Option<usize> {
210    let end = data.len() - tokens.len() + 1;
211    let first = prefix[0];
212
213    let mut i = 0;
214    while i < end {
215        if let Some(pos) = memchr_naive(first, &data[i..end]) {
216            let abs = i + pos;
217            if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens) {
218                return Some(abs);
219            }
220            i = abs + 1;
221        } else {
222            break;
223        }
224    }
225
226    None
227}
228
/// Returns the index of the first byte in `haystack` equal to `needle`, or
/// `None` if no byte matches.
///
/// Plain linear search with no explicit SIMD; any vectorization is left to
/// the compiler.
fn memchr_naive(needle: u8, haystack: &[u8]) -> Option<usize> {
    for (index, &byte) in haystack.iter().enumerate() {
        if byte == needle {
            return Some(index);
        }
    }
    None
}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an IDA-style signature that the test expects to be valid.
    fn ida(sig: &str) -> Pattern {
        Pattern::from_ida(sig).expect("signature should parse")
    }

    /// Builds a pattern from a byte/mask pair that the test expects to be valid.
    fn code(bytes: &[u8], mask: &str) -> Pattern {
        Pattern::from_code(bytes, mask).expect("byte/mask pair should parse")
    }

    // construction from IDA-style signatures

    #[test]
    fn ida_basic() {
        let pattern = ida("48 8B 05");
        let expected = [Token::Exact(0x48), Token::Exact(0x8B), Token::Exact(0x05)];
        assert_eq!(pattern.len(), 3);
        assert_eq!(pattern.tokens(), &expected[..]);
    }

    #[test]
    fn ida_wildcards() {
        let pattern = ida("48 ? ?? 89");
        let toks = pattern.tokens();
        assert_eq!(pattern.len(), 4);
        assert_eq!(toks[1], Token::Wildcard);
        assert_eq!(toks[2], Token::Wildcard);
    }

    #[test]
    fn ida_invalid_hex() {
        let parsed = Pattern::from_ida("ZZ");
        assert!(parsed.is_err());
    }

    #[test]
    fn ida_invalid_length() {
        let parsed = Pattern::from_ida("ABC");
        assert!(parsed.is_err());
    }

    #[test]
    fn ida_empty() {
        let parsed = Pattern::from_ida("");
        assert!(parsed.is_err());
    }

    // construction from byte/mask pairs

    #[test]
    fn code_basic() {
        let pattern = code(&[0x48, 0x8B, 0x00, 0x89], "xx?x");
        let toks = pattern.tokens();
        assert_eq!(pattern.len(), 4);
        assert_eq!(toks[2], Token::Wildcard);
        assert_eq!(toks[3], Token::Exact(0x89));
    }

    #[test]
    fn code_length_mismatch() {
        let parsed = Pattern::from_code(&[0x48, 0x8B], "x");
        assert!(parsed.is_err());
    }

    #[test]
    fn code_invalid_mask() {
        let parsed = Pattern::from_code(&[0x48], "z");
        assert!(parsed.is_err());
    }

    #[test]
    fn code_empty() {
        let parsed = Pattern::from_code(&[], "");
        assert!(parsed.is_err());
    }

    // scanning

    #[test]
    fn scan_exact_match() {
        let haystack = [0x00u8, 0x48, 0x8B, 0x05, 0x00, 0x00];
        let hits = ida("48 8B 05").scan(&haystack);
        assert_eq!(hits, vec![1]);
    }

    #[test]
    fn scan_with_wildcards() {
        let haystack = [0x48u8, 0x8B, 0xFF, 0x89, 0x00, 0x48, 0x8B, 0xAA, 0x89, 0x00];
        let hits = ida("48 8B ? 89").scan(&haystack);
        assert_eq!(hits, vec![0, 5]);
    }

    #[test]
    fn scan_no_match() {
        let haystack = [0x00u8; 4];
        let hits = ida("FF FF").scan(&haystack);
        assert!(hits.is_empty());
    }

    #[test]
    fn scan_data_shorter_than_pattern() {
        let haystack = [0x48u8];
        let hits = ida("48 8B 05").scan(&haystack);
        assert!(hits.is_empty());
    }

    #[test]
    fn scan_data_equals_pattern_length() {
        let haystack = [0x48u8, 0x8B, 0x05];
        let hits = ida("48 8B 05").scan(&haystack);
        assert_eq!(hits, vec![0]);
    }

    #[test]
    fn scan_first_found() {
        let haystack = [0x00u8, 0x48, 0x8B, 0x05, 0x00, 0x48, 0x8B, 0x05];
        let hit = ida("48 8B 05").scan_first(&haystack);
        assert_eq!(hit, Some(1));
    }

    #[test]
    fn scan_first_not_found() {
        let haystack = [0x00u8; 3];
        let hit = ida("FF").scan_first(&haystack);
        assert_eq!(hit, None);
    }

    #[test]
    fn scan_overlapping() {
        let haystack = [0xAAu8; 3];
        let hits = ida("AA AA").scan(&haystack);
        assert_eq!(hits, vec![0, 1]);
    }

    #[test]
    fn scan_all_wildcards() {
        let haystack = [0x00u8, 0x01, 0x02, 0x03];
        let hits = ida("? ?").scan(&haystack);
        assert_eq!(hits, vec![0, 1, 2]);
    }

    #[test]
    fn scan_single_byte_pattern() {
        let haystack = [0x00u8, 0x90, 0x00, 0x90];
        let hits = ida("90").scan(&haystack);
        assert_eq!(hits, vec![1, 3]);
    }

    #[test]
    fn scan_at_end_of_data() {
        let haystack = [0x00u8, 0x00, 0x48, 0x8B];
        let hits = ida("48 8B").scan(&haystack);
        assert_eq!(hits, vec![2]);
    }

    #[test]
    fn scan_empty_data() {
        let haystack: &[u8] = &[];
        let pattern = ida("48");
        assert!(pattern.scan(haystack).is_empty());
        assert_eq!(pattern.scan_first(haystack), None);
    }

    #[test]
    fn scan_long_prefix_uses_fast_path() {
        // A 4-byte exact prefix selects the prefix-filtered scan path.
        let mut haystack = vec![0u8; 4096];
        // The final 0xFF sits under the pattern's trailing wildcard.
        haystack[2000..2005].copy_from_slice(&[0x48, 0x8B, 0x05, 0x10, 0xFF]);

        let hits = ida("48 8B 05 10 ?").scan(&haystack);
        assert_eq!(hits, vec![2000]);
    }

    #[test]
    fn code_style_scan() {
        let haystack = [0x00u8, 0x55, 0x48, 0x89, 0xE5, 0x00];
        let hits = code(&[0x55, 0x48, 0x00, 0xE5], "xx?x").scan(&haystack);
        assert_eq!(hits, vec![1]);
    }

    // construction from raw tokens

    #[test]
    fn from_tokens_works() {
        let toks = vec![Token::Exact(0x90), Token::Wildcard, Token::Exact(0xCC)];
        let pattern = Pattern::from_tokens(toks).unwrap();
        let haystack = [0x90u8, 0x00, 0xCC, 0x90, 0xFF, 0xCC];
        assert_eq!(pattern.scan(&haystack), vec![0, 3]);
    }

    #[test]
    fn from_tokens_empty() {
        let parsed = Pattern::from_tokens(Vec::new());
        assert!(parsed.is_err());
    }
}