1use crate::error::{Error, Result};
2
/// A single position in a byte pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
    /// Matches only the given byte value.
    Exact(u8),
    /// Matches any byte value.
    Wildcard,
}
11
/// A byte pattern made of exact bytes and wildcards, searchable in a byte slice.
///
/// The constructors on this type reject patterns that contain no tokens.
#[derive(Debug, Clone)]
pub struct Pattern {
    /// Tokens in match order; one token per byte of input.
    tokens: Vec<Token>,
}
20
21impl Pattern {
22 pub fn from_ida(signature: &str) -> Result<Self> {
33 let tokens = signature
34 .split_whitespace()
35 .map(|tok| match tok {
36 "?" | "??" => Ok(Token::Wildcard),
37 hex => {
38 if hex.len() != 2 {
39 return Err(Error::InvalidPattern(format!(
40 "expected 2-character hex token, got '{hex}'"
41 )));
42 }
43 u8::from_str_radix(hex, 16)
44 .map(Token::Exact)
45 .map_err(|_| Error::InvalidPattern(format!("invalid hex byte '{hex}'")))
46 }
47 })
48 .collect::<Result<Vec<_>>>()?;
49
50 if tokens.is_empty() {
51 return Err(Error::InvalidPattern("pattern is empty".into()));
52 }
53
54 Ok(Self { tokens })
55 }
56
57 pub fn from_code(bytes: &[u8], mask: &str) -> Result<Self> {
68 if bytes.len() != mask.len() {
69 return Err(Error::InvalidPattern(format!(
70 "bytes length ({}) does not match mask length ({})",
71 bytes.len(),
72 mask.len()
73 )));
74 }
75
76 if bytes.is_empty() {
77 return Err(Error::InvalidPattern("pattern is empty".into()));
78 }
79
80 let tokens = bytes
81 .iter()
82 .zip(mask.chars())
83 .map(|(&byte, m)| match m {
84 'x' => Ok(Token::Exact(byte)),
85 '?' => Ok(Token::Wildcard),
86 other => Err(Error::InvalidPattern(format!(
87 "invalid mask character '{other}', expected 'x' or '?'"
88 ))),
89 })
90 .collect::<Result<Vec<_>>>()?;
91
92 Ok(Self { tokens })
93 }
94
95 pub fn from_tokens(tokens: Vec<Token>) -> Result<Self> {
99 if tokens.is_empty() {
100 return Err(Error::InvalidPattern("pattern is empty".into()));
101 }
102 Ok(Self { tokens })
103 }
104
105 pub fn tokens(&self) -> &[Token] {
107 &self.tokens
108 }
109
110 pub fn len(&self) -> usize {
112 self.tokens.len()
113 }
114
115 pub fn is_empty(&self) -> bool {
117 self.tokens.is_empty()
118 }
119
120 pub fn scan(&self, data: &[u8]) -> Vec<usize> {
124 if data.len() < self.tokens.len() {
125 return Vec::new();
126 }
127
128 let prefix = exact_prefix(&self.tokens);
129
130 if prefix.len() >= 2 {
131 scan_simd_filtered(data, &self.tokens, &prefix)
132 } else {
133 scan_naive(data, &self.tokens)
134 }
135 }
136
137 pub fn scan_first(&self, data: &[u8]) -> Option<usize> {
141 if data.len() < self.tokens.len() {
142 return None;
143 }
144
145 let prefix = exact_prefix(&self.tokens);
146
147 if prefix.len() >= 2 {
148 scan_first_simd_filtered(data, &self.tokens, &prefix)
149 } else {
150 scan_first_naive(data, &self.tokens)
151 }
152 }
153}
154
155fn exact_prefix(tokens: &[Token]) -> Vec<u8> {
156 tokens
157 .iter()
158 .take_while(|t| matches!(t, Token::Exact(_)))
159 .map(|t| match t {
160 Token::Exact(b) => *b,
161 _ => unreachable!(),
162 })
163 .collect()
164}
165
166fn matches_at(data: &[u8], offset: usize, tokens: &[Token]) -> bool {
167 if offset + tokens.len() > data.len() {
168 return false;
169 }
170 tokens.iter().enumerate().all(|(i, tok)| match tok {
171 Token::Wildcard => true,
172 Token::Exact(b) => data[offset + i] == *b,
173 })
174}
175
176fn scan_naive(data: &[u8], tokens: &[Token]) -> Vec<usize> {
177 let end = data.len() - tokens.len() + 1;
178 (0..end).filter(|&i| matches_at(data, i, tokens)).collect()
179}
180
181fn scan_first_naive(data: &[u8], tokens: &[Token]) -> Option<usize> {
182 let end = data.len() - tokens.len() + 1;
183 (0..end).find(|&i| matches_at(data, i, tokens))
184}
185
186fn scan_simd_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Vec<usize> {
189 let end = data.len() - tokens.len() + 1;
190 let first = prefix[0];
191 let mut results = Vec::new();
192
193 let mut i = 0;
194 while i < end {
195 if let Some(pos) = memchr_naive(first, &data[i..end]) {
196 let abs = i + pos;
197 if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens) {
198 results.push(abs);
199 }
200 i = abs + 1;
201 } else {
202 break;
203 }
204 }
205
206 results
207}
208
209fn scan_first_simd_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Option<usize> {
210 let end = data.len() - tokens.len() + 1;
211 let first = prefix[0];
212
213 let mut i = 0;
214 while i < end {
215 if let Some(pos) = memchr_naive(first, &data[i..end]) {
216 let abs = i + pos;
217 if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens) {
218 return Some(abs);
219 }
220 i = abs + 1;
221 } else {
222 break;
223 }
224 }
225
226 None
227}
228
/// Returns the index of the first byte in `haystack` equal to `needle`,
/// or `None` when no byte matches.
fn memchr_naive(needle: u8, haystack: &[u8]) -> Option<usize> {
    for (index, &byte) in haystack.iter().enumerate() {
        if byte == needle {
            return Some(index);
        }
    }
    None
}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a pattern from an IDA-style signature, panicking if it is invalid.
    fn ida(signature: &str) -> Pattern {
        Pattern::from_ida(signature).unwrap()
    }

    // ---- IDA-style parsing ----

    #[test]
    fn ida_basic() {
        let pattern = ida("48 8B 05");
        let expected = [Token::Exact(0x48), Token::Exact(0x8B), Token::Exact(0x05)];
        assert_eq!(pattern.len(), 3);
        assert_eq!(pattern.tokens(), &expected[..]);
    }

    #[test]
    fn ida_wildcards() {
        // Both the single and the double question mark are wildcards.
        let pattern = ida("48 ? ?? 89");
        assert_eq!(pattern.len(), 4);
        let parsed = pattern.tokens();
        assert_eq!(parsed[1], Token::Wildcard);
        assert_eq!(parsed[2], Token::Wildcard);
    }

    #[test]
    fn ida_invalid_hex() {
        let outcome = Pattern::from_ida("ZZ");
        assert!(outcome.is_err());
    }

    #[test]
    fn ida_invalid_length() {
        let outcome = Pattern::from_ida("ABC");
        assert!(outcome.is_err());
    }

    #[test]
    fn ida_empty() {
        let outcome = Pattern::from_ida("");
        assert!(outcome.is_err());
    }

    // ---- code-style (bytes + mask) parsing ----

    #[test]
    fn code_basic() {
        let pattern = Pattern::from_code(&[0x48, 0x8B, 0x00, 0x89], "xx?x").unwrap();
        assert_eq!(pattern.len(), 4);
        let parsed = pattern.tokens();
        assert_eq!(parsed[2], Token::Wildcard);
        assert_eq!(parsed[3], Token::Exact(0x89));
    }

    #[test]
    fn code_length_mismatch() {
        let outcome = Pattern::from_code(&[0x48, 0x8B], "x");
        assert!(outcome.is_err());
    }

    #[test]
    fn code_invalid_mask() {
        let outcome = Pattern::from_code(&[0x48], "z");
        assert!(outcome.is_err());
    }

    #[test]
    fn code_empty() {
        let outcome = Pattern::from_code(&[], "");
        assert!(outcome.is_err());
    }

    // ---- scanning ----

    #[test]
    fn scan_exact_match() {
        let haystack: &[u8] = &[0x00, 0x48, 0x8B, 0x05, 0x00, 0x00];
        let pattern = ida("48 8B 05");
        assert_eq!(pattern.scan(haystack), [1]);
    }

    #[test]
    fn scan_with_wildcards() {
        let haystack: &[u8] = &[0x48, 0x8B, 0xFF, 0x89, 0x00, 0x48, 0x8B, 0xAA, 0x89, 0x00];
        let pattern = ida("48 8B ? 89");
        assert_eq!(pattern.scan(haystack), [0, 5]);
    }

    #[test]
    fn scan_no_match() {
        let haystack: &[u8] = &[0x00, 0x00, 0x00, 0x00];
        let pattern = ida("FF FF");
        let hits = pattern.scan(haystack);
        assert!(hits.is_empty());
    }

    #[test]
    fn scan_data_shorter_than_pattern() {
        let haystack: &[u8] = &[0x48];
        let pattern = ida("48 8B 05");
        let hits = pattern.scan(haystack);
        assert!(hits.is_empty());
    }

    #[test]
    fn scan_data_equals_pattern_length() {
        let haystack: &[u8] = &[0x48, 0x8B, 0x05];
        let pattern = ida("48 8B 05");
        assert_eq!(pattern.scan(haystack), [0]);
    }

    #[test]
    fn scan_first_found() {
        // Two occurrences; only the earlier one is reported.
        let haystack: &[u8] = &[0x00, 0x48, 0x8B, 0x05, 0x00, 0x48, 0x8B, 0x05];
        let pattern = ida("48 8B 05");
        assert_eq!(pattern.scan_first(haystack), Some(1));
    }

    #[test]
    fn scan_first_not_found() {
        let haystack: &[u8] = &[0x00, 0x00, 0x00];
        let pattern = ida("FF");
        assert_eq!(pattern.scan_first(haystack), None);
    }

    #[test]
    fn scan_overlapping() {
        let haystack: &[u8] = &[0xAA, 0xAA, 0xAA];
        let pattern = ida("AA AA");
        assert_eq!(pattern.scan(haystack), [0, 1]);
    }

    #[test]
    fn scan_all_wildcards() {
        let haystack: &[u8] = &[0x00, 0x01, 0x02, 0x03];
        let pattern = ida("? ?");
        assert_eq!(pattern.scan(haystack), [0, 1, 2]);
    }

    #[test]
    fn scan_single_byte_pattern() {
        let haystack: &[u8] = &[0x00, 0x90, 0x00, 0x90];
        let pattern = ida("90");
        assert_eq!(pattern.scan(haystack), [1, 3]);
    }

    #[test]
    fn scan_at_end_of_data() {
        let haystack: &[u8] = &[0x00, 0x00, 0x48, 0x8B];
        let pattern = ida("48 8B");
        assert_eq!(pattern.scan(haystack), [2]);
    }

    #[test]
    fn scan_empty_data() {
        let haystack: &[u8] = &[];
        let pattern = ida("48");
        assert!(pattern.scan(haystack).is_empty());
        assert_eq!(pattern.scan_first(haystack), None);
    }

    #[test]
    fn scan_long_prefix_uses_fast_path() {
        // Four exact leading bytes, so `scan` takes the prefix-filtered branch.
        let mut haystack = vec![0u8; 4096];
        haystack[2000..2005].copy_from_slice(&[0x48, 0x8B, 0x05, 0x10, 0xFF]);
        let pattern = ida("48 8B 05 10 ?");
        assert_eq!(pattern.scan(&haystack), [2000]);
    }

    #[test]
    fn code_style_scan() {
        let haystack: &[u8] = &[0x00, 0x55, 0x48, 0x89, 0xE5, 0x00];
        let pattern = Pattern::from_code(&[0x55, 0x48, 0x00, 0xE5], "xx?x").unwrap();
        assert_eq!(pattern.scan(haystack), [1]);
    }

    // ---- construction from tokens ----

    #[test]
    fn from_tokens_works() {
        let parts = vec![Token::Exact(0x90), Token::Wildcard, Token::Exact(0xCC)];
        let pattern = Pattern::from_tokens(parts).unwrap();
        let haystack: &[u8] = &[0x90, 0x00, 0xCC, 0x90, 0xFF, 0xCC];
        assert_eq!(pattern.scan(haystack), [0, 3]);
    }

    #[test]
    fn from_tokens_empty() {
        let outcome = Pattern::from_tokens(Vec::new());
        assert!(outcome.is_err());
    }
}