1use crate::error::{Error, Result};
2
/// One position of a byte [`Pattern`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Token {
    /// Matches only the contained byte value.
    Exact(u8),
    /// Matches any byte value.
    Wildcard,
}
11
12#[derive(Debug, Clone)]
17pub struct Pattern {
18 tokens: Vec<Token>,
19}
20
21impl Pattern {
22 pub fn from_ida(signature: &str) -> Result<Self> {
33 let tokens = signature
34 .split_whitespace()
35 .map(|tok| match tok {
36 "?" | "??" => Ok(Token::Wildcard),
37 hex => {
38 if hex.len() != 2 {
39 return Err(Error::InvalidPattern(format!(
40 "expected 2-character hex token, got '{hex}'"
41 )));
42 }
43 u8::from_str_radix(hex, 16)
44 .map(Token::Exact)
45 .map_err(|_| Error::InvalidPattern(format!("invalid hex byte '{hex}'")))
46 }
47 })
48 .collect::<Result<Vec<_>>>()?;
49
50 if tokens.is_empty() {
51 return Err(Error::InvalidPattern("pattern is empty".into()));
52 }
53
54 Ok(Self { tokens })
55 }
56
57 pub fn from_code(bytes: &[u8], mask: &str) -> Result<Self> {
68 if bytes.len() != mask.len() {
69 return Err(Error::InvalidPattern(format!(
70 "bytes length ({}) does not match mask length ({})",
71 bytes.len(),
72 mask.len()
73 )));
74 }
75
76 if bytes.is_empty() {
77 return Err(Error::InvalidPattern("pattern is empty".into()));
78 }
79
80 let tokens = bytes
81 .iter()
82 .zip(mask.chars())
83 .map(|(&byte, m)| match m {
84 'x' => Ok(Token::Exact(byte)),
85 '?' => Ok(Token::Wildcard),
86 other => Err(Error::InvalidPattern(format!(
87 "invalid mask character '{other}', expected 'x' or '?'"
88 ))),
89 })
90 .collect::<Result<Vec<_>>>()?;
91
92 Ok(Self { tokens })
93 }
94
95 pub fn from_tokens(tokens: Vec<Token>) -> Result<Self> {
99 if tokens.is_empty() {
100 return Err(Error::InvalidPattern("pattern is empty".into()));
101 }
102 Ok(Self { tokens })
103 }
104
105 pub fn tokens(&self) -> &[Token] {
107 &self.tokens
108 }
109
110 pub fn len(&self) -> usize {
112 self.tokens.len()
113 }
114
115 pub fn is_empty(&self) -> bool {
117 self.tokens.is_empty()
118 }
119
120 pub fn scan(&self, data: &[u8]) -> Vec<usize> {
124 if data.len() < self.tokens.len() {
125 return Vec::new();
126 }
127
128 let prefix = exact_prefix(&self.tokens);
129
130 if prefix.len() >= 2 {
131 scan_prefix_filtered(data, &self.tokens, &prefix)
132 } else {
133 scan_naive(data, &self.tokens)
134 }
135 }
136
137 pub fn scan_first(&self, data: &[u8]) -> Option<usize> {
141 if data.len() < self.tokens.len() {
142 return None;
143 }
144
145 let prefix = exact_prefix(&self.tokens);
146
147 if prefix.len() >= 2 {
148 scan_first_prefix_filtered(data, &self.tokens, &prefix)
149 } else {
150 scan_first_naive(data, &self.tokens)
151 }
152 }
153}
154
155fn exact_prefix(tokens: &[Token]) -> Vec<u8> {
156 tokens
157 .iter()
158 .take_while(|t| matches!(t, Token::Exact(_)))
159 .map(|t| match t {
160 Token::Exact(b) => *b,
161 _ => unreachable!(),
162 })
163 .collect()
164}
165
166fn matches_at(data: &[u8], offset: usize, tokens: &[Token], skip: usize) -> bool {
167 if offset + tokens.len() > data.len() {
168 return false;
169 }
170 tokens[skip..].iter().enumerate().all(|(i, tok)| match tok {
171 Token::Wildcard => true,
172 Token::Exact(b) => data[offset + skip + i] == *b,
173 })
174}
175
176fn scan_naive(data: &[u8], tokens: &[Token]) -> Vec<usize> {
177 let end = data.len() - tokens.len() + 1;
178 (0..end)
179 .filter(|&i| matches_at(data, i, tokens, 0))
180 .collect()
181}
182
183fn scan_first_naive(data: &[u8], tokens: &[Token]) -> Option<usize> {
184 let end = data.len() - tokens.len() + 1;
185 (0..end).find(|&i| matches_at(data, i, tokens, 0))
186}
187
188fn scan_prefix_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Vec<usize> {
189 let end = data.len() - tokens.len() + 1;
190 let first = prefix[0];
191 let skip = prefix.len();
192 let mut results = Vec::new();
193
194 let mut i = 0;
195 while i < end {
196 if let Some(pos) = memchr_single(first, &data[i..end]) {
197 let abs = i + pos;
198 if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens, skip) {
199 results.push(abs);
200 }
201 i = abs + 1;
202 } else {
203 break;
204 }
205 }
206
207 results
208}
209
210fn scan_first_prefix_filtered(data: &[u8], tokens: &[Token], prefix: &[u8]) -> Option<usize> {
211 let end = data.len() - tokens.len() + 1;
212 let first = prefix[0];
213 let skip = prefix.len();
214
215 let mut i = 0;
216 while i < end {
217 if let Some(pos) = memchr_single(first, &data[i..end]) {
218 let abs = i + pos;
219 if data[abs..].starts_with(prefix) && matches_at(data, abs, tokens, skip) {
220 return Some(abs);
221 }
222 i = abs + 1;
223 } else {
224 break;
225 }
226 }
227
228 None
229}
230
/// Returns the index of the first byte in `haystack` equal to `needle`, or
/// `None` when no byte matches.
fn memchr_single(needle: u8, haystack: &[u8]) -> Option<usize> {
    for (idx, &byte) in haystack.iter().enumerate() {
        if byte == needle {
            return Some(idx);
        }
    }
    None
}
234
// Unit tests for pattern construction (`from_ida`, `from_code`, `from_tokens`)
// and for both scan entry points (`scan`, `scan_first`).
#[cfg(test)]
mod tests {
    use super::*;

    // ---- IDA-style parsing ----

    // Plain hex tokens become `Exact` tokens in input order.
    #[test]
    fn ida_basic() {
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.len(), 3);
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x48), Token::Exact(0x8B), Token::Exact(0x05)]
        );
    }

    // Both `?` and `??` are accepted as wildcards.
    #[test]
    fn ida_wildcards() {
        let p = Pattern::from_ida("48 ? ?? 89").unwrap();
        assert_eq!(p.len(), 4);
        assert_eq!(p.tokens()[1], Token::Wildcard);
        assert_eq!(p.tokens()[2], Token::Wildcard);
    }

    // Two characters that are not hex digits are rejected.
    #[test]
    fn ida_invalid_hex() {
        assert!(Pattern::from_ida("ZZ").is_err());
    }

    // A token that is not exactly two characters long is rejected.
    #[test]
    fn ida_invalid_length() {
        assert!(Pattern::from_ida("ABC").is_err());
    }

    // An empty signature produces no tokens and is rejected.
    #[test]
    fn ida_empty() {
        assert!(Pattern::from_ida("").is_err());
    }

    // ---- Code-style (bytes + mask) construction ----

    // `x` keeps the byte, `?` turns the position into a wildcard.
    #[test]
    fn code_basic() {
        let p = Pattern::from_code(b"\x48\x8B\x00\x89", "xx?x").unwrap();
        assert_eq!(p.len(), 4);
        assert_eq!(p.tokens()[2], Token::Wildcard);
        assert_eq!(p.tokens()[3], Token::Exact(0x89));
    }

    // Bytes and mask must have the same length.
    #[test]
    fn code_length_mismatch() {
        assert!(Pattern::from_code(b"\x48\x8B", "x").is_err());
    }

    // Only `x` and `?` are valid mask characters.
    #[test]
    fn code_invalid_mask() {
        assert!(Pattern::from_code(b"\x48", "z").is_err());
    }

    // Empty bytes with an empty mask are rejected.
    #[test]
    fn code_empty() {
        assert!(Pattern::from_code(b"", "").is_err());
    }

    // ---- Scanning ----

    // A fully exact pattern is found at its offset.
    #[test]
    fn scan_exact_match() {
        let data = b"\x00\x48\x8B\x05\x00\x00";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![1]);
    }

    // The wildcard position accepts different byte values (0xFF and 0xAA).
    #[test]
    fn scan_with_wildcards() {
        let data = b"\x48\x8B\xFF\x89\x00\x48\x8B\xAA\x89\x00";
        let p = Pattern::from_ida("48 8B ? 89").unwrap();
        assert_eq!(p.scan(data), vec![0, 5]);
    }

    // No occurrence yields an empty result.
    #[test]
    fn scan_no_match() {
        let data = b"\x00\x00\x00\x00";
        let p = Pattern::from_ida("FF FF").unwrap();
        assert!(p.scan(data).is_empty());
    }

    // Data shorter than the pattern yields an empty result.
    #[test]
    fn scan_data_shorter_than_pattern() {
        let data = b"\x48";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert!(p.scan(data).is_empty());
    }

    // Data exactly as long as the pattern has a single candidate offset, 0.
    #[test]
    fn scan_data_equals_pattern_length() {
        let data = b"\x48\x8B\x05";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![0]);
    }

    // `scan_first` reports the lowest offset when there are several matches.
    #[test]
    fn scan_first_found() {
        let data = b"\x00\x48\x8B\x05\x00\x48\x8B\x05";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan_first(data), Some(1));
    }

    // `scan_first` returns `None` when nothing matches.
    #[test]
    fn scan_first_not_found() {
        let data = b"\x00\x00\x00";
        let p = Pattern::from_ida("FF").unwrap();
        assert_eq!(p.scan_first(data), None);
    }

    // Overlapping occurrences are all reported.
    #[test]
    fn scan_overlapping() {
        let data = b"\xAA\xAA\xAA";
        let p = Pattern::from_ida("AA AA").unwrap();
        assert_eq!(p.scan(data), vec![0, 1]);
    }

    // A wildcard-only pattern matches at every offset where it fits.
    #[test]
    fn scan_all_wildcards() {
        let data = b"\x00\x01\x02\x03";
        let p = Pattern::from_ida("? ?").unwrap();
        assert_eq!(p.scan(data), vec![0, 1, 2]);
    }

    // A one-byte pattern has an exact prefix shorter than two bytes, so `scan`
    // takes the naive branch.
    #[test]
    fn scan_single_byte_pattern() {
        let data = b"\x00\x90\x00\x90";
        let p = Pattern::from_ida("90").unwrap();
        assert_eq!(p.scan(data), vec![1, 3]);
    }

    // A match that ends on the last byte of the data is still found.
    #[test]
    fn scan_at_end_of_data() {
        let data = b"\x00\x00\x48\x8B";
        let p = Pattern::from_ida("48 8B").unwrap();
        assert_eq!(p.scan(data), vec![2]);
    }

    // Empty data never matches, for either entry point.
    #[test]
    fn scan_empty_data() {
        let data: &[u8] = &[];
        let p = Pattern::from_ida("48").unwrap();
        assert!(p.scan(data).is_empty());
        assert_eq!(p.scan_first(data), None);
    }

    // A single occurrence in a larger, otherwise zeroed buffer.
    #[test]
    fn scan_long_prefix_uses_fast_path() {
        let mut data = vec![0u8; 4096];
        // Plant the pattern bytes in the middle of the buffer.
        data[2000] = 0x48;
        data[2001] = 0x8B;
        data[2002] = 0x05;
        data[2003] = 0x10;
        data[2004] = 0xFF;

        // Four leading exact bytes, so `scan` takes the prefix-filtered branch.
        let p = Pattern::from_ida("48 8B 05 10 ?").unwrap();
        assert_eq!(p.scan(&data), vec![2000]);
    }

    // The byte under a `?` mask position (0x00 here) is ignored when matching.
    #[test]
    fn code_style_scan() {
        let data = b"\x00\x55\x48\x89\xE5\x00";
        let p = Pattern::from_code(b"\x55\x48\x00\xE5", "xx?x").unwrap();
        assert_eq!(p.scan(data), vec![1]);
    }

    // ---- Direct token construction ----

    // A hand-built token list scans like a parsed one.
    #[test]
    fn from_tokens_works() {
        let p = Pattern::from_tokens(vec![
            Token::Exact(0x90),
            Token::Wildcard,
            Token::Exact(0xCC),
        ])
        .unwrap();
        let data = b"\x90\x00\xCC\x90\xFF\xCC";
        assert_eq!(p.scan(data), vec![0, 3]);
    }

    // An empty token list is rejected.
    #[test]
    fn from_tokens_empty() {
        assert!(Pattern::from_tokens(vec![]).is_err());
    }

    // ---- Hex case handling ----

    // Lowercase hex digits are accepted.
    #[test]
    fn ida_lowercase_hex() {
        let p = Pattern::from_ida("4a 8b ff").unwrap();
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x4A), Token::Exact(0x8B), Token::Exact(0xFF)]
        );
    }

    // Upper and lower case may be mixed, even within one token.
    #[test]
    fn ida_mixed_case_hex() {
        let p = Pattern::from_ida("4A 8b Ff").unwrap();
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x4A), Token::Exact(0x8B), Token::Exact(0xFF)]
        );
    }

    // ---- Additional scan edge cases ----

    // `scan_first` with data exactly as long as the pattern, matching.
    #[test]
    fn scan_first_exact_length_match() {
        let data = b"\x48\x8B\x05";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan_first(data), Some(0));
    }

    // Same length, but the last byte differs.
    #[test]
    fn scan_first_exact_length_no_match() {
        let data = b"\x48\x8B\x06";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan_first(data), None);
    }

    // A leading wildcard leaves the exact prefix empty, so `scan` takes the
    // naive branch.
    #[test]
    fn scan_wildcard_leading() {
        let data = b"\x00\x48\x8B\x00\x49\x8B";
        let p = Pattern::from_ida("? 8B").unwrap();
        assert_eq!(p.scan(data), vec![1, 4]);
    }

    // The first byte 0x48 occurs three times but only one occurrence is
    // followed by the rest of the pattern.
    #[test]
    fn scan_prefix_multiple_first_byte_one_full_match() {
        let data = b"\x48\x00\x00\x48\x8B\x05\x48\x00\x00";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![3]);
    }

    // Every occurrence of the first byte starts a full match.
    #[test]
    fn scan_prefix_all_candidates_match() {
        let data = b"\x48\x8B\x05\x00\x48\x8B\x05\x00";
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.scan(data), vec![0, 4]);
    }

    // Leading and trailing whitespace around the tokens is ignored.
    #[test]
    fn ida_extra_whitespace() {
        let p = Pattern::from_ida(" 48 8B 05 ").unwrap();
        assert_eq!(p.len(), 3);
        assert_eq!(
            p.tokens(),
            &[Token::Exact(0x48), Token::Exact(0x8B), Token::Exact(0x05)]
        );
    }
}