1use core::ops::{
2 Bound,
3 Range,
4 RangeBounds,
5};
6
7use crate::{
8 Atom,
9 BinaryPattern,
10 HeapStack,
11 JumpType,
12 MatchTarget,
13 ReadWidth,
14 Stack,
15 StaticStack,
16};
17
18enum MatchHint {
20 Unsupported,
22
23 NoMatches,
25
26 MaybeMatch(usize),
28}
29
30pub struct BinaryMatcher<
34 'a,
35 S: Stack<u32> = StaticStack<0x10, u32>,
36 C: Stack<usize> = StaticStack<0x10, usize>,
37> {
38 pattern_atoms: &'a [Atom],
39 pattern_byte_sequence: &'a [u8],
40
41 target: &'a dyn MatchTarget,
42
43 match_offset: usize,
44
45 save_stack: S,
46 cursor_stack: C,
47}
48
49impl<'a> BinaryMatcher<'a> {
50 pub fn new(pattern: &'a dyn BinaryPattern, target: &'a dyn MatchTarget) -> Self {
53 Self::new_with_stack(
54 pattern,
55 target,
56 StaticStack::<0x10, u32>::new(),
57 StaticStack::<0x10, usize>::new(),
58 )
59 }
60
61 pub fn new_with_heap_stack(
63 pattern: &'a dyn BinaryPattern,
64 target: &'a dyn MatchTarget,
65 ) -> BinaryMatcher<'a, HeapStack<u32>, HeapStack<usize>> {
66 BinaryMatcher::new_with_stack(
67 pattern,
68 target,
69 HeapStack::<u32>::new(),
70 HeapStack::<usize>::new(),
71 )
72 }
73}
74
75impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
76 pub fn new_with_stack(
78 pattern: &'a dyn BinaryPattern,
79 target: &'a dyn MatchTarget,
80 mut save_stack: S,
81 cursor_stack: C,
82 ) -> Self {
83 save_stack.truncate(0);
84 save_stack.push_value(0x00);
85
86 Self {
87 pattern_atoms: pattern.atoms(),
88 pattern_byte_sequence: pattern.byte_sequence(),
89
90 target,
91
92 save_stack,
93 cursor_stack,
94
95 match_offset: 0,
96 }
97 }
98
99 fn match_atoms(&mut self, mut data_cursor: usize, atoms: &[Atom]) -> Option<usize> {
102 let mut atom_cursor = 0;
103 while atom_cursor < atoms.len() {
104 match atoms[atom_cursor] {
105 Atom::ByteSequence { seq_start, seq_end } => {
106 let expected_bytes =
107 &self.pattern_byte_sequence[seq_start as usize..seq_end as usize];
108 let actual_bytes = self.target.subrange(data_cursor, expected_bytes.len())?;
109
110 if expected_bytes
111 .iter()
112 .zip(actual_bytes.iter())
113 .any(|(expected, data)| *expected != *data)
114 {
115 return None;
116 }
117
118 atom_cursor += 1;
119 data_cursor += expected_bytes.len();
120 }
121 Atom::ByteSequenceMasked {
122 seq_start,
123 mask_start,
124 len,
125 } => {
126 let target_bytes = &self.pattern_byte_sequence
127 [seq_start as usize..seq_start as usize + len as usize];
128
129 let target_mask = &self.pattern_byte_sequence
130 [mask_start as usize..mask_start as usize + len as usize];
131
132 let actual_bytes = self.target.subrange(data_cursor, target_mask.len())?;
133
134 let target_bytes = target_bytes
135 .iter()
136 .zip(target_mask)
137 .map(|(value, mask)| *value & *mask);
138 let actual_bytes = actual_bytes
139 .iter()
140 .zip(target_mask)
141 .map(|(value, mask)| *value & *mask);
142
143 if target_bytes
144 .zip(actual_bytes)
145 .any(|(expected, data)| expected != data)
146 {
147 return None;
148 }
149
150 atom_cursor += 1;
151 data_cursor += len as usize;
152 }
153 Atom::WildcardFixed(length) => {
154 atom_cursor += 1;
155 data_cursor += length as usize;
156 }
157 Atom::WildcardRange { min, max } => {
158 let save_stack_size = self.save_stack.len();
159 let cursor_stack_size = self.cursor_stack.len();
160
161 for offset in min..=max {
162 self.save_stack.truncate(save_stack_size);
163 self.cursor_stack.truncate(cursor_stack_size);
164 if let Some(data_cursor) = self
165 .match_atoms(data_cursor + offset as usize, &atoms[atom_cursor + 1..])
166 {
167 return Some(data_cursor);
169 }
170 }
171
172 return None;
173 }
174
175 Atom::CursorPush => {
176 if !self.cursor_stack.push_value(data_cursor) {
177 return None;
179 }
180
181 atom_cursor += 1;
182 }
183 Atom::CursorPop { advance } => {
184 let Some(value) = self.cursor_stack.pop_value() else {
185 return None;
187 };
188
189 data_cursor = value + advance as usize;
190 atom_cursor += 1;
191 }
192
193 Atom::Branch {
194 left_len,
195 right_len,
196 } => {
197 let left_len = left_len as usize;
198 let right_len = right_len as usize;
199
200 let save_stack_size = self.save_stack.len();
201 let cursor_stack_size = self.cursor_stack.len();
202
203 let remaining_atoms = &atoms[atom_cursor + 1 + left_len + right_len..];
204 if let Some(data_cursor) = {
205 self.match_atoms(
206 data_cursor,
207 &atoms[atom_cursor + 1..atom_cursor + 1 + left_len],
208 )
209 } {
210 if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
212 return Some(data_cursor);
213 }
214 }
215
216 self.save_stack.truncate(save_stack_size);
217 self.cursor_stack.truncate(cursor_stack_size);
218 if let Some(data_cursor) = {
219 self.match_atoms(
220 data_cursor,
221 &atoms[atom_cursor + left_len + 1
222 ..atom_cursor + left_len + right_len + 1],
223 )
224 } {
225 if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
227 return Some(data_cursor);
228 }
229 }
230
231 return None;
232 }
233
234 Atom::Jump(mode) => {
235 data_cursor = match mode {
236 JumpType::RelByte => {
237 let value = self.target.subrange(data_cursor, 1)?;
238 (data_cursor + 1).wrapping_add_signed(value[0] as i8 as isize)
239 }
240 JumpType::RelDWord => {
241 let value = self.target.subrange(data_cursor, 4)?;
242 let value = i32::from_le_bytes(value.try_into().unwrap());
243 (data_cursor + 4).wrapping_add_signed(value as isize)
244 }
245 JumpType::AbsQWord => {
246 let value = self.target.subrange(data_cursor, 8)?;
247 let value = u64::from_le_bytes(value.try_into().unwrap());
248 self.target.translate_absolute_address(value)?
249 }
250 };
251 atom_cursor += 1;
252 }
253
254 Atom::Read(width) => {
255 let (value, width) = match width {
256 ReadWidth::Byte => {
257 let value = self.target.subrange(data_cursor, 1)?;
258 (value[0] as u32, 1)
259 }
260 ReadWidth::Word => {
261 let value = self.target.subrange(data_cursor, 2)?;
262 (u16::from_le_bytes(value.try_into().unwrap()) as u32, 2)
263 }
264 ReadWidth::DWord => {
265 let value = self.target.subrange(data_cursor, 4)?;
266 (u32::from_le_bytes(value.try_into().unwrap()), 4)
267 }
268 };
269 if !self.save_stack.push_value(value) {
270 return None;
272 }
273
274 atom_cursor += 1;
275 data_cursor += width;
276 }
277
278 Atom::SaveCursor => {
279 if !self.save_stack.push_value(data_cursor as u32) {
280 return None;
282 }
283 atom_cursor += 1;
284 }
285 Atom::SaveConstant(value) => {
286 if !self.save_stack.push_value(value) {
287 return None;
289 }
290 atom_cursor += 1;
291 }
292 }
293 }
294
295 Some(data_cursor)
296 }
297
298 fn next_match_hint(&self, range: Range<usize>) -> MatchHint {
301 let mut fs_buffer = [0u8; 0x10];
302 let mut fs_buffer_len = 0;
303 for atom in self.pattern_atoms {
304 match atom {
305 Atom::ByteSequence { seq_start, seq_end } => {
306 let seq_start = *seq_start as usize;
307 let seq_end = *seq_end as usize;
308
309 let copy_length = (seq_end - seq_start).min(fs_buffer.len() - fs_buffer_len);
310 fs_buffer[fs_buffer_len..fs_buffer_len + copy_length].copy_from_slice(
311 &self.pattern_byte_sequence[seq_start..seq_start + copy_length],
312 );
313 fs_buffer_len += copy_length;
314 if fs_buffer_len >= fs_buffer.len() {
315 break;
317 }
318 }
319 Atom::CursorPush => continue,
320 Atom::SaveConstant(_) => continue,
321 Atom::SaveCursor => continue,
322 Atom::Read(_) => continue,
323 _ => break,
324 }
325 }
326
327 if fs_buffer_len == 0 {
328 return MatchHint::Unsupported;
330 }
331
332 let Some(target_buffer) = self.target.subrange(range.start, range.end - range.start) else {
333 return MatchHint::Unsupported;
335 };
336
337 Self::fuzzy_search(&fs_buffer[0..fs_buffer_len], target_buffer)
338 .map_or(MatchHint::NoMatches, |offset| {
339 MatchHint::MaybeMatch(range.start + offset)
340 })
341 }
342
343 fn fuzzy_search(needle: &[u8], haystack: &[u8]) -> Option<usize> {
344 for offset in 0..(haystack.len() - needle.len()) {
345 let is_match = needle
346 .iter()
347 .zip(&haystack[offset..offset + needle.len()])
348 .all(|(a, b)| *a == *b);
349
350 if is_match {
351 return Some(offset);
352 }
353 }
354
355 None
356 }
357
358 pub fn next_match(&mut self) -> Option<&[u32]> {
366 self.next_match_within(..)
367 }
368
369 pub fn next_match_within<R: RangeBounds<usize>>(&mut self, range: R) -> Option<&[u32]> {
372 let range_start = match range.start_bound() {
373 Bound::Excluded(value) => *value + 1,
374 Bound::Included(value) => *value,
375 Bound::Unbounded => 0,
376 };
377
378 let range_end = match range.end_bound() {
379 Bound::Excluded(value) => *value,
380 Bound::Included(value) => *value + 1,
381 Bound::Unbounded => self.target.match_length(),
382 };
383 if range_start >= range_end {
384 return None;
386 }
387
388 let mut match_offset = self.match_offset.clamp(range_start, range_end);
389 while match_offset < range_end {
390 match self.next_match_hint(match_offset..range_end) {
391 MatchHint::Unsupported => {
392 return self.next_match_within_loop(match_offset..range_end);
394 }
395 MatchHint::NoMatches => {
396 return None;
398 }
399 MatchHint::MaybeMatch(hint_offset) => {
400 self.save_stack.truncate(1);
402 self.cursor_stack.truncate(0);
403
404 if self.match_atoms(hint_offset, self.pattern_atoms).is_some() {
405 self.match_offset = hint_offset + 1;
406
407 let save_stack = self.save_stack.stack_mut();
408 save_stack[0] = hint_offset as u32;
409 return Some(save_stack);
410 }
411
412 match_offset = hint_offset + 1;
413 }
414 }
415 }
416
417 self.match_offset = range_end;
418 None
419 }
420
421 fn next_match_within_loop(&mut self, range: Range<usize>) -> Option<&[u32]> {
422 for match_offset in range.clone() {
423 self.save_stack.truncate(1);
424 self.cursor_stack.truncate(0);
425
426 if self.match_atoms(match_offset, self.pattern_atoms).is_none() {
427 continue;
428 }
429
430 self.match_offset = match_offset + 1;
431
432 let save_stack = self.save_stack.stack_mut();
433 save_stack[0] = match_offset as u32;
434 return Some(save_stack);
435 }
436
437 self.match_offset = range.end;
438 None
439 }
440}
441
442#[cfg(test)]
443mod test {
444 use super::BinaryMatcher;
445 use crate::{
446 compiler::parse_pattern,
447 BinaryPattern,
448 };
449
450 const DATA: &[u8] = &[
451 0xCA, 0x70, 0x11, 0xB5, 0xA, 0x9D, 0x91, 0x83, 0xC4, 0x5A, 0xFC, 0xC7, 0x31, 0x26, 0xC3,
452 0x48, 0x3D, 0x6C, 0x16, 0xD7, 0x15, 0x91, 0xDB, 0xC4, 0x21, 0x2, 0x31, 0x4D, 0xE9, 0xD5,
453 0x52, 0xFB, 0xB7, 0x31, 0x91, 0x45, 0x35, 0xC7, 0xDA, 0xA9, 0x77, 0xFC, 0x9C, 0x3E, 0x65,
454 0x19, 0xF2, 0x5A, 0x68, 0x99, 0x21, 0xC, 0xED, 0xDC, 0x21, 0x8C, 0xA2, 0x7B, 0xBA, 0xC0,
455 0x9A, 0x94, 0x99, 0x9B, 0xB2, 0xB7, 0x69, 0x2D, 0x17, 0xA9, 0x85, 0x2C, 0xD7, 0x42, 0x43,
456 0x91, 0xF6, 0x6E, 0x34, 0xBC, 0x2F, 0xF7, 0xAE, 0xAA, 0xAE, 0xBF, 0x4, 0xE5, 0xD5, 0x9B,
457 0x13, 0x60, 0x17, 0x31, 0x87, 0xEF, 0xF1, 0x24, 0x43, 0xB4, 0x60, 0xBC, 0x9F, 0x16, 0x86,
458 0x39, 0x3D, 0x9E, 0x1, 0x68, 0x74, 0x8D, 0xD3, 0xC8, 0x6, 0x25, 0x88, 0xB0, 0x95, 0x99,
459 0xB4, 0x5D, 0xBE, 0x8B, 0xD3, 0x26, 0xCB, 0x3C,
460 ];
461
462 fn test_single(pattern: &str, data: &[u8], result: Option<&[u32]>) {
463 let pattern = parse_pattern(pattern).unwrap();
464 println!("Atoms: {:?}", pattern.atoms());
465
466 let mut matcher = BinaryMatcher::new(&pattern, &data);
467 assert_eq!(matcher.next_match(), result);
468 }
469
470 #[test]
471 fn test_simple() {
472 test_single("B7 69 2D", DATA, Some(&[0x41]));
473 test_single("B7 69 ' 2D", DATA, Some(&[0x41, 0x43]));
474 test_single("' B7 69 ' 2D", DATA, Some(&[0x41, 0x41, 0x43]));
475 test_single("B7 69 3D", DATA, None);
476 }
477
478 #[test]
479 fn test_binary_mask() {
480 test_single("B7682D & FFFEFF", DATA, Some(&[0x41]));
481 }
482
483 #[test]
484 fn test_range() {
485 test_single("B7 69 2D [0-3] 85 2C '", DATA, Some(&[0x41, 0x48]));
486 test_single("B7 69 2D [0-1] 85 2C '", DATA, None);
487 }
488
489 #[test]
490 fn test_branch() {
491 test_single("B7 (69 | 70) 2D", DATA, Some(&[0x41]));
492 test_single("B7 (70 | 69) 2D", DATA, Some(&[0x41]));
493
494 test_single("B7 (70 | ) 69 2D", DATA, Some(&[0x41]));
496 }
497
498 #[test]
499 fn test_jmp() {
500 test_single(
501 "EB % FF",
502 &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE],
503 Some(&[0x01]),
504 );
505 test_single("EB % EE", &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE], None);
506 test_single(
507 "EB % EE",
508 &[0x00, 0xEB, 0x00, 0xEE, 0xFF, 0xEE],
509 Some(&[0x01]),
510 );
511
512 test_single(
513 "E9 $ EE",
514 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xFF],
515 None,
516 );
517
518 test_single(
519 "E9 $ EE '",
520 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE],
521 Some(&[0x01, 0x08]),
522 );
523
524 test_single(
525 "E9 $ { EE FF } '",
526 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE, 0xFF],
527 Some(&[0x01, 0x06]),
528 );
529 }
530
531 #[test]
532 fn test_branch_behaviour() {
533 #[rustfmt::skip]
534 test_single(
535 "
536 48 8B 83 r4
537 [14]
538 48 8B 88 r4
539 ([4] | [0])
540 48 89 91 F8 05 00 00
541 ",
542 &[
543 0x00,
544 0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00,
545 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
546 0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00,
547 0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
548 ],
549 Some(&[0x01, 0x02, 0x03]),
550 );
551
552 #[rustfmt::skip]
553 test_single(
554 "
555 48 8B 83 r4
556 [14]
557 48 8B 88 r4
558 ([4] | [0])
559 48 89 91 F8 05 00 00
560 ",
561 &[
562 0x00,
563 0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00,
564 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
565 0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00,
566 0xFF, 0xFF, 0xFF, 0xFF,
567 0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
568 ],
569 Some(&[0x01, 0x02, 0x03]),
570 );
571 }
572}