1use crate::{
2 Atom,
3 BinaryPattern,
4 JumpType,
5 MatchTarget,
6 ReadWidth,
7 Stack,
8 StaticStack,
9};
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
13pub enum MatchHint {
14 Unsupported,
16
17 NoMatches,
19
20 MaybeMatch(usize),
22}
23
24pub struct BinaryMatcher<'a, S: Stack<u32>, C: Stack<usize>> {
28 pattern_atoms: &'a [Atom],
29 pattern_byte_sequence: &'a [u8],
30
31 target: &'a dyn MatchTarget,
32
33 current_offset: usize,
34 match_length: usize,
35
36 save_stack: S,
37 cursor_stack: C,
38}
39
40pub fn execute<'a>(
43 target: &'a dyn MatchTarget,
44 pattern: &'a dyn BinaryPattern,
45) -> BinaryMatcher<'a, StaticStack<0x10, u32>, StaticStack<0x10, usize>> {
46 self::execute_with_stack(target, pattern, StaticStack::new(), StaticStack::new())
47}
48
49pub fn execute_with_stack<'a, S: Stack<u32>, C: Stack<usize>>(
51 target: &'a dyn MatchTarget,
52 pattern: &'a dyn BinaryPattern,
53 mut save_stack: S,
54 cursor_stack: C,
55) -> BinaryMatcher<'a, S, C> {
56 save_stack.truncate(0);
57 save_stack.push_value(0x00);
58
59 BinaryMatcher::new(pattern, target, save_stack, cursor_stack)
60}
61
62impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
63 pub(crate) fn new(
65 pattern: &'a dyn BinaryPattern,
66 target: &'a dyn MatchTarget,
67 mut save_stack: S,
68 cursor_stack: C,
69 ) -> Self {
70 save_stack.truncate(0);
71 save_stack.push_value(0x00);
72
73 Self {
74 pattern_atoms: pattern.atoms(),
75 pattern_byte_sequence: pattern.byte_sequence(),
76
77 target,
78
79 save_stack,
80 cursor_stack,
81
82 current_offset: 0,
83 match_length: target.match_length(),
84 }
85 }
86
87 fn match_atoms(&mut self, mut data_cursor: usize, atoms: &[Atom]) -> Option<usize> {
90 let mut atom_cursor = 0;
91 while atom_cursor < atoms.len() {
92 match atoms[atom_cursor] {
93 Atom::ByteSequence { seq_start, seq_end } => {
94 let expected_bytes =
95 &self.pattern_byte_sequence[seq_start as usize..seq_end as usize];
96 let actual_bytes = self.target.subrange(data_cursor, expected_bytes.len())?;
97
98 if expected_bytes
99 .iter()
100 .zip(actual_bytes.iter())
101 .any(|(expected, data)| *expected != *data)
102 {
103 return None;
104 }
105
106 atom_cursor += 1;
107 data_cursor += expected_bytes.len();
108 }
109 Atom::ByteSequenceMasked {
110 seq_start,
111 mask_start,
112 len,
113 } => {
114 let target_bytes = &self.pattern_byte_sequence
115 [seq_start as usize..seq_start as usize + len as usize];
116
117 let target_mask = &self.pattern_byte_sequence
118 [mask_start as usize..mask_start as usize + len as usize];
119
120 let actual_bytes = self.target.subrange(data_cursor, target_mask.len())?;
121
122 let target_bytes = target_bytes
123 .iter()
124 .zip(target_mask)
125 .map(|(value, mask)| *value & *mask);
126 let actual_bytes = actual_bytes
127 .iter()
128 .zip(target_mask)
129 .map(|(value, mask)| *value & *mask);
130
131 if target_bytes
132 .zip(actual_bytes)
133 .any(|(expected, data)| expected != data)
134 {
135 return None;
136 }
137
138 atom_cursor += 1;
139 data_cursor += len as usize;
140 }
141 Atom::WildcardFixed(length) => {
142 atom_cursor += 1;
143 data_cursor += length as usize;
144 }
145 Atom::WildcardRange { min, max } => {
146 let save_stack_size = self.save_stack.len();
147 let cursor_stack_size = self.cursor_stack.len();
148
149 for offset in min..=max {
150 self.save_stack.truncate(save_stack_size);
151 self.cursor_stack.truncate(cursor_stack_size);
152 if let Some(data_cursor) = self
153 .match_atoms(data_cursor + offset as usize, &atoms[atom_cursor + 1..])
154 {
155 return Some(data_cursor);
157 }
158 }
159
160 return None;
161 }
162
163 Atom::CursorPush => {
164 if !self.cursor_stack.push_value(data_cursor) {
165 return None;
167 }
168
169 atom_cursor += 1;
170 }
171 Atom::CursorPop { advance } => {
172 let Some(value) = self.cursor_stack.pop_value() else {
173 return None;
175 };
176
177 data_cursor = value + advance as usize;
178 atom_cursor += 1;
179 }
180
181 Atom::Branch {
182 left_len,
183 right_len,
184 } => {
185 let left_len = left_len as usize;
186 let right_len = right_len as usize;
187
188 let save_stack_size = self.save_stack.len();
189 let cursor_stack_size = self.cursor_stack.len();
190
191 let remaining_atoms = &atoms[atom_cursor + 1 + left_len + right_len..];
192 if let Some(data_cursor) = {
193 self.match_atoms(
194 data_cursor,
195 &atoms[atom_cursor + 1..atom_cursor + 1 + left_len],
196 )
197 } {
198 if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
200 return Some(data_cursor);
201 }
202 }
203
204 self.save_stack.truncate(save_stack_size);
205 self.cursor_stack.truncate(cursor_stack_size);
206 if let Some(data_cursor) = {
207 self.match_atoms(
208 data_cursor,
209 &atoms[atom_cursor + left_len + 1
210 ..atom_cursor + left_len + right_len + 1],
211 )
212 } {
213 if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
215 return Some(data_cursor);
216 }
217 }
218
219 return None;
220 }
221
222 Atom::Jump(mode) => {
223 data_cursor = match mode {
224 JumpType::RelByte => {
225 let value = self.target.subrange(data_cursor, 1)?;
226 (data_cursor + 1).wrapping_add_signed(value[0] as i8 as isize)
227 }
228 JumpType::RelDWord => {
229 let value = self.target.subrange(data_cursor, 4)?;
230 let value = i32::from_le_bytes(value.try_into().unwrap());
231 (data_cursor + 4).wrapping_add_signed(value as isize)
232 }
233 JumpType::AbsQWord => {
234 let value = self.target.subrange(data_cursor, 8)?;
235 let value = u64::from_le_bytes(value.try_into().unwrap());
236 self.target.translate_absolute_address(value)?
237 }
238 };
239 atom_cursor += 1;
240 }
241
242 Atom::Read(width) => {
243 let (value, width) = match width {
244 ReadWidth::Byte => {
245 let value = self.target.subrange(data_cursor, 1)?;
246 (value[0] as u32, 1)
247 }
248 ReadWidth::Word => {
249 let value = self.target.subrange(data_cursor, 2)?;
250 (u16::from_le_bytes(value.try_into().unwrap()) as u32, 2)
251 }
252 ReadWidth::DWord => {
253 let value = self.target.subrange(data_cursor, 4)?;
254 (u32::from_le_bytes(value.try_into().unwrap()), 4)
255 }
256 };
257 if !self.save_stack.push_value(value) {
258 return None;
260 }
261
262 atom_cursor += 1;
263 data_cursor += width;
264 }
265
266 Atom::SaveCursor => {
267 if !self.save_stack.push_value(data_cursor as u32) {
268 return None;
270 }
271 atom_cursor += 1;
272 }
273 Atom::SaveConstant(value) => {
274 if !self.save_stack.push_value(value) {
275 return None;
277 }
278 atom_cursor += 1;
279 }
280 }
281 }
282
283 Some(data_cursor)
284 }
285
286 fn next_match_hint(&self) -> MatchHint {
289 let mut fs_buffer = [0u8; 0x10];
290 let mut fs_buffer_len = 0;
291 for atom in self.pattern_atoms {
292 match atom {
293 Atom::ByteSequence { seq_start, seq_end } => {
294 let seq_start = *seq_start as usize;
295 let seq_end = *seq_end as usize;
296
297 let copy_length = (seq_end - seq_start).min(fs_buffer.len() - fs_buffer_len);
298 fs_buffer[fs_buffer_len..fs_buffer_len + copy_length].copy_from_slice(
299 &self.pattern_byte_sequence[seq_start..seq_start + copy_length],
300 );
301 fs_buffer_len += copy_length;
302 if fs_buffer_len >= fs_buffer.len() {
303 break;
305 }
306 }
307 Atom::CursorPush => continue,
308 Atom::SaveConstant(_) => continue,
309 Atom::SaveCursor => continue,
310 _ => break,
311 }
312 }
313
314 if fs_buffer_len == 0 {
315 return MatchHint::Unsupported;
317 }
318
319 self.target
320 .match_hint(self.current_offset, &fs_buffer[0..fs_buffer_len])
321 }
322
323 fn next_match_looped(&mut self) -> Option<&[u32]> {
324 while self.current_offset < self.match_length {
325 self.save_stack.truncate(1);
326 self.cursor_stack.truncate(0);
327
328 let match_offset = self.current_offset;
329 self.current_offset += 1;
330
331 if self.match_atoms(match_offset, self.pattern_atoms).is_none() {
332 continue;
333 }
334
335 let save_stack = self.save_stack.stack_mut();
336 save_stack[0] = match_offset as u32;
337 return Some(save_stack);
338 }
339
340 None
341 }
342
343 pub fn next_match(&mut self) -> Option<&[u32]> {
351 while self.current_offset < self.match_length {
352 match self.next_match_hint() {
353 MatchHint::Unsupported => {
354 return self.next_match_looped();
356 }
357 MatchHint::NoMatches => {
358 return None;
360 }
361 MatchHint::MaybeMatch(hint_offset) => {
362 self.save_stack.truncate(1);
364 self.cursor_stack.truncate(0);
365
366 self.current_offset = hint_offset + 1;
367 if self.match_atoms(hint_offset, self.pattern_atoms).is_none() {
368 continue;
370 }
371
372 let save_stack = self.save_stack.stack_mut();
373 save_stack[0] = hint_offset as u32;
374 return Some(save_stack);
375 }
376 }
377 }
378
379 None
380 }
381}
382
383#[cfg(test)]
384mod test {
385 use crate::{
386 compiler::parse_pattern,
387 BinaryPattern,
388 };
389
390 const DATA: &[u8] = &[
391 0xCA, 0x70, 0x11, 0xB5, 0xA, 0x9D, 0x91, 0x83, 0xC4, 0x5A, 0xFC, 0xC7, 0x31, 0x26, 0xC3,
392 0x48, 0x3D, 0x6C, 0x16, 0xD7, 0x15, 0x91, 0xDB, 0xC4, 0x21, 0x2, 0x31, 0x4D, 0xE9, 0xD5,
393 0x52, 0xFB, 0xB7, 0x31, 0x91, 0x45, 0x35, 0xC7, 0xDA, 0xA9, 0x77, 0xFC, 0x9C, 0x3E, 0x65,
394 0x19, 0xF2, 0x5A, 0x68, 0x99, 0x21, 0xC, 0xED, 0xDC, 0x21, 0x8C, 0xA2, 0x7B, 0xBA, 0xC0,
395 0x9A, 0x94, 0x99, 0x9B, 0xB2, 0xB7, 0x69, 0x2D, 0x17, 0xA9, 0x85, 0x2C, 0xD7, 0x42, 0x43,
396 0x91, 0xF6, 0x6E, 0x34, 0xBC, 0x2F, 0xF7, 0xAE, 0xAA, 0xAE, 0xBF, 0x4, 0xE5, 0xD5, 0x9B,
397 0x13, 0x60, 0x17, 0x31, 0x87, 0xEF, 0xF1, 0x24, 0x43, 0xB4, 0x60, 0xBC, 0x9F, 0x16, 0x86,
398 0x39, 0x3D, 0x9E, 0x1, 0x68, 0x74, 0x8D, 0xD3, 0xC8, 0x6, 0x25, 0x88, 0xB0, 0x95, 0x99,
399 0xB4, 0x5D, 0xBE, 0x8B, 0xD3, 0x26, 0xCB, 0x3C,
400 ];
401
402 fn test_single(pattern: &str, data: &[u8], result: Option<&[u32]>) {
403 let pattern = parse_pattern(pattern).unwrap();
404 println!("Atoms: {:?}", pattern.atoms());
405
406 let mut matcher = super::execute(&data, &pattern);
407 assert_eq!(matcher.next_match(), result);
408 }
409
410 #[test]
411 fn test_simple() {
412 test_single("B7 69 2D", DATA, Some(&[0x41]));
413 test_single("B7 69 ' 2D", DATA, Some(&[0x41, 0x43]));
414 test_single("' B7 69 ' 2D", DATA, Some(&[0x41, 0x41, 0x43]));
415 test_single("B7 69 3D", DATA, None);
416 }
417
418 #[test]
419 fn test_binary_mask() {
420 test_single("0xB76?2D", DATA, Some(&[0x41]));
421 }
422
423 #[test]
424 fn test_range() {
425 test_single("B7 69 2D [0-3] 85 2C '", DATA, Some(&[0x41, 0x48]));
426 test_single("B7 69 2D [0-1] 85 2C '", DATA, None);
427 }
428
429 #[test]
430 fn test_branch() {
431 test_single("B7 (69 | 70) 2D", DATA, Some(&[0x41]));
432 test_single("B7 (70 | 69) 2D", DATA, Some(&[0x41]));
433
434 test_single("B7 (70 | ) 69 2D", DATA, Some(&[0x41]));
436 }
437
438 #[test]
439 fn test_jmp() {
440 test_single(
441 "EB % FF",
442 &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE],
443 Some(&[0x01]),
444 );
445 test_single("EB % EE", &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE], None);
446 test_single(
447 "EB % EE",
448 &[0x00, 0xEB, 0x00, 0xEE, 0xFF, 0xEE],
449 Some(&[0x01]),
450 );
451
452 test_single(
453 "E9 $ EE",
454 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xFF],
455 None,
456 );
457
458 test_single(
459 "E9 $ EE '",
460 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE],
461 Some(&[0x01, 0x08]),
462 );
463
464 test_single(
465 "E9 $ { EE FF } '",
466 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE, 0xFF],
467 Some(&[0x01, 0x06]),
468 );
469 }
470
471 #[test]
472 fn test_branch_behaviour() {
473 #[rustfmt::skip]
474 test_single(
475 "
476 48 8B 83 r4
477 [14]
478 48 8B 88 r4
479 ([4] | [0])
480 48 89 91 F8 05 00 00
481 ",
482 &[
483 0x00,
484 0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00,
485 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
486 0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00,
487 0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
488 ],
489 Some(&[0x01, 0x02, 0x03]),
490 );
491
492 #[rustfmt::skip]
493 test_single(
494 "
495 48 8B 83 r4
496 [14]
497 48 8B 88 r4
498 ([4] | [0])
499 48 89 91 F8 05 00 00
500 ",
501 &[
502 0x00,
503 0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00,
504 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
505 0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00,
506 0xFF, 0xFF, 0xFF, 0xFF,
507 0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
508 ],
509 Some(&[0x01, 0x02, 0x03]),
510 );
511 }
512}