1use crate::{
2 Atom,
3 BinaryPattern,
4 JumpType,
5 MatchTarget,
6 ReadWidth,
7 Stack,
8 StaticStack,
9};
10
11pub enum MatchHint {
13 Unsupported,
15
16 NoMatches,
18
19 MaybeMatch(usize),
21}
22
23pub struct BinaryMatcher<'a, S: Stack<u32>, C: Stack<usize>> {
27 pattern_atoms: &'a [Atom],
28 pattern_byte_sequence: &'a [u8],
29
30 target: &'a dyn MatchTarget,
31
32 current_offset: usize,
33 match_length: usize,
34
35 save_stack: S,
36 cursor_stack: C,
37}
38
39pub fn execute<'a>(
42 target: &'a dyn MatchTarget,
43 pattern: &'a dyn BinaryPattern,
44) -> BinaryMatcher<'a, StaticStack<0x10, u32>, StaticStack<0x10, usize>> {
45 self::execute_with_stack(target, pattern, StaticStack::new(), StaticStack::new())
46}
47
48pub fn execute_with_stack<'a, S: Stack<u32>, C: Stack<usize>>(
50 target: &'a dyn MatchTarget,
51 pattern: &'a dyn BinaryPattern,
52 mut save_stack: S,
53 cursor_stack: C,
54) -> BinaryMatcher<'a, S, C> {
55 save_stack.truncate(0);
56 save_stack.push_value(0x00);
57
58 BinaryMatcher::new(pattern, target, save_stack, cursor_stack)
59}
60
61impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
62 pub(crate) fn new(
64 pattern: &'a dyn BinaryPattern,
65 target: &'a dyn MatchTarget,
66 mut save_stack: S,
67 cursor_stack: C,
68 ) -> Self {
69 save_stack.truncate(0);
70 save_stack.push_value(0x00);
71
72 Self {
73 pattern_atoms: pattern.atoms(),
74 pattern_byte_sequence: pattern.byte_sequence(),
75
76 target,
77
78 save_stack,
79 cursor_stack,
80
81 current_offset: 0,
82 match_length: target.match_length(),
83 }
84 }
85
86 fn match_atoms(&mut self, mut data_cursor: usize, atoms: &[Atom]) -> Option<usize> {
89 let mut atom_cursor = 0;
90 while atom_cursor < atoms.len() {
91 match atoms[atom_cursor] {
92 Atom::ByteSequence { seq_start, seq_end } => {
93 let expected_bytes =
94 &self.pattern_byte_sequence[seq_start as usize..seq_end as usize];
95 let actual_bytes = self.target.subrange(data_cursor, expected_bytes.len())?;
96
97 if expected_bytes
98 .iter()
99 .zip(actual_bytes.iter())
100 .any(|(expected, data)| *expected != *data)
101 {
102 return None;
103 }
104
105 atom_cursor += 1;
106 data_cursor += expected_bytes.len();
107 }
108 Atom::ByteSequenceMasked {
109 seq_start,
110 mask_start,
111 len,
112 } => {
113 let target_bytes = &self.pattern_byte_sequence
114 [seq_start as usize..seq_start as usize + len as usize];
115
116 let target_mask = &self.pattern_byte_sequence
117 [mask_start as usize..mask_start as usize + len as usize];
118
119 let actual_bytes = self.target.subrange(data_cursor, target_mask.len())?;
120
121 let target_bytes = target_bytes
122 .iter()
123 .zip(target_mask)
124 .map(|(value, mask)| *value & *mask);
125 let actual_bytes = actual_bytes
126 .iter()
127 .zip(target_mask)
128 .map(|(value, mask)| *value & *mask);
129
130 if target_bytes
131 .zip(actual_bytes)
132 .any(|(expected, data)| expected != data)
133 {
134 return None;
135 }
136
137 atom_cursor += 1;
138 data_cursor += len as usize;
139 }
140 Atom::WildcardFixed(length) => {
141 atom_cursor += 1;
142 data_cursor += length as usize;
143 }
144 Atom::WildcardRange { min, max } => {
145 let save_stack_size = self.save_stack.len();
146 let cursor_stack_size = self.cursor_stack.len();
147
148 for offset in min..=max {
149 self.save_stack.truncate(save_stack_size);
150 self.cursor_stack.truncate(cursor_stack_size);
151 if let Some(data_cursor) = self
152 .match_atoms(data_cursor + offset as usize, &atoms[atom_cursor + 1..])
153 {
154 return Some(data_cursor);
156 }
157 }
158
159 return None;
160 }
161
162 Atom::CursorPush => {
163 if !self.cursor_stack.push_value(data_cursor) {
164 return None;
166 }
167
168 atom_cursor += 1;
169 }
170 Atom::CursorPop { advance } => {
171 let Some(value) = self.cursor_stack.pop_value() else {
172 return None;
174 };
175
176 data_cursor = value + advance as usize;
177 atom_cursor += 1;
178 }
179
180 Atom::Branch {
181 left_len,
182 right_len,
183 } => {
184 let left_len = left_len as usize;
185 let right_len = right_len as usize;
186
187 let save_stack_size = self.save_stack.len();
188 let cursor_stack_size = self.cursor_stack.len();
189
190 let remaining_atoms = &atoms[atom_cursor + 1 + left_len + right_len..];
191 if let Some(data_cursor) = {
192 self.match_atoms(
193 data_cursor,
194 &atoms[atom_cursor + 1..atom_cursor + 1 + left_len],
195 )
196 } {
197 if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
199 return Some(data_cursor);
200 }
201 }
202
203 self.save_stack.truncate(save_stack_size);
204 self.cursor_stack.truncate(cursor_stack_size);
205 if let Some(data_cursor) = {
206 self.match_atoms(
207 data_cursor,
208 &atoms[atom_cursor + left_len + 1
209 ..atom_cursor + left_len + right_len + 1],
210 )
211 } {
212 if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
214 return Some(data_cursor);
215 }
216 }
217
218 return None;
219 }
220
221 Atom::Jump(mode) => {
222 data_cursor = match mode {
223 JumpType::RelByte => {
224 let value = self.target.subrange(data_cursor, 1)?;
225 (data_cursor + 1).wrapping_add_signed(value[0] as i8 as isize)
226 }
227 JumpType::RelDWord => {
228 let value = self.target.subrange(data_cursor, 4)?;
229 let value = i32::from_le_bytes(value.try_into().unwrap());
230 (data_cursor + 4).wrapping_add_signed(value as isize)
231 }
232 JumpType::AbsQWord => {
233 let value = self.target.subrange(data_cursor, 8)?;
234 let value = u64::from_le_bytes(value.try_into().unwrap());
235 self.target.translate_absolute_address(value)?
236 }
237 };
238 atom_cursor += 1;
239 }
240
241 Atom::Read(width) => {
242 let (value, width) = match width {
243 ReadWidth::Byte => {
244 let value = self.target.subrange(data_cursor, 1)?;
245 (value[0] as u32, 1)
246 }
247 ReadWidth::Word => {
248 let value = self.target.subrange(data_cursor, 2)?;
249 (u16::from_le_bytes(value.try_into().unwrap()) as u32, 2)
250 }
251 ReadWidth::DWord => {
252 let value = self.target.subrange(data_cursor, 4)?;
253 (u32::from_le_bytes(value.try_into().unwrap()), 4)
254 }
255 };
256 if !self.save_stack.push_value(value) {
257 return None;
259 }
260
261 atom_cursor += 1;
262 data_cursor += width;
263 }
264
265 Atom::SaveCursor => {
266 if !self.save_stack.push_value(data_cursor as u32) {
267 return None;
269 }
270 atom_cursor += 1;
271 }
272 Atom::SaveConstant(value) => {
273 if !self.save_stack.push_value(value) {
274 return None;
276 }
277 atom_cursor += 1;
278 }
279 }
280 }
281
282 Some(data_cursor)
283 }
284
285 fn next_match_hint(&self) -> MatchHint {
288 let mut fs_buffer = [0u8; 0x10];
289 let mut fs_buffer_len = 0;
290 for atom in self.pattern_atoms {
291 match atom {
292 Atom::ByteSequence { seq_start, seq_end } => {
293 let seq_start = *seq_start as usize;
294 let seq_end = *seq_end as usize;
295
296 let copy_length = (seq_end - seq_start).min(fs_buffer.len() - fs_buffer_len);
297 fs_buffer[fs_buffer_len..fs_buffer_len + copy_length].copy_from_slice(
298 &self.pattern_byte_sequence[seq_start..seq_start + copy_length],
299 );
300 fs_buffer_len += copy_length;
301 if fs_buffer_len >= fs_buffer.len() {
302 break;
304 }
305 }
306 Atom::CursorPush => continue,
307 Atom::SaveConstant(_) => continue,
308 Atom::SaveCursor => continue,
309 Atom::Read(_) => continue,
310 _ => break,
311 }
312 }
313
314 if fs_buffer_len == 0 {
315 return MatchHint::Unsupported;
317 }
318
319 self.target
320 .match_hint(self.current_offset, &fs_buffer[0..fs_buffer_len])
321 }
322
323 fn next_match_looped(&mut self) -> Option<&[u32]> {
324 while self.current_offset < self.match_length {
325 self.save_stack.truncate(1);
326 self.cursor_stack.truncate(0);
327
328 let match_offset = self.current_offset;
329 self.current_offset += 1;
330
331 if self.match_atoms(match_offset, self.pattern_atoms).is_none() {
332 continue;
333 }
334
335 let save_stack = self.save_stack.stack_mut();
336 save_stack[0] = match_offset as u32;
337 return Some(save_stack);
338 }
339
340 None
341 }
342
343 pub fn next_match(&mut self) -> Option<&[u32]> {
351 while self.current_offset < self.match_length {
352 match self.next_match_hint() {
353 MatchHint::Unsupported => {
354 return self.next_match_looped();
356 }
357 MatchHint::NoMatches => {
358 return None;
360 }
361 MatchHint::MaybeMatch(hint_offset) => {
362 self.save_stack.truncate(1);
364 self.cursor_stack.truncate(0);
365
366 self.current_offset = hint_offset + 1;
367 if self.match_atoms(hint_offset, self.pattern_atoms).is_none() {
368 continue;
370 }
371
372 let save_stack = self.save_stack.stack_mut();
373 save_stack[0] = hint_offset as u32;
374 return Some(save_stack);
375 }
376 }
377 }
378
379 None
380 }
381}
382
383#[cfg(test)]
384mod test {
385 use crate::{
386 compiler::parse_pattern,
387 BinaryPattern,
388 };
389
390 const DATA: &[u8] = &[
391 0xCA, 0x70, 0x11, 0xB5, 0xA, 0x9D, 0x91, 0x83, 0xC4, 0x5A, 0xFC, 0xC7, 0x31, 0x26, 0xC3,
392 0x48, 0x3D, 0x6C, 0x16, 0xD7, 0x15, 0x91, 0xDB, 0xC4, 0x21, 0x2, 0x31, 0x4D, 0xE9, 0xD5,
393 0x52, 0xFB, 0xB7, 0x31, 0x91, 0x45, 0x35, 0xC7, 0xDA, 0xA9, 0x77, 0xFC, 0x9C, 0x3E, 0x65,
394 0x19, 0xF2, 0x5A, 0x68, 0x99, 0x21, 0xC, 0xED, 0xDC, 0x21, 0x8C, 0xA2, 0x7B, 0xBA, 0xC0,
395 0x9A, 0x94, 0x99, 0x9B, 0xB2, 0xB7, 0x69, 0x2D, 0x17, 0xA9, 0x85, 0x2C, 0xD7, 0x42, 0x43,
396 0x91, 0xF6, 0x6E, 0x34, 0xBC, 0x2F, 0xF7, 0xAE, 0xAA, 0xAE, 0xBF, 0x4, 0xE5, 0xD5, 0x9B,
397 0x13, 0x60, 0x17, 0x31, 0x87, 0xEF, 0xF1, 0x24, 0x43, 0xB4, 0x60, 0xBC, 0x9F, 0x16, 0x86,
398 0x39, 0x3D, 0x9E, 0x1, 0x68, 0x74, 0x8D, 0xD3, 0xC8, 0x6, 0x25, 0x88, 0xB0, 0x95, 0x99,
399 0xB4, 0x5D, 0xBE, 0x8B, 0xD3, 0x26, 0xCB, 0x3C,
400 ];
401
402 fn test_single(pattern: &str, data: &[u8], result: Option<&[u32]>) {
403 let pattern = parse_pattern(pattern).unwrap();
404 println!("Atoms: {:?}", pattern.atoms());
405
406 let mut matcher = super::execute(&data, &pattern);
407 assert_eq!(matcher.next_match(), result);
408 }
409
410 #[test]
411 fn test_simple() {
412 test_single("B7 69 2D", DATA, Some(&[0x41]));
413 test_single("B7 69 ' 2D", DATA, Some(&[0x41, 0x43]));
414 test_single("' B7 69 ' 2D", DATA, Some(&[0x41, 0x41, 0x43]));
415 test_single("B7 69 3D", DATA, None);
416 }
417
418 #[test]
419 fn test_binary_mask() {
420 test_single("0xB76?2D", DATA, Some(&[0x41]));
421 }
422
423 #[test]
424 fn test_range() {
425 test_single("B7 69 2D [0-3] 85 2C '", DATA, Some(&[0x41, 0x48]));
426 test_single("B7 69 2D [0-1] 85 2C '", DATA, None);
427 }
428
429 #[test]
430 fn test_branch() {
431 test_single("B7 (69 | 70) 2D", DATA, Some(&[0x41]));
432 test_single("B7 (70 | 69) 2D", DATA, Some(&[0x41]));
433
434 test_single("B7 (70 | ) 69 2D", DATA, Some(&[0x41]));
436 }
437
438 #[test]
439 fn test_jmp() {
440 test_single(
441 "EB % FF",
442 &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE],
443 Some(&[0x01]),
444 );
445 test_single("EB % EE", &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE], None);
446 test_single(
447 "EB % EE",
448 &[0x00, 0xEB, 0x00, 0xEE, 0xFF, 0xEE],
449 Some(&[0x01]),
450 );
451
452 test_single(
453 "E9 $ EE",
454 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xFF],
455 None,
456 );
457
458 test_single(
459 "E9 $ EE '",
460 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE],
461 Some(&[0x01, 0x08]),
462 );
463
464 test_single(
465 "E9 $ { EE FF } '",
466 &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE, 0xFF],
467 Some(&[0x01, 0x06]),
468 );
469 }
470
471 #[test]
472 fn test_branch_behaviour() {
473 #[rustfmt::skip]
474 test_single(
475 "
476 48 8B 83 r4
477 [14]
478 48 8B 88 r4
479 ([4] | [0])
480 48 89 91 F8 05 00 00
481 ",
482 &[
483 0x00,
484 0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00,
485 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
486 0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00,
487 0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
488 ],
489 Some(&[0x01, 0x02, 0x03]),
490 );
491
492 #[rustfmt::skip]
493 test_single(
494 "
495 48 8B 83 r4
496 [14]
497 48 8B 88 r4
498 ([4] | [0])
499 48 89 91 F8 05 00 00
500 ",
501 &[
502 0x00,
503 0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00,
504 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
505 0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00,
506 0xFF, 0xFF, 0xFF, 0xFF,
507 0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
508 ],
509 Some(&[0x01, 0x02, 0x03]),
510 );
511 }
512}