bmatcher_core/
matcher.rs

1use core::ops::{
2    Bound,
3    Range,
4    RangeBounds,
5};
6
7use crate::{
8    Atom,
9    BinaryPattern,
10    HeapStack,
11    JumpType,
12    MatchTarget,
13    ReadWidth,
14    Stack,
15    StaticStack,
16};
17
18/// Hinting where the pattern might be matching the target
19enum MatchHint {
20    /// A match hint could not be generated
21    Unsupported,
22
23    /// The pattern will not match the target
24    NoMatches,
25
26    /// The pattern may match the target at the given offset
27    MaybeMatch(usize),
28}
29
30/// The `BinaryMatcher` is responsible for searching a [BinaryPattern] within a [MatchTarget].
31///
32/// Use [`BinaryMatcher::next_match`] to iterate through matches of the specified pattern.
33pub struct BinaryMatcher<
34    'a,
35    S: Stack<u32> = StaticStack<0x10, u32>,
36    C: Stack<usize> = StaticStack<0x10, usize>,
37> {
38    pattern_atoms: &'a [Atom],
39    pattern_byte_sequence: &'a [u8],
40
41    target: &'a dyn MatchTarget,
42
43    match_offset: usize,
44
45    save_stack: S,
46    cursor_stack: C,
47}
48
49impl<'a> BinaryMatcher<'a> {
50    /// Create a new BinaryMatcher instance with a statically allocated stack.
51    /// The default stack size is 0x10 for the save and cursor stack.
52    pub fn new(pattern: &'a dyn BinaryPattern, target: &'a dyn MatchTarget) -> Self {
53        Self::new_with_stack(
54            pattern,
55            target,
56            StaticStack::<0x10, u32>::new(),
57            StaticStack::<0x10, usize>::new(),
58        )
59    }
60
61    /// Create a new BinaryMatcher instance with a heap allocated save and cursor stack
62    pub fn new_with_heap_stack(
63        pattern: &'a dyn BinaryPattern,
64        target: &'a dyn MatchTarget,
65    ) -> BinaryMatcher<'a, HeapStack<u32>, HeapStack<usize>> {
66        BinaryMatcher::new_with_stack(
67            pattern,
68            target,
69            HeapStack::<u32>::new(),
70            HeapStack::<usize>::new(),
71        )
72    }
73}
74
75impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
76    /// Create a new binary matcher and supply the save and cursor stacks on your own
77    pub fn new_with_stack(
78        pattern: &'a dyn BinaryPattern,
79        target: &'a dyn MatchTarget,
80        mut save_stack: S,
81        cursor_stack: C,
82    ) -> Self {
83        save_stack.truncate(0);
84        save_stack.push_value(0x00);
85
86        Self {
87            pattern_atoms: pattern.atoms(),
88            pattern_byte_sequence: pattern.byte_sequence(),
89
90            target,
91
92            save_stack,
93            cursor_stack,
94
95            match_offset: 0,
96        }
97    }
98
99    /// Matches the current atom and updates the data & atom cursor.
100    /// Returns false if the atom does not match.
101    fn match_atoms(&mut self, mut data_cursor: usize, atoms: &[Atom]) -> Option<usize> {
102        let mut atom_cursor = 0;
103        while atom_cursor < atoms.len() {
104            match atoms[atom_cursor] {
105                Atom::ByteSequence { seq_start, seq_end } => {
106                    let expected_bytes =
107                        &self.pattern_byte_sequence[seq_start as usize..seq_end as usize];
108                    let actual_bytes = self.target.subrange(data_cursor, expected_bytes.len())?;
109
110                    if expected_bytes
111                        .iter()
112                        .zip(actual_bytes.iter())
113                        .any(|(expected, data)| *expected != *data)
114                    {
115                        return None;
116                    }
117
118                    atom_cursor += 1;
119                    data_cursor += expected_bytes.len();
120                }
121                Atom::ByteSequenceMasked {
122                    seq_start,
123                    mask_start,
124                    len,
125                } => {
126                    let target_bytes = &self.pattern_byte_sequence
127                        [seq_start as usize..seq_start as usize + len as usize];
128
129                    let target_mask = &self.pattern_byte_sequence
130                        [mask_start as usize..mask_start as usize + len as usize];
131
132                    let actual_bytes = self.target.subrange(data_cursor, target_mask.len())?;
133
134                    let target_bytes = target_bytes
135                        .iter()
136                        .zip(target_mask)
137                        .map(|(value, mask)| *value & *mask);
138                    let actual_bytes = actual_bytes
139                        .iter()
140                        .zip(target_mask)
141                        .map(|(value, mask)| *value & *mask);
142
143                    if target_bytes
144                        .zip(actual_bytes)
145                        .any(|(expected, data)| expected != data)
146                    {
147                        return None;
148                    }
149
150                    atom_cursor += 1;
151                    data_cursor += len as usize;
152                }
153                Atom::WildcardFixed(length) => {
154                    atom_cursor += 1;
155                    data_cursor += length as usize;
156                }
157                Atom::WildcardRange { min, max } => {
158                    let save_stack_size = self.save_stack.len();
159                    let cursor_stack_size = self.cursor_stack.len();
160
161                    for offset in min..=max {
162                        self.save_stack.truncate(save_stack_size);
163                        self.cursor_stack.truncate(cursor_stack_size);
164                        if let Some(data_cursor) = self
165                            .match_atoms(data_cursor + offset as usize, &atoms[atom_cursor + 1..])
166                        {
167                            /* match :) */
168                            return Some(data_cursor);
169                        }
170                    }
171
172                    return None;
173                }
174
175                Atom::CursorPush => {
176                    if !self.cursor_stack.push_value(data_cursor) {
177                        /* TODO: Return error instead of abort search */
178                        return None;
179                    }
180
181                    atom_cursor += 1;
182                }
183                Atom::CursorPop { advance } => {
184                    let Some(value) = self.cursor_stack.pop_value() else {
185                        /* TODO: Return error instead of abort search */
186                        return None;
187                    };
188
189                    data_cursor = value + advance as usize;
190                    atom_cursor += 1;
191                }
192
193                Atom::Branch {
194                    left_len,
195                    right_len,
196                } => {
197                    let left_len = left_len as usize;
198                    let right_len = right_len as usize;
199
200                    let save_stack_size = self.save_stack.len();
201                    let cursor_stack_size = self.cursor_stack.len();
202
203                    let remaining_atoms = &atoms[atom_cursor + 1 + left_len + right_len..];
204                    if let Some(data_cursor) = {
205                        self.match_atoms(
206                            data_cursor,
207                            &atoms[atom_cursor + 1..atom_cursor + 1 + left_len],
208                        )
209                    } {
210                        /* Left site match. Match the rest */
211                        if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
212                            return Some(data_cursor);
213                        }
214                    }
215
216                    self.save_stack.truncate(save_stack_size);
217                    self.cursor_stack.truncate(cursor_stack_size);
218                    if let Some(data_cursor) = {
219                        self.match_atoms(
220                            data_cursor,
221                            &atoms[atom_cursor + left_len + 1
222                                ..atom_cursor + left_len + right_len + 1],
223                        )
224                    } {
225                        /* Right site match. Match the rest */
226                        if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
227                            return Some(data_cursor);
228                        }
229                    }
230
231                    return None;
232                }
233
234                Atom::Jump(mode) => {
235                    data_cursor = match mode {
236                        JumpType::RelByte => {
237                            let value = self.target.subrange(data_cursor, 1)?;
238                            (data_cursor + 1).wrapping_add_signed(value[0] as i8 as isize)
239                        }
240                        JumpType::RelDWord => {
241                            let value = self.target.subrange(data_cursor, 4)?;
242                            let value = i32::from_le_bytes(value.try_into().unwrap());
243                            (data_cursor + 4).wrapping_add_signed(value as isize)
244                        }
245                        JumpType::AbsQWord => {
246                            let value = self.target.subrange(data_cursor, 8)?;
247                            let value = u64::from_le_bytes(value.try_into().unwrap());
248                            self.target.translate_absolute_address(value)?
249                        }
250                    };
251                    atom_cursor += 1;
252                }
253
254                Atom::Read(width) => {
255                    let (value, width) = match width {
256                        ReadWidth::Byte => {
257                            let value = self.target.subrange(data_cursor, 1)?;
258                            (value[0] as u32, 1)
259                        }
260                        ReadWidth::Word => {
261                            let value = self.target.subrange(data_cursor, 2)?;
262                            (u16::from_le_bytes(value.try_into().unwrap()) as u32, 2)
263                        }
264                        ReadWidth::DWord => {
265                            let value = self.target.subrange(data_cursor, 4)?;
266                            (u32::from_le_bytes(value.try_into().unwrap()), 4)
267                        }
268                    };
269                    if !self.save_stack.push_value(value) {
270                        /* TODO: Return error instead of abort search */
271                        return None;
272                    }
273
274                    atom_cursor += 1;
275                    data_cursor += width;
276                }
277
278                Atom::SaveCursor => {
279                    if !self.save_stack.push_value(data_cursor as u32) {
280                        /* TODO: Return error instead of abort search */
281                        return None;
282                    }
283                    atom_cursor += 1;
284                }
285                Atom::SaveConstant(value) => {
286                    if !self.save_stack.push_value(value) {
287                        /* TODO: Return error instead of abort search */
288                        return None;
289                    }
290                    atom_cursor += 1;
291                }
292            }
293        }
294
295        Some(data_cursor)
296    }
297
298    /// Generate a match hint for a proper search based of the first matching bytes
299    /// given by the pattern. This algorithm assumes that thet MatchTarget is in continuous memory.
300    fn next_match_hint(&self, range: Range<usize>) -> MatchHint {
301        let mut fs_buffer = [0u8; 0x10];
302        let mut fs_buffer_len = 0;
303        for atom in self.pattern_atoms {
304            match atom {
305                Atom::ByteSequence { seq_start, seq_end } => {
306                    let seq_start = *seq_start as usize;
307                    let seq_end = *seq_end as usize;
308
309                    let copy_length = (seq_end - seq_start).min(fs_buffer.len() - fs_buffer_len);
310                    fs_buffer[fs_buffer_len..fs_buffer_len + copy_length].copy_from_slice(
311                        &self.pattern_byte_sequence[seq_start..seq_start + copy_length],
312                    );
313                    fs_buffer_len += copy_length;
314                    if fs_buffer_len >= fs_buffer.len() {
315                        /* quick search buffer filled */
316                        break;
317                    }
318                }
319                Atom::CursorPush => continue,
320                Atom::SaveConstant(_) => continue,
321                Atom::SaveCursor => continue,
322                Atom::Read(_) => continue,
323                _ => break,
324            }
325        }
326
327        if fs_buffer_len == 0 {
328            /* can not berform a fuzzy search as we do not start with any binary data */
329            return MatchHint::Unsupported;
330        }
331
332        let Some(target_buffer) = self.target.subrange(range.start, range.end - range.start) else {
333            /* memory is not continuous */
334            return MatchHint::Unsupported;
335        };
336
337        Self::fuzzy_search(&fs_buffer[0..fs_buffer_len], target_buffer)
338            .map_or(MatchHint::NoMatches, |offset| {
339                MatchHint::MaybeMatch(range.start + offset)
340            })
341    }
342
343    fn fuzzy_search(needle: &[u8], haystack: &[u8]) -> Option<usize> {
344        for offset in 0..(haystack.len() - needle.len()) {
345            let is_match = needle
346                .iter()
347                .zip(&haystack[offset..offset + needle.len()])
348                .all(|(a, b)| *a == *b);
349
350            if is_match {
351                return Some(offset);
352            }
353        }
354
355        None
356    }
357
358    /// Finds the next match for the associated [BinaryPattern] within the [MatchTarget].
359    ///
360    /// # Returns
361    /// - `Some(&[u32])` containing the saved stack.
362    ///    The first element of the saved stack represents the start of the matched location.  
363    ///    Subsequent elements can be pushed using the `Atom::SaveCursor` atom or the `'` command within the binary pattern.
364    /// - `None` if no further matches are available.
365    pub fn next_match(&mut self) -> Option<&[u32]> {
366        self.next_match_within(..)
367    }
368
369    /// Finds the next match for the associated [BinaryPattern] within the [MatchTarget] within the given range.
370    /// The current match offset will be clamped into the given range.
371    pub fn next_match_within<R: RangeBounds<usize>>(&mut self, range: R) -> Option<&[u32]> {
372        let range_start = match range.start_bound() {
373            Bound::Excluded(value) => *value + 1,
374            Bound::Included(value) => *value,
375            Bound::Unbounded => 0,
376        };
377
378        let range_end = match range.end_bound() {
379            Bound::Excluded(value) => *value,
380            Bound::Included(value) => *value + 1,
381            Bound::Unbounded => self.target.match_length(),
382        };
383        if range_start >= range_end {
384            /* nothing to match against */
385            return None;
386        }
387
388        let mut match_offset = self.match_offset.clamp(range_start, range_end);
389        while match_offset < range_end {
390            match self.next_match_hint(match_offset..range_end) {
391                MatchHint::Unsupported => {
392                    /* fall back to matching against every position */
393                    return self.next_match_within_loop(match_offset..range_end);
394                }
395                MatchHint::NoMatches => {
396                    /* no more matches */
397                    return None;
398                }
399                MatchHint::MaybeMatch(hint_offset) => {
400                    /* check if the given offset is actually a match */
401                    self.save_stack.truncate(1);
402                    self.cursor_stack.truncate(0);
403
404                    if self.match_atoms(hint_offset, self.pattern_atoms).is_some() {
405                        self.match_offset = hint_offset + 1;
406
407                        let save_stack = self.save_stack.stack_mut();
408                        save_stack[0] = hint_offset as u32;
409                        return Some(save_stack);
410                    }
411
412                    match_offset = hint_offset + 1;
413                }
414            }
415        }
416
417        self.match_offset = range_end;
418        None
419    }
420
421    fn next_match_within_loop(&mut self, range: Range<usize>) -> Option<&[u32]> {
422        for match_offset in range.clone() {
423            self.save_stack.truncate(1);
424            self.cursor_stack.truncate(0);
425
426            if self.match_atoms(match_offset, self.pattern_atoms).is_none() {
427                continue;
428            }
429
430            self.match_offset = match_offset + 1;
431
432            let save_stack = self.save_stack.stack_mut();
433            save_stack[0] = match_offset as u32;
434            return Some(save_stack);
435        }
436
437        self.match_offset = range.end;
438        None
439    }
440}
441
442#[cfg(test)]
443mod test {
444    use super::BinaryMatcher;
445    use crate::{
446        compiler::parse_pattern,
447        BinaryPattern,
448    };
449
450    const DATA: &[u8] = &[
451        0xCA, 0x70, 0x11, 0xB5, 0xA, 0x9D, 0x91, 0x83, 0xC4, 0x5A, 0xFC, 0xC7, 0x31, 0x26, 0xC3,
452        0x48, 0x3D, 0x6C, 0x16, 0xD7, 0x15, 0x91, 0xDB, 0xC4, 0x21, 0x2, 0x31, 0x4D, 0xE9, 0xD5,
453        0x52, 0xFB, 0xB7, 0x31, 0x91, 0x45, 0x35, 0xC7, 0xDA, 0xA9, 0x77, 0xFC, 0x9C, 0x3E, 0x65,
454        0x19, 0xF2, 0x5A, 0x68, 0x99, 0x21, 0xC, 0xED, 0xDC, 0x21, 0x8C, 0xA2, 0x7B, 0xBA, 0xC0,
455        0x9A, 0x94, 0x99, 0x9B, 0xB2, 0xB7, 0x69, 0x2D, 0x17, 0xA9, 0x85, 0x2C, 0xD7, 0x42, 0x43,
456        0x91, 0xF6, 0x6E, 0x34, 0xBC, 0x2F, 0xF7, 0xAE, 0xAA, 0xAE, 0xBF, 0x4, 0xE5, 0xD5, 0x9B,
457        0x13, 0x60, 0x17, 0x31, 0x87, 0xEF, 0xF1, 0x24, 0x43, 0xB4, 0x60, 0xBC, 0x9F, 0x16, 0x86,
458        0x39, 0x3D, 0x9E, 0x1, 0x68, 0x74, 0x8D, 0xD3, 0xC8, 0x6, 0x25, 0x88, 0xB0, 0x95, 0x99,
459        0xB4, 0x5D, 0xBE, 0x8B, 0xD3, 0x26, 0xCB, 0x3C,
460    ];
461
462    fn test_single(pattern: &str, data: &[u8], result: Option<&[u32]>) {
463        let pattern = parse_pattern(pattern).unwrap();
464        println!("Atoms: {:?}", pattern.atoms());
465
466        let mut matcher = BinaryMatcher::new(&pattern, &data);
467        assert_eq!(matcher.next_match(), result);
468    }
469
470    #[test]
471    fn test_simple() {
472        test_single("B7 69 2D", DATA, Some(&[0x41]));
473        test_single("B7 69 ' 2D", DATA, Some(&[0x41, 0x43]));
474        test_single("' B7 69 ' 2D", DATA, Some(&[0x41, 0x41, 0x43]));
475        test_single("B7 69 3D", DATA, None);
476    }
477
478    #[test]
479    fn test_binary_mask() {
480        test_single("B7682D & FFFEFF", DATA, Some(&[0x41]));
481    }
482
483    #[test]
484    fn test_range() {
485        test_single("B7 69 2D [0-3] 85 2C '", DATA, Some(&[0x41, 0x48]));
486        test_single("B7 69 2D [0-1] 85 2C '", DATA, None);
487    }
488
489    #[test]
490    fn test_branch() {
491        test_single("B7 (69 | 70) 2D", DATA, Some(&[0x41]));
492        test_single("B7 (70 | 69) 2D", DATA, Some(&[0x41]));
493
494        /* optional 0x70 */
495        test_single("B7 (70 | ) 69 2D", DATA, Some(&[0x41]));
496    }
497
498    #[test]
499    fn test_jmp() {
500        test_single(
501            "EB % FF",
502            &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE],
503            Some(&[0x01]),
504        );
505        test_single("EB % EE", &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE], None);
506        test_single(
507            "EB % EE",
508            &[0x00, 0xEB, 0x00, 0xEE, 0xFF, 0xEE],
509            Some(&[0x01]),
510        );
511
512        test_single(
513            "E9 $ EE",
514            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xFF],
515            None,
516        );
517
518        test_single(
519            "E9 $ EE '",
520            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE],
521            Some(&[0x01, 0x08]),
522        );
523
524        test_single(
525            "E9 $ { EE FF } '",
526            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE, 0xFF],
527            Some(&[0x01, 0x06]),
528        );
529    }
530
531    #[test]
532    fn test_branch_behaviour() {
533        #[rustfmt::skip]
534        test_single(
535            "
536            48 8B 83 r4
537            [14]
538            48 8B 88 r4
539            ([4] | [0])
540            48 89 91 F8 05 00 00
541            ",
542            &[
543                0x00, 
544                0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00, 
545                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
546                0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00, 
547                0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
548            ],
549            Some(&[0x01, 0x02, 0x03]),
550        );
551
552        #[rustfmt::skip]
553        test_single(
554            "
555            48 8B 83 r4
556            [14]
557            48 8B 88 r4
558            ([4] | [0])
559            48 89 91 F8 05 00 00
560            ",
561            &[
562                0x00, 
563                0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00, 
564                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
565                0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00, 
566                0xFF, 0xFF, 0xFF, 0xFF,
567                0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
568            ],
569            Some(&[0x01, 0x02, 0x03]),
570        );
571    }
572}