bmatcher_core/
matcher.rs

1use crate::{
2    Atom,
3    BinaryPattern,
4    JumpType,
5    MatchTarget,
6    ReadWidth,
7    Stack,
8    StaticStack,
9};
10
11/// Hinting where the pattern might be matching the target
12#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
13pub enum MatchHint {
14    /// A match hint could not be generated
15    Unsupported,
16
17    /// The pattern will not match the target
18    NoMatches,
19
20    /// The pattern may match the target at the given offset
21    MaybeMatch(usize),
22}
23
24/// The `BinaryMatcher` is responsible for searching a [BinaryPattern] within a [MatchTarget].
25///
26/// Use [`BinaryMatcher::next_match`] to iterate through matches of the specified pattern.
27pub struct BinaryMatcher<'a, S: Stack<u32>, C: Stack<usize>> {
28    pattern_atoms: &'a [Atom],
29    pattern_byte_sequence: &'a [u8],
30
31    target: &'a dyn MatchTarget,
32
33    current_offset: usize,
34    match_length: usize,
35
36    save_stack: S,
37    cursor_stack: C,
38}
39
40/// Match a target against a binary pattern with a statically allocated stack.
41/// The default stack size is 0x10 for the save and cursor stack.
42pub fn execute<'a>(
43    target: &'a dyn MatchTarget,
44    pattern: &'a dyn BinaryPattern,
45) -> BinaryMatcher<'a, StaticStack<0x10, u32>, StaticStack<0x10, usize>> {
46    self::execute_with_stack(target, pattern, StaticStack::new(), StaticStack::new())
47}
48
49/// Match a target against a binary pattern and supply the save and cursor stacks on your own
50pub fn execute_with_stack<'a, S: Stack<u32>, C: Stack<usize>>(
51    target: &'a dyn MatchTarget,
52    pattern: &'a dyn BinaryPattern,
53    mut save_stack: S,
54    cursor_stack: C,
55) -> BinaryMatcher<'a, S, C> {
56    save_stack.truncate(0);
57    save_stack.push_value(0x00);
58
59    BinaryMatcher::new(pattern, target, save_stack, cursor_stack)
60}
61
62impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
63    /// Create a new binary matcher and supply the save and cursor stacks on your own
64    pub(crate) fn new(
65        pattern: &'a dyn BinaryPattern,
66        target: &'a dyn MatchTarget,
67        mut save_stack: S,
68        cursor_stack: C,
69    ) -> Self {
70        save_stack.truncate(0);
71        save_stack.push_value(0x00);
72
73        Self {
74            pattern_atoms: pattern.atoms(),
75            pattern_byte_sequence: pattern.byte_sequence(),
76
77            target,
78
79            save_stack,
80            cursor_stack,
81
82            current_offset: 0,
83            match_length: target.match_length(),
84        }
85    }
86
87    /// Matches the current atom and updates the data & atom cursor.
88    /// Returns false if the atom does not match.
89    fn match_atoms(&mut self, mut data_cursor: usize, atoms: &[Atom]) -> Option<usize> {
90        let mut atom_cursor = 0;
91        while atom_cursor < atoms.len() {
92            match atoms[atom_cursor] {
93                Atom::ByteSequence { seq_start, seq_end } => {
94                    let expected_bytes =
95                        &self.pattern_byte_sequence[seq_start as usize..seq_end as usize];
96                    let actual_bytes = self.target.subrange(data_cursor, expected_bytes.len())?;
97
98                    if expected_bytes
99                        .iter()
100                        .zip(actual_bytes.iter())
101                        .any(|(expected, data)| *expected != *data)
102                    {
103                        return None;
104                    }
105
106                    atom_cursor += 1;
107                    data_cursor += expected_bytes.len();
108                }
109                Atom::ByteSequenceMasked {
110                    seq_start,
111                    mask_start,
112                    len,
113                } => {
114                    let target_bytes = &self.pattern_byte_sequence
115                        [seq_start as usize..seq_start as usize + len as usize];
116
117                    let target_mask = &self.pattern_byte_sequence
118                        [mask_start as usize..mask_start as usize + len as usize];
119
120                    let actual_bytes = self.target.subrange(data_cursor, target_mask.len())?;
121
122                    let target_bytes = target_bytes
123                        .iter()
124                        .zip(target_mask)
125                        .map(|(value, mask)| *value & *mask);
126                    let actual_bytes = actual_bytes
127                        .iter()
128                        .zip(target_mask)
129                        .map(|(value, mask)| *value & *mask);
130
131                    if target_bytes
132                        .zip(actual_bytes)
133                        .any(|(expected, data)| expected != data)
134                    {
135                        return None;
136                    }
137
138                    atom_cursor += 1;
139                    data_cursor += len as usize;
140                }
141                Atom::WildcardFixed(length) => {
142                    atom_cursor += 1;
143                    data_cursor += length as usize;
144                }
145                Atom::WildcardRange { min, max } => {
146                    let save_stack_size = self.save_stack.len();
147                    let cursor_stack_size = self.cursor_stack.len();
148
149                    for offset in min..=max {
150                        self.save_stack.truncate(save_stack_size);
151                        self.cursor_stack.truncate(cursor_stack_size);
152                        if let Some(data_cursor) = self
153                            .match_atoms(data_cursor + offset as usize, &atoms[atom_cursor + 1..])
154                        {
155                            /* match :) */
156                            return Some(data_cursor);
157                        }
158                    }
159
160                    return None;
161                }
162
163                Atom::CursorPush => {
164                    if !self.cursor_stack.push_value(data_cursor) {
165                        /* TODO: Return error instead of abort search */
166                        return None;
167                    }
168
169                    atom_cursor += 1;
170                }
171                Atom::CursorPop { advance } => {
172                    let Some(value) = self.cursor_stack.pop_value() else {
173                        /* TODO: Return error instead of abort search */
174                        return None;
175                    };
176
177                    data_cursor = value + advance as usize;
178                    atom_cursor += 1;
179                }
180
181                Atom::Branch {
182                    left_len,
183                    right_len,
184                } => {
185                    let left_len = left_len as usize;
186                    let right_len = right_len as usize;
187
188                    let save_stack_size = self.save_stack.len();
189                    let cursor_stack_size = self.cursor_stack.len();
190
191                    let remaining_atoms = &atoms[atom_cursor + 1 + left_len + right_len..];
192                    if let Some(data_cursor) = {
193                        self.match_atoms(
194                            data_cursor,
195                            &atoms[atom_cursor + 1..atom_cursor + 1 + left_len],
196                        )
197                    } {
198                        /* Left site match. Match the rest */
199                        if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
200                            return Some(data_cursor);
201                        }
202                    }
203
204                    self.save_stack.truncate(save_stack_size);
205                    self.cursor_stack.truncate(cursor_stack_size);
206                    if let Some(data_cursor) = {
207                        self.match_atoms(
208                            data_cursor,
209                            &atoms[atom_cursor + left_len + 1
210                                ..atom_cursor + left_len + right_len + 1],
211                        )
212                    } {
213                        /* Right site match. Match the rest */
214                        if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
215                            return Some(data_cursor);
216                        }
217                    }
218
219                    return None;
220                }
221
222                Atom::Jump(mode) => {
223                    data_cursor = match mode {
224                        JumpType::RelByte => {
225                            let value = self.target.subrange(data_cursor, 1)?;
226                            (data_cursor + 1).wrapping_add_signed(value[0] as i8 as isize)
227                        }
228                        JumpType::RelDWord => {
229                            let value = self.target.subrange(data_cursor, 4)?;
230                            let value = i32::from_le_bytes(value.try_into().unwrap());
231                            (data_cursor + 4).wrapping_add_signed(value as isize)
232                        }
233                        JumpType::AbsQWord => {
234                            let value = self.target.subrange(data_cursor, 8)?;
235                            let value = u64::from_le_bytes(value.try_into().unwrap());
236                            self.target.translate_absolute_address(value)?
237                        }
238                    };
239                    atom_cursor += 1;
240                }
241
242                Atom::Read(width) => {
243                    let (value, width) = match width {
244                        ReadWidth::Byte => {
245                            let value = self.target.subrange(data_cursor, 1)?;
246                            (value[0] as u32, 1)
247                        }
248                        ReadWidth::Word => {
249                            let value = self.target.subrange(data_cursor, 2)?;
250                            (u16::from_le_bytes(value.try_into().unwrap()) as u32, 2)
251                        }
252                        ReadWidth::DWord => {
253                            let value = self.target.subrange(data_cursor, 4)?;
254                            (u32::from_le_bytes(value.try_into().unwrap()), 4)
255                        }
256                    };
257                    if !self.save_stack.push_value(value) {
258                        /* TODO: Return error instead of abort search */
259                        return None;
260                    }
261
262                    atom_cursor += 1;
263                    data_cursor += width;
264                }
265
266                Atom::SaveCursor => {
267                    if !self.save_stack.push_value(data_cursor as u32) {
268                        /* TODO: Return error instead of abort search */
269                        return None;
270                    }
271                    atom_cursor += 1;
272                }
273                Atom::SaveConstant(value) => {
274                    if !self.save_stack.push_value(value) {
275                        /* TODO: Return error instead of abort search */
276                        return None;
277                    }
278                    atom_cursor += 1;
279                }
280            }
281        }
282
283        Some(data_cursor)
284    }
285
286    /// Generate a match hint for a proper search based of the first matching bytes
287    /// given by the pattern. This algorithm assumes that thet MatchTarget is in continuous memory.
288    fn next_match_hint(&self) -> MatchHint {
289        let mut fs_buffer = [0u8; 0x10];
290        let mut fs_buffer_len = 0;
291        for atom in self.pattern_atoms {
292            match atom {
293                Atom::ByteSequence { seq_start, seq_end } => {
294                    let seq_start = *seq_start as usize;
295                    let seq_end = *seq_end as usize;
296
297                    let copy_length = (seq_end - seq_start).min(fs_buffer.len() - fs_buffer_len);
298                    fs_buffer[fs_buffer_len..fs_buffer_len + copy_length].copy_from_slice(
299                        &self.pattern_byte_sequence[seq_start..seq_start + copy_length],
300                    );
301                    fs_buffer_len += copy_length;
302                    if fs_buffer_len >= fs_buffer.len() {
303                        /* quick search buffer filled */
304                        break;
305                    }
306                }
307                Atom::CursorPush => continue,
308                Atom::SaveConstant(_) => continue,
309                Atom::SaveCursor => continue,
310                _ => break,
311            }
312        }
313
314        if fs_buffer_len == 0 {
315            /* can not berform a fuzzy search as we do not start with any binary data */
316            return MatchHint::Unsupported;
317        }
318
319        self.target
320            .match_hint(self.current_offset, &fs_buffer[0..fs_buffer_len])
321    }
322
323    fn next_match_looped(&mut self) -> Option<&[u32]> {
324        while self.current_offset < self.match_length {
325            self.save_stack.truncate(1);
326            self.cursor_stack.truncate(0);
327
328            let match_offset = self.current_offset;
329            self.current_offset += 1;
330
331            if self.match_atoms(match_offset, self.pattern_atoms).is_none() {
332                continue;
333            }
334
335            let save_stack = self.save_stack.stack_mut();
336            save_stack[0] = match_offset as u32;
337            return Some(save_stack);
338        }
339
340        None
341    }
342
343    /// Finds the next match for the associated [BinaryPattern] within the [MatchTarget].
344    ///
345    /// # Returns
346    /// - `Some(&[u32])` containing the saved stack.
347    ///    The first element of the saved stack represents the start of the matched location.  
348    ///    Subsequent elements can be pushed using the `Atom::SaveCursor` atom or the `'` command within the binary pattern.
349    /// - `None` if no further matches are available.
350    pub fn next_match(&mut self) -> Option<&[u32]> {
351        while self.current_offset < self.match_length {
352            match self.next_match_hint() {
353                MatchHint::Unsupported => {
354                    /* fall back to matching against every position */
355                    return self.next_match_looped();
356                }
357                MatchHint::NoMatches => {
358                    /* no more matches */
359                    return None;
360                }
361                MatchHint::MaybeMatch(hint_offset) => {
362                    /* check if the given offset is actually a match */
363                    self.save_stack.truncate(1);
364                    self.cursor_stack.truncate(0);
365
366                    self.current_offset = hint_offset + 1;
367                    if self.match_atoms(hint_offset, self.pattern_atoms).is_none() {
368                        /* Hint did not result in an actual match. Continue. */
369                        continue;
370                    }
371
372                    let save_stack = self.save_stack.stack_mut();
373                    save_stack[0] = hint_offset as u32;
374                    return Some(save_stack);
375                }
376            }
377        }
378
379        None
380    }
381}
382
383#[cfg(test)]
384mod test {
385    use crate::{
386        compiler::parse_pattern,
387        BinaryPattern,
388    };
389
390    const DATA: &[u8] = &[
391        0xCA, 0x70, 0x11, 0xB5, 0xA, 0x9D, 0x91, 0x83, 0xC4, 0x5A, 0xFC, 0xC7, 0x31, 0x26, 0xC3,
392        0x48, 0x3D, 0x6C, 0x16, 0xD7, 0x15, 0x91, 0xDB, 0xC4, 0x21, 0x2, 0x31, 0x4D, 0xE9, 0xD5,
393        0x52, 0xFB, 0xB7, 0x31, 0x91, 0x45, 0x35, 0xC7, 0xDA, 0xA9, 0x77, 0xFC, 0x9C, 0x3E, 0x65,
394        0x19, 0xF2, 0x5A, 0x68, 0x99, 0x21, 0xC, 0xED, 0xDC, 0x21, 0x8C, 0xA2, 0x7B, 0xBA, 0xC0,
395        0x9A, 0x94, 0x99, 0x9B, 0xB2, 0xB7, 0x69, 0x2D, 0x17, 0xA9, 0x85, 0x2C, 0xD7, 0x42, 0x43,
396        0x91, 0xF6, 0x6E, 0x34, 0xBC, 0x2F, 0xF7, 0xAE, 0xAA, 0xAE, 0xBF, 0x4, 0xE5, 0xD5, 0x9B,
397        0x13, 0x60, 0x17, 0x31, 0x87, 0xEF, 0xF1, 0x24, 0x43, 0xB4, 0x60, 0xBC, 0x9F, 0x16, 0x86,
398        0x39, 0x3D, 0x9E, 0x1, 0x68, 0x74, 0x8D, 0xD3, 0xC8, 0x6, 0x25, 0x88, 0xB0, 0x95, 0x99,
399        0xB4, 0x5D, 0xBE, 0x8B, 0xD3, 0x26, 0xCB, 0x3C,
400    ];
401
402    fn test_single(pattern: &str, data: &[u8], result: Option<&[u32]>) {
403        let pattern = parse_pattern(pattern).unwrap();
404        println!("Atoms: {:?}", pattern.atoms());
405
406        let mut matcher = super::execute(&data, &pattern);
407        assert_eq!(matcher.next_match(), result);
408    }
409
410    #[test]
411    fn test_simple() {
412        test_single("B7 69 2D", DATA, Some(&[0x41]));
413        test_single("B7 69 ' 2D", DATA, Some(&[0x41, 0x43]));
414        test_single("' B7 69 ' 2D", DATA, Some(&[0x41, 0x41, 0x43]));
415        test_single("B7 69 3D", DATA, None);
416    }
417
418    #[test]
419    fn test_binary_mask() {
420        test_single("0xB76?2D", DATA, Some(&[0x41]));
421    }
422
423    #[test]
424    fn test_range() {
425        test_single("B7 69 2D [0-3] 85 2C '", DATA, Some(&[0x41, 0x48]));
426        test_single("B7 69 2D [0-1] 85 2C '", DATA, None);
427    }
428
429    #[test]
430    fn test_branch() {
431        test_single("B7 (69 | 70) 2D", DATA, Some(&[0x41]));
432        test_single("B7 (70 | 69) 2D", DATA, Some(&[0x41]));
433
434        /* optional 0x70 */
435        test_single("B7 (70 | ) 69 2D", DATA, Some(&[0x41]));
436    }
437
438    #[test]
439    fn test_jmp() {
440        test_single(
441            "EB % FF",
442            &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE],
443            Some(&[0x01]),
444        );
445        test_single("EB % EE", &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE], None);
446        test_single(
447            "EB % EE",
448            &[0x00, 0xEB, 0x00, 0xEE, 0xFF, 0xEE],
449            Some(&[0x01]),
450        );
451
452        test_single(
453            "E9 $ EE",
454            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xFF],
455            None,
456        );
457
458        test_single(
459            "E9 $ EE '",
460            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE],
461            Some(&[0x01, 0x08]),
462        );
463
464        test_single(
465            "E9 $ { EE FF } '",
466            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE, 0xFF],
467            Some(&[0x01, 0x06]),
468        );
469    }
470
471    #[test]
472    fn test_branch_behaviour() {
473        #[rustfmt::skip]
474        test_single(
475            "
476            48 8B 83 r4
477            [14]
478            48 8B 88 r4
479            ([4] | [0])
480            48 89 91 F8 05 00 00
481            ",
482            &[
483                0x00, 
484                0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00, 
485                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
486                0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00, 
487                0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
488            ],
489            Some(&[0x01, 0x02, 0x03]),
490        );
491
492        #[rustfmt::skip]
493        test_single(
494            "
495            48 8B 83 r4
496            [14]
497            48 8B 88 r4
498            ([4] | [0])
499            48 89 91 F8 05 00 00
500            ",
501            &[
502                0x00, 
503                0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00, 
504                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
505                0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00, 
506                0xFF, 0xFF, 0xFF, 0xFF,
507                0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
508            ],
509            Some(&[0x01, 0x02, 0x03]),
510        );
511    }
512}