bmatcher_core/
matcher.rs

1use crate::{
2    Atom,
3    BinaryPattern,
4    JumpType,
5    MatchTarget,
6    ReadWidth,
7    Stack,
8    StaticStack,
9};
10
11/// Hinting where the pattern might be matching the target
12pub enum MatchHint {
13    /// A match hint could not be generated
14    Unsupported,
15
16    /// The pattern will not match the target
17    NoMatches,
18
19    /// The pattern may match the target at the given offset
20    MaybeMatch(usize),
21}
22
23/// The `BinaryMatcher` is responsible for searching a [BinaryPattern] within a [MatchTarget].
24///
25/// Use [`BinaryMatcher::next_match`] to iterate through matches of the specified pattern.
26pub struct BinaryMatcher<'a, S: Stack<u32>, C: Stack<usize>> {
27    pattern_atoms: &'a [Atom],
28    pattern_byte_sequence: &'a [u8],
29
30    target: &'a dyn MatchTarget,
31
32    current_offset: usize,
33    match_length: usize,
34
35    save_stack: S,
36    cursor_stack: C,
37}
38
39/// Match a target against a binary pattern with a statically allocated stack.
40/// The default stack size is 0x10 for the save and cursor stack.
41pub fn execute<'a>(
42    target: &'a dyn MatchTarget,
43    pattern: &'a dyn BinaryPattern,
44) -> BinaryMatcher<'a, StaticStack<0x10, u32>, StaticStack<0x10, usize>> {
45    self::execute_with_stack(target, pattern, StaticStack::new(), StaticStack::new())
46}
47
48/// Match a target against a binary pattern and supply the save and cursor stacks on your own
49pub fn execute_with_stack<'a, S: Stack<u32>, C: Stack<usize>>(
50    target: &'a dyn MatchTarget,
51    pattern: &'a dyn BinaryPattern,
52    mut save_stack: S,
53    cursor_stack: C,
54) -> BinaryMatcher<'a, S, C> {
55    save_stack.truncate(0);
56    save_stack.push_value(0x00);
57
58    BinaryMatcher::new(pattern, target, save_stack, cursor_stack)
59}
60
61impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
62    /// Create a new binary matcher and supply the save and cursor stacks on your own
63    pub(crate) fn new(
64        pattern: &'a dyn BinaryPattern,
65        target: &'a dyn MatchTarget,
66        mut save_stack: S,
67        cursor_stack: C,
68    ) -> Self {
69        save_stack.truncate(0);
70        save_stack.push_value(0x00);
71
72        Self {
73            pattern_atoms: pattern.atoms(),
74            pattern_byte_sequence: pattern.byte_sequence(),
75
76            target,
77
78            save_stack,
79            cursor_stack,
80
81            current_offset: 0,
82            match_length: target.match_length(),
83        }
84    }
85
86    /// Matches the current atom and updates the data & atom cursor.
87    /// Returns false if the atom does not match.
88    fn match_atoms(&mut self, mut data_cursor: usize, atoms: &[Atom]) -> Option<usize> {
89        let mut atom_cursor = 0;
90        while atom_cursor < atoms.len() {
91            match atoms[atom_cursor] {
92                Atom::ByteSequence { seq_start, seq_end } => {
93                    let expected_bytes =
94                        &self.pattern_byte_sequence[seq_start as usize..seq_end as usize];
95                    let actual_bytes = self.target.subrange(data_cursor, expected_bytes.len())?;
96
97                    if expected_bytes
98                        .iter()
99                        .zip(actual_bytes.iter())
100                        .any(|(expected, data)| *expected != *data)
101                    {
102                        return None;
103                    }
104
105                    atom_cursor += 1;
106                    data_cursor += expected_bytes.len();
107                }
108                Atom::ByteSequenceMasked {
109                    seq_start,
110                    mask_start,
111                    len,
112                } => {
113                    let target_bytes = &self.pattern_byte_sequence
114                        [seq_start as usize..seq_start as usize + len as usize];
115
116                    let target_mask = &self.pattern_byte_sequence
117                        [mask_start as usize..mask_start as usize + len as usize];
118
119                    let actual_bytes = self.target.subrange(data_cursor, target_mask.len())?;
120
121                    let target_bytes = target_bytes
122                        .iter()
123                        .zip(target_mask)
124                        .map(|(value, mask)| *value & *mask);
125                    let actual_bytes = actual_bytes
126                        .iter()
127                        .zip(target_mask)
128                        .map(|(value, mask)| *value & *mask);
129
130                    if target_bytes
131                        .zip(actual_bytes)
132                        .any(|(expected, data)| expected != data)
133                    {
134                        return None;
135                    }
136
137                    atom_cursor += 1;
138                    data_cursor += len as usize;
139                }
140                Atom::WildcardFixed(length) => {
141                    atom_cursor += 1;
142                    data_cursor += length as usize;
143                }
144                Atom::WildcardRange { min, max } => {
145                    let save_stack_size = self.save_stack.len();
146                    let cursor_stack_size = self.cursor_stack.len();
147
148                    for offset in min..=max {
149                        self.save_stack.truncate(save_stack_size);
150                        self.cursor_stack.truncate(cursor_stack_size);
151                        if let Some(data_cursor) = self
152                            .match_atoms(data_cursor + offset as usize, &atoms[atom_cursor + 1..])
153                        {
154                            /* match :) */
155                            return Some(data_cursor);
156                        }
157                    }
158
159                    return None;
160                }
161
162                Atom::CursorPush => {
163                    if !self.cursor_stack.push_value(data_cursor) {
164                        /* TODO: Return error instead of abort search */
165                        return None;
166                    }
167
168                    atom_cursor += 1;
169                }
170                Atom::CursorPop { advance } => {
171                    let Some(value) = self.cursor_stack.pop_value() else {
172                        /* TODO: Return error instead of abort search */
173                        return None;
174                    };
175
176                    data_cursor = value + advance as usize;
177                    atom_cursor += 1;
178                }
179
180                Atom::Branch {
181                    left_len,
182                    right_len,
183                } => {
184                    let left_len = left_len as usize;
185                    let right_len = right_len as usize;
186
187                    let save_stack_size = self.save_stack.len();
188                    let cursor_stack_size = self.cursor_stack.len();
189
190                    let remaining_atoms = &atoms[atom_cursor + 1 + left_len + right_len..];
191                    if let Some(data_cursor) = {
192                        self.match_atoms(
193                            data_cursor,
194                            &atoms[atom_cursor + 1..atom_cursor + 1 + left_len],
195                        )
196                    } {
197                        /* Left site match. Match the rest */
198                        if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
199                            return Some(data_cursor);
200                        }
201                    }
202
203                    self.save_stack.truncate(save_stack_size);
204                    self.cursor_stack.truncate(cursor_stack_size);
205                    if let Some(data_cursor) = {
206                        self.match_atoms(
207                            data_cursor,
208                            &atoms[atom_cursor + left_len + 1
209                                ..atom_cursor + left_len + right_len + 1],
210                        )
211                    } {
212                        /* Right site match. Match the rest */
213                        if let Some(data_cursor) = self.match_atoms(data_cursor, remaining_atoms) {
214                            return Some(data_cursor);
215                        }
216                    }
217
218                    return None;
219                }
220
221                Atom::Jump(mode) => {
222                    data_cursor = match mode {
223                        JumpType::RelByte => {
224                            let value = self.target.subrange(data_cursor, 1)?;
225                            (data_cursor + 1).wrapping_add_signed(value[0] as i8 as isize)
226                        }
227                        JumpType::RelDWord => {
228                            let value = self.target.subrange(data_cursor, 4)?;
229                            let value = i32::from_le_bytes(value.try_into().unwrap());
230                            (data_cursor + 4).wrapping_add_signed(value as isize)
231                        }
232                        JumpType::AbsQWord => {
233                            let value = self.target.subrange(data_cursor, 8)?;
234                            let value = u64::from_le_bytes(value.try_into().unwrap());
235                            self.target.translate_absolute_address(value)?
236                        }
237                    };
238                    atom_cursor += 1;
239                }
240
241                Atom::Read(width) => {
242                    let (value, width) = match width {
243                        ReadWidth::Byte => {
244                            let value = self.target.subrange(data_cursor, 1)?;
245                            (value[0] as u32, 1)
246                        }
247                        ReadWidth::Word => {
248                            let value = self.target.subrange(data_cursor, 2)?;
249                            (u16::from_le_bytes(value.try_into().unwrap()) as u32, 2)
250                        }
251                        ReadWidth::DWord => {
252                            let value = self.target.subrange(data_cursor, 4)?;
253                            (u32::from_le_bytes(value.try_into().unwrap()), 4)
254                        }
255                    };
256                    if !self.save_stack.push_value(value) {
257                        /* TODO: Return error instead of abort search */
258                        return None;
259                    }
260
261                    atom_cursor += 1;
262                    data_cursor += width;
263                }
264
265                Atom::SaveCursor => {
266                    if !self.save_stack.push_value(data_cursor as u32) {
267                        /* TODO: Return error instead of abort search */
268                        return None;
269                    }
270                    atom_cursor += 1;
271                }
272                Atom::SaveConstant(value) => {
273                    if !self.save_stack.push_value(value) {
274                        /* TODO: Return error instead of abort search */
275                        return None;
276                    }
277                    atom_cursor += 1;
278                }
279            }
280        }
281
282        Some(data_cursor)
283    }
284
285    /// Generate a match hint for a proper search based of the first matching bytes
286    /// given by the pattern. This algorithm assumes that thet MatchTarget is in continuous memory.
287    fn next_match_hint(&self) -> MatchHint {
288        let mut fs_buffer = [0u8; 0x10];
289        let mut fs_buffer_len = 0;
290        for atom in self.pattern_atoms {
291            match atom {
292                Atom::ByteSequence { seq_start, seq_end } => {
293                    let seq_start = *seq_start as usize;
294                    let seq_end = *seq_end as usize;
295
296                    let copy_length = (seq_end - seq_start).min(fs_buffer.len() - fs_buffer_len);
297                    fs_buffer[fs_buffer_len..fs_buffer_len + copy_length].copy_from_slice(
298                        &self.pattern_byte_sequence[seq_start..seq_start + copy_length],
299                    );
300                    fs_buffer_len += copy_length;
301                    if fs_buffer_len >= fs_buffer.len() {
302                        /* quick search buffer filled */
303                        break;
304                    }
305                }
306                Atom::CursorPush => continue,
307                Atom::SaveConstant(_) => continue,
308                Atom::SaveCursor => continue,
309                Atom::Read(_) => continue,
310                _ => break,
311            }
312        }
313
314        if fs_buffer_len == 0 {
315            /* can not berform a fuzzy search as we do not start with any binary data */
316            return MatchHint::Unsupported;
317        }
318
319        self.target
320            .match_hint(self.current_offset, &fs_buffer[0..fs_buffer_len])
321    }
322
323    fn next_match_looped(&mut self) -> Option<&[u32]> {
324        while self.current_offset < self.match_length {
325            self.save_stack.truncate(1);
326            self.cursor_stack.truncate(0);
327
328            let match_offset = self.current_offset;
329            self.current_offset += 1;
330
331            if self.match_atoms(match_offset, self.pattern_atoms).is_none() {
332                continue;
333            }
334
335            let save_stack = self.save_stack.stack_mut();
336            save_stack[0] = match_offset as u32;
337            return Some(save_stack);
338        }
339
340        None
341    }
342
343    /// Finds the next match for the associated [BinaryPattern] within the [MatchTarget].
344    ///
345    /// # Returns
346    /// - `Some(&[u32])` containing the saved stack.
347    ///    The first element of the saved stack represents the start of the matched location.  
348    ///    Subsequent elements can be pushed using the `Atom::SaveCursor` atom or the `'` command within the binary pattern.
349    /// - `None` if no further matches are available.
350    pub fn next_match(&mut self) -> Option<&[u32]> {
351        while self.current_offset < self.match_length {
352            match self.next_match_hint() {
353                MatchHint::Unsupported => {
354                    /* fall back to matching against every position */
355                    return self.next_match_looped();
356                }
357                MatchHint::NoMatches => {
358                    /* no more matches */
359                    return None;
360                }
361                MatchHint::MaybeMatch(hint_offset) => {
362                    /* check if the given offset is actually a match */
363                    self.save_stack.truncate(1);
364                    self.cursor_stack.truncate(0);
365
366                    self.current_offset = hint_offset + 1;
367                    if self.match_atoms(hint_offset, self.pattern_atoms).is_none() {
368                        /* Hint did not result in an actual match. Continue. */
369                        continue;
370                    }
371
372                    let save_stack = self.save_stack.stack_mut();
373                    save_stack[0] = hint_offset as u32;
374                    return Some(save_stack);
375                }
376            }
377        }
378
379        None
380    }
381}
382
383#[cfg(test)]
384mod test {
385    use crate::{
386        compiler::parse_pattern,
387        BinaryPattern,
388    };
389
390    const DATA: &[u8] = &[
391        0xCA, 0x70, 0x11, 0xB5, 0xA, 0x9D, 0x91, 0x83, 0xC4, 0x5A, 0xFC, 0xC7, 0x31, 0x26, 0xC3,
392        0x48, 0x3D, 0x6C, 0x16, 0xD7, 0x15, 0x91, 0xDB, 0xC4, 0x21, 0x2, 0x31, 0x4D, 0xE9, 0xD5,
393        0x52, 0xFB, 0xB7, 0x31, 0x91, 0x45, 0x35, 0xC7, 0xDA, 0xA9, 0x77, 0xFC, 0x9C, 0x3E, 0x65,
394        0x19, 0xF2, 0x5A, 0x68, 0x99, 0x21, 0xC, 0xED, 0xDC, 0x21, 0x8C, 0xA2, 0x7B, 0xBA, 0xC0,
395        0x9A, 0x94, 0x99, 0x9B, 0xB2, 0xB7, 0x69, 0x2D, 0x17, 0xA9, 0x85, 0x2C, 0xD7, 0x42, 0x43,
396        0x91, 0xF6, 0x6E, 0x34, 0xBC, 0x2F, 0xF7, 0xAE, 0xAA, 0xAE, 0xBF, 0x4, 0xE5, 0xD5, 0x9B,
397        0x13, 0x60, 0x17, 0x31, 0x87, 0xEF, 0xF1, 0x24, 0x43, 0xB4, 0x60, 0xBC, 0x9F, 0x16, 0x86,
398        0x39, 0x3D, 0x9E, 0x1, 0x68, 0x74, 0x8D, 0xD3, 0xC8, 0x6, 0x25, 0x88, 0xB0, 0x95, 0x99,
399        0xB4, 0x5D, 0xBE, 0x8B, 0xD3, 0x26, 0xCB, 0x3C,
400    ];
401
402    fn test_single(pattern: &str, data: &[u8], result: Option<&[u32]>) {
403        let pattern = parse_pattern(pattern).unwrap();
404        println!("Atoms: {:?}", pattern.atoms());
405
406        let mut matcher = super::execute(&data, &pattern);
407        assert_eq!(matcher.next_match(), result);
408    }
409
410    #[test]
411    fn test_simple() {
412        test_single("B7 69 2D", DATA, Some(&[0x41]));
413        test_single("B7 69 ' 2D", DATA, Some(&[0x41, 0x43]));
414        test_single("' B7 69 ' 2D", DATA, Some(&[0x41, 0x41, 0x43]));
415        test_single("B7 69 3D", DATA, None);
416    }
417
418    #[test]
419    fn test_binary_mask() {
420        test_single("0xB76?2D", DATA, Some(&[0x41]));
421    }
422
423    #[test]
424    fn test_range() {
425        test_single("B7 69 2D [0-3] 85 2C '", DATA, Some(&[0x41, 0x48]));
426        test_single("B7 69 2D [0-1] 85 2C '", DATA, None);
427    }
428
429    #[test]
430    fn test_branch() {
431        test_single("B7 (69 | 70) 2D", DATA, Some(&[0x41]));
432        test_single("B7 (70 | 69) 2D", DATA, Some(&[0x41]));
433
434        /* optional 0x70 */
435        test_single("B7 (70 | ) 69 2D", DATA, Some(&[0x41]));
436    }
437
438    #[test]
439    fn test_jmp() {
440        test_single(
441            "EB % FF",
442            &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE],
443            Some(&[0x01]),
444        );
445        test_single("EB % EE", &[0x00, 0xEB, 0x01, 0xEE, 0xFF, 0xEE], None);
446        test_single(
447            "EB % EE",
448            &[0x00, 0xEB, 0x00, 0xEE, 0xFF, 0xEE],
449            Some(&[0x01]),
450        );
451
452        test_single(
453            "E9 $ EE",
454            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xFF],
455            None,
456        );
457
458        test_single(
459            "E9 $ EE '",
460            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE],
461            Some(&[0x01, 0x08]),
462        );
463
464        test_single(
465            "E9 $ { EE FF } '",
466            &[0x00, 0xE9, 0x01, 0x00, 0x00, 0x00, 0xEE, 0xEE, 0xFF],
467            Some(&[0x01, 0x06]),
468        );
469    }
470
471    #[test]
472    fn test_branch_behaviour() {
473        #[rustfmt::skip]
474        test_single(
475            "
476            48 8B 83 r4
477            [14]
478            48 8B 88 r4
479            ([4] | [0])
480            48 89 91 F8 05 00 00
481            ",
482            &[
483                0x00, 
484                0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00, 
485                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
486                0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00, 
487                0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
488            ],
489            Some(&[0x01, 0x02, 0x03]),
490        );
491
492        #[rustfmt::skip]
493        test_single(
494            "
495            48 8B 83 r4
496            [14]
497            48 8B 88 r4
498            ([4] | [0])
499            48 89 91 F8 05 00 00
500            ",
501            &[
502                0x00, 
503                0x48, 0x8B, 0x83, 0x02, 0x00, 0x00, 0x00, 
504                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
505                0x48, 0x8B, 0x88, 0x03, 0x00, 0x00, 0x00, 
506                0xFF, 0xFF, 0xFF, 0xFF,
507                0x48, 0x89, 0x91, 0xF8, 0x05, 0x00, 0x00,
508            ],
509            Some(&[0x01, 0x02, 0x03]),
510        );
511    }
512}