bmatcher_core/compiler/
optimizer.rs

1use alloc::vec::Vec;
2
3use crate::{
4    pattern::BinaryPattern,
5    Atom,
6    GenericBinaryPattern,
7};
8
9struct Branch {
10    /// Index of the branch atom
11    atom_index: usize,
12
13    /// Index of the right branch
14    right_index: usize,
15
16    /// Index of when the branch ends
17    end_index: usize,
18}
19
20struct Optimizer {
21    atoms: Vec<Atom>,
22    bytes: Vec<u8>,
23
24    branches: Vec<Branch>,
25}
26
27impl Optimizer {
28    pub fn new(pattern: &dyn BinaryPattern) -> Self {
29        let mut branches = Vec::with_capacity(8);
30
31        for (index, atom) in pattern.atoms().iter().enumerate() {
32            let Atom::Branch {
33                left_len,
34                right_len,
35            } = atom
36            else {
37                continue;
38            };
39
40            branches.push(Branch {
41                atom_index: index,
42                right_index: index + 1 + *left_len as usize,
43                end_index: index + 1 + *left_len as usize + *right_len as usize,
44            });
45        }
46
47        Self {
48            atoms: pattern.atoms().to_vec(),
49            bytes: pattern.byte_sequence().to_vec(),
50            branches,
51        }
52    }
53
54    pub fn optimize(mut self) -> GenericBinaryPattern<'static> {
55        while self.atoms.len() > 1 {
56            if self.join_byte_sequences() {
57                continue;
58            }
59
60            if self.join_wildcards_fixed() {
61                continue;
62            }
63
64            if self.join_wildcards_ranged() {
65                continue;
66            }
67
68            if self.join_wildcards() {
69                continue;
70            }
71
72            /* finished optimizing */
73            break;
74        }
75
76        self.fixup_branches();
77
78        GenericBinaryPattern::new(self.atoms, self.bytes)
79    }
80
81    fn fixup_branches(&mut self) {
82        for branch in self.branches.iter() {
83            self.atoms[branch.atom_index] = Atom::Branch {
84                left_len: (branch.right_index - branch.atom_index - 1) as u16,
85                right_len: (branch.end_index - branch.right_index) as u16,
86            };
87        }
88    }
89
90    fn iter_merge_pair(&mut self, merger: impl Fn(&Atom, &Atom) -> Option<Atom>) -> bool {
91        let mut updated = false;
92        let mut index = 0;
93        while index + 1 < self.atoms.len() {
94            let is_across_branch = self
95                .branches
96                .iter()
97                .any(|branch| index + 1 == branch.right_index || index + 1 == branch.end_index);
98            if is_across_branch {
99                index += 1;
100                continue;
101            }
102
103            let left = &self.atoms[index];
104            let right = &self.atoms[index + 1];
105
106            if let Some(new) = merger(left, right) {
107                self.atoms[index] = new;
108                self.atoms.remove(index + 1);
109
110                for branch in self.branches.iter_mut() {
111                    if branch.atom_index > index {
112                        branch.atom_index -= 1;
113                    }
114
115                    if branch.right_index > index {
116                        branch.right_index -= 1;
117                    }
118
119                    if branch.end_index > index {
120                        branch.end_index -= 1;
121                    }
122                }
123
124                updated = true;
125            } else {
126                index += 1;
127            }
128        }
129
130        updated
131    }
132
133    fn join_byte_sequences(&mut self) -> bool {
134        self.iter_merge_pair(|left, right| {
135            let Atom::ByteSequence {
136                seq_start: left_start,
137                seq_end: left_end,
138            } = *left
139            else {
140                return None;
141            };
142
143            let Atom::ByteSequence {
144                seq_start: right_start,
145                seq_end: right_end,
146            } = *right
147            else {
148                return None;
149            };
150
151            if left_end == right_start {
152                Some(Atom::ByteSequence {
153                    seq_start: left_start,
154                    seq_end: right_end,
155                })
156            } else {
157                None
158            }
159        })
160    }
161
162    fn join_wildcards_fixed(&mut self) -> bool {
163        self.iter_merge_pair(|left, right| {
164            let Atom::WildcardFixed(left_value) = *left else {
165                return None;
166            };
167            let Atom::WildcardFixed(right_value) = *right else {
168                return None;
169            };
170            Some(Atom::WildcardFixed(left_value + right_value))
171        })
172    }
173
174    fn join_wildcards_ranged(&mut self) -> bool {
175        self.iter_merge_pair(|left, right| {
176            let Atom::WildcardRange {
177                min: left_min,
178                max: left_max,
179            } = *left
180            else {
181                return None;
182            };
183
184            let Atom::WildcardRange {
185                min: right_min,
186                max: right_max,
187            } = *right
188            else {
189                return None;
190            };
191
192            Some(Atom::WildcardRange {
193                min: left_min + right_min,
194                max: left_max + right_max,
195            })
196        })
197    }
198
199    fn join_wildcards(&mut self) -> bool {
200        self.iter_merge_pair(|left, right| {
201            if let Atom::WildcardFixed(fixed) = *left {
202                let Atom::WildcardRange { min, max } = *right else {
203                    return None;
204                };
205
206                Some(Atom::WildcardRange {
207                    min: min + fixed,
208                    max: max + fixed,
209                })
210            } else if let Atom::WildcardRange { min, max } = *left {
211                let Atom::WildcardFixed(fixed) = *right else {
212                    return None;
213                };
214
215                Some(Atom::WildcardRange {
216                    min: min + fixed,
217                    max: max + fixed,
218                })
219            } else {
220                None
221            }
222        })
223    }
224}
225
226/// Optimize a [BinaryPattern] to increase matching performance.  
227/// The following optimazations are performed:  
228/// - Join byte sequence matches
229/// - Join ranged wildcards
230/// - Join fixed wildcards
231/// - Join fixed and ranged wildcards
232pub fn optimize_pattern(pattern: &dyn BinaryPattern) -> GenericBinaryPattern<'static> {
233    let optimizer = Optimizer::new(pattern);
234    optimizer.optimize()
235}
236
237#[cfg(test)]
238mod test {
239    use crate::{
240        pattern::BinaryPattern,
241        Atom,
242        GenericBinaryPattern,
243    };
244
245    fn test_optimize(input: &[Atom], expected: &[Atom]) {
246        println!("Testing: {:?}", input);
247        let result = super::optimize_pattern(&GenericBinaryPattern::new(input, &[]));
248        assert_eq!(result.atoms(), expected);
249        println!(" -> success");
250    }
251
252    #[test]
253    fn test_empty() {
254        test_optimize(&[], &[]);
255        test_optimize(&[Atom::WildcardFixed(1)], &[Atom::WildcardFixed(1)]);
256    }
257
258    #[test]
259    fn test_byte_sequence() {
260        test_optimize(
261            &[
262                Atom::ByteSequence {
263                    seq_start: 0,
264                    seq_end: 1,
265                },
266                Atom::ByteSequence {
267                    seq_start: 1,
268                    seq_end: 3,
269                },
270            ],
271            &[Atom::ByteSequence {
272                seq_start: 0,
273                seq_end: 3,
274            }],
275        );
276
277        test_optimize(
278            &[
279                Atom::ByteSequence {
280                    seq_start: 0,
281                    seq_end: 1,
282                },
283                Atom::ByteSequence {
284                    seq_start: 2,
285                    seq_end: 4,
286                },
287            ],
288            &[
289                Atom::ByteSequence {
290                    seq_start: 0,
291                    seq_end: 1,
292                },
293                Atom::ByteSequence {
294                    seq_start: 2,
295                    seq_end: 4,
296                },
297            ],
298        );
299
300        test_optimize(
301            &[
302                Atom::ByteSequence {
303                    seq_start: 2,
304                    seq_end: 4,
305                },
306                Atom::ByteSequence {
307                    seq_start: 4,
308                    seq_end: 5,
309                },
310            ],
311            &[Atom::ByteSequence {
312                seq_start: 2,
313                seq_end: 5,
314            }],
315        );
316
317        test_optimize(
318            &[
319                Atom::ByteSequence {
320                    seq_start: 2,
321                    seq_end: 4,
322                },
323                Atom::ByteSequence {
324                    seq_start: 4,
325                    seq_end: 5,
326                },
327                Atom::ByteSequence {
328                    seq_start: 5,
329                    seq_end: 8,
330                },
331            ],
332            &[Atom::ByteSequence {
333                seq_start: 2,
334                seq_end: 8,
335            }],
336        );
337
338        test_optimize(
339            &[
340                Atom::ByteSequence {
341                    seq_start: 0,
342                    seq_end: 1,
343                },
344                Atom::CursorPush,
345                Atom::ByteSequence {
346                    seq_start: 1,
347                    seq_end: 3,
348                },
349            ],
350            &[
351                Atom::ByteSequence {
352                    seq_start: 0,
353                    seq_end: 1,
354                },
355                Atom::CursorPush,
356                Atom::ByteSequence {
357                    seq_start: 1,
358                    seq_end: 3,
359                },
360            ],
361        );
362    }
363
364    #[test]
365    fn test_wildcard_ranges() {
366        test_optimize(
367            &[
368                Atom::WildcardRange { min: 0, max: 10 },
369                Atom::WildcardRange { min: 5, max: 7 },
370            ],
371            &[Atom::WildcardRange { min: 5, max: 17 }],
372        );
373
374        test_optimize(
375            &[
376                Atom::WildcardRange { min: 0, max: 10 },
377                Atom::WildcardRange { min: 5, max: 7 },
378                Atom::WildcardRange { min: 2, max: 3 },
379            ],
380            &[Atom::WildcardRange { min: 7, max: 20 }],
381        );
382    }
383
384    #[test]
385    fn test_wildcard_fixed() {
386        test_optimize(
387            &[Atom::WildcardFixed(8), Atom::WildcardFixed(2)],
388            &[Atom::WildcardFixed(10)],
389        );
390
391        test_optimize(
392            &[
393                Atom::WildcardFixed(8),
394                Atom::WildcardFixed(2),
395                Atom::WildcardFixed(8),
396            ],
397            &[Atom::WildcardFixed(18)],
398        );
399    }
400
401    #[test]
402    fn test_wildcard_fixed_ranges() {
403        test_optimize(
404            &[
405                Atom::WildcardFixed(8),
406                Atom::WildcardRange { min: 2, max: 10 },
407            ],
408            &[Atom::WildcardRange { min: 10, max: 18 }],
409        );
410        test_optimize(
411            &[
412                Atom::WildcardRange { min: 2, max: 10 },
413                Atom::WildcardFixed(8),
414            ],
415            &[Atom::WildcardRange { min: 10, max: 18 }],
416        );
417
418        test_optimize(
419            &[
420                Atom::WildcardFixed(8),
421                Atom::WildcardRange { min: 2, max: 10 },
422                Atom::WildcardFixed(2),
423            ],
424            &[Atom::WildcardRange { min: 12, max: 20 }],
425        );
426
427        test_optimize(
428            &[
429                Atom::WildcardRange { min: 2, max: 10 },
430                Atom::WildcardFixed(8),
431                Atom::WildcardRange { min: 2, max: 10 },
432            ],
433            &[Atom::WildcardRange { min: 12, max: 28 }],
434        );
435    }
436
437    #[test]
438    fn test_merge_keep_branch() {
439        test_optimize(
440            &[
441                Atom::Branch {
442                    left_len: 2,
443                    right_len: 1,
444                },
445                Atom::WildcardFixed(8),
446                Atom::WildcardFixed(8),
447                Atom::WildcardFixed(8),
448            ],
449            &[
450                Atom::Branch {
451                    left_len: 1,
452                    right_len: 1,
453                },
454                Atom::WildcardFixed(16),
455                Atom::WildcardFixed(8),
456            ],
457        );
458
459        /* nested right branch */
460        test_optimize(
461            &[
462                Atom::Branch {
463                    left_len: 2,
464                    right_len: 4,
465                },
466                Atom::WildcardFixed(8),
467                Atom::WildcardFixed(8),
468                Atom::Branch {
469                    left_len: 2,
470                    right_len: 1,
471                },
472                Atom::WildcardFixed(8),
473                Atom::WildcardFixed(8),
474                Atom::WildcardFixed(8),
475            ],
476            &[
477                Atom::Branch {
478                    left_len: 1,
479                    right_len: 3,
480                },
481                Atom::WildcardFixed(16),
482                Atom::Branch {
483                    left_len: 1,
484                    right_len: 1,
485                },
486                Atom::WildcardFixed(16),
487                Atom::WildcardFixed(8),
488            ],
489        );
490
491        /* nested left branch */
492        test_optimize(
493            &[
494                Atom::Branch {
495                    left_len: 4,
496                    right_len: 2,
497                },
498                // outer left
499                Atom::Branch {
500                    left_len: 1,
501                    right_len: 2,
502                },
503                // inner left
504                Atom::WildcardFixed(8),
505                // inner right
506                Atom::WildcardFixed(8),
507                Atom::WildcardFixed(8),
508                // outer right
509                Atom::WildcardFixed(8),
510                Atom::WildcardFixed(8),
511            ],
512            &[
513                Atom::Branch {
514                    left_len: 3,
515                    right_len: 1,
516                },
517                Atom::Branch {
518                    left_len: 1,
519                    right_len: 1,
520                },
521                Atom::WildcardFixed(8),
522                Atom::WildcardFixed(16),
523                Atom::WildcardFixed(16),
524            ],
525        );
526    }
527}