bmatcher_core/compiler/
optimizer.rs

1use alloc::vec::Vec;
2
3use crate::{
4    pattern::{
5        BinaryPattern,
6        OwnedBinaryPattern,
7    },
8    Atom,
9};
10
11struct Branch {
12    /// Index of the branch atom
13    atom_index: usize,
14
15    /// Index of the right branch
16    right_index: usize,
17
18    /// Index of when the branch ends
19    end_index: usize,
20}
21
22struct Optimizer {
23    atoms: Vec<Atom>,
24    bytes: Vec<u8>,
25
26    branches: Vec<Branch>,
27}
28
29impl Optimizer {
30    pub fn new(pattern: &dyn BinaryPattern) -> Self {
31        let mut branches = Vec::with_capacity(8);
32
33        for (index, atom) in pattern.atoms().iter().enumerate() {
34            let Atom::Branch {
35                left_len,
36                right_len,
37            } = atom
38            else {
39                continue;
40            };
41
42            branches.push(Branch {
43                atom_index: index,
44                right_index: index + 1 + *left_len as usize,
45                end_index: index + 1 + *left_len as usize + *right_len as usize,
46            });
47        }
48
49        Self {
50            atoms: pattern.atoms().to_vec(),
51            bytes: pattern.byte_sequence().to_vec(),
52            branches,
53        }
54    }
55
56    pub fn optimize(mut self) -> OwnedBinaryPattern {
57        while self.atoms.len() > 1 {
58            if self.join_byte_sequences() {
59                continue;
60            }
61
62            if self.join_wildcards_fixed() {
63                continue;
64            }
65
66            if self.join_wildcards_ranged() {
67                continue;
68            }
69
70            if self.join_wildcards() {
71                continue;
72            }
73
74            /* finished optimizing */
75            break;
76        }
77
78        self.fixup_branches();
79
80        OwnedBinaryPattern::new(self.atoms, self.bytes)
81    }
82
83    fn fixup_branches(&mut self) {
84        for branch in self.branches.iter() {
85            self.atoms[branch.atom_index] = Atom::Branch {
86                left_len: (branch.right_index - branch.atom_index - 1) as u16,
87                right_len: (branch.end_index - branch.right_index) as u16,
88            };
89        }
90    }
91
92    fn iter_merge_pair(&mut self, merger: impl Fn(&Atom, &Atom) -> Option<Atom>) -> bool {
93        let mut updated = false;
94        let mut index = 0;
95        while index + 1 < self.atoms.len() {
96            let is_across_branch = self
97                .branches
98                .iter()
99                .any(|branch| index + 1 == branch.right_index || index + 1 == branch.end_index);
100            if is_across_branch {
101                index += 1;
102                continue;
103            }
104
105            let left = &self.atoms[index];
106            let right = &self.atoms[index + 1];
107
108            if let Some(new) = merger(left, right) {
109                self.atoms[index] = new;
110                self.atoms.remove(index + 1);
111
112                for branch in self.branches.iter_mut() {
113                    if branch.atom_index > index {
114                        branch.atom_index -= 1;
115                    }
116
117                    if branch.right_index > index {
118                        branch.right_index -= 1;
119                    }
120
121                    if branch.end_index > index {
122                        branch.end_index -= 1;
123                    }
124                }
125
126                updated = true;
127            } else {
128                index += 1;
129            }
130        }
131
132        updated
133    }
134
135    fn join_byte_sequences(&mut self) -> bool {
136        self.iter_merge_pair(|left, right| {
137            let Atom::ByteSequence {
138                seq_start: left_start,
139                seq_end: left_end,
140            } = *left
141            else {
142                return None;
143            };
144
145            let Atom::ByteSequence {
146                seq_start: right_start,
147                seq_end: right_end,
148            } = *right
149            else {
150                return None;
151            };
152
153            if left_end == right_start {
154                Some(Atom::ByteSequence {
155                    seq_start: left_start,
156                    seq_end: right_end,
157                })
158            } else {
159                None
160            }
161        })
162    }
163
164    fn join_wildcards_fixed(&mut self) -> bool {
165        self.iter_merge_pair(|left, right| {
166            let Atom::WildcardFixed(left_value) = *left else {
167                return None;
168            };
169            let Atom::WildcardFixed(right_value) = *right else {
170                return None;
171            };
172            Some(Atom::WildcardFixed(left_value + right_value))
173        })
174    }
175
176    fn join_wildcards_ranged(&mut self) -> bool {
177        self.iter_merge_pair(|left, right| {
178            let Atom::WildcardRange {
179                min: left_min,
180                max: left_max,
181            } = *left
182            else {
183                return None;
184            };
185
186            let Atom::WildcardRange {
187                min: right_min,
188                max: right_max,
189            } = *right
190            else {
191                return None;
192            };
193
194            Some(Atom::WildcardRange {
195                min: left_min + right_min,
196                max: left_max + right_max,
197            })
198        })
199    }
200
201    fn join_wildcards(&mut self) -> bool {
202        self.iter_merge_pair(|left, right| {
203            if let Atom::WildcardFixed(fixed) = *left {
204                let Atom::WildcardRange { min, max } = *right else {
205                    return None;
206                };
207
208                Some(Atom::WildcardRange {
209                    min: min + fixed,
210                    max: max + fixed,
211                })
212            } else if let Atom::WildcardRange { min, max } = *left {
213                let Atom::WildcardFixed(fixed) = *right else {
214                    return None;
215                };
216
217                Some(Atom::WildcardRange {
218                    min: min + fixed,
219                    max: max + fixed,
220                })
221            } else {
222                None
223            }
224        })
225    }
226}
227
228/// Optimize a [BinaryPattern] to increase matching performance.  
229/// The following optimazations are performed:  
230/// - Join byte sequence matches
231/// - Join ranged wildcards
232/// - Join fixed wildcards
233/// - Join fixed and ranged wildcards
234pub fn optimize_pattern(pattern: &dyn BinaryPattern) -> OwnedBinaryPattern {
235    let optimizer = Optimizer::new(pattern);
236    optimizer.optimize()
237}
238
239#[cfg(test)]
240mod test {
241    use crate::{
242        pattern::{
243            BinaryPattern,
244            OwnedBinaryPattern,
245        },
246        Atom,
247    };
248
249    fn test_optimize(input: &[Atom], expected: &[Atom]) {
250        println!("Testing: {:?}", input);
251        let result = super::optimize_pattern(&OwnedBinaryPattern::new(input.to_vec(), vec![]));
252        assert_eq!(result.atoms(), expected);
253        println!(" -> success");
254    }
255
256    #[test]
257    fn test_empty() {
258        test_optimize(&[], &[]);
259        test_optimize(&[Atom::WildcardFixed(1)], &[Atom::WildcardFixed(1)]);
260    }
261
262    #[test]
263    fn test_byte_sequence() {
264        test_optimize(
265            &[
266                Atom::ByteSequence {
267                    seq_start: 0,
268                    seq_end: 1,
269                },
270                Atom::ByteSequence {
271                    seq_start: 1,
272                    seq_end: 3,
273                },
274            ],
275            &[Atom::ByteSequence {
276                seq_start: 0,
277                seq_end: 3,
278            }],
279        );
280
281        test_optimize(
282            &[
283                Atom::ByteSequence {
284                    seq_start: 0,
285                    seq_end: 1,
286                },
287                Atom::ByteSequence {
288                    seq_start: 2,
289                    seq_end: 4,
290                },
291            ],
292            &[
293                Atom::ByteSequence {
294                    seq_start: 0,
295                    seq_end: 1,
296                },
297                Atom::ByteSequence {
298                    seq_start: 2,
299                    seq_end: 4,
300                },
301            ],
302        );
303
304        test_optimize(
305            &[
306                Atom::ByteSequence {
307                    seq_start: 2,
308                    seq_end: 4,
309                },
310                Atom::ByteSequence {
311                    seq_start: 4,
312                    seq_end: 5,
313                },
314            ],
315            &[Atom::ByteSequence {
316                seq_start: 2,
317                seq_end: 5,
318            }],
319        );
320
321        test_optimize(
322            &[
323                Atom::ByteSequence {
324                    seq_start: 2,
325                    seq_end: 4,
326                },
327                Atom::ByteSequence {
328                    seq_start: 4,
329                    seq_end: 5,
330                },
331                Atom::ByteSequence {
332                    seq_start: 5,
333                    seq_end: 8,
334                },
335            ],
336            &[Atom::ByteSequence {
337                seq_start: 2,
338                seq_end: 8,
339            }],
340        );
341
342        test_optimize(
343            &[
344                Atom::ByteSequence {
345                    seq_start: 0,
346                    seq_end: 1,
347                },
348                Atom::CursorPush,
349                Atom::ByteSequence {
350                    seq_start: 1,
351                    seq_end: 3,
352                },
353            ],
354            &[
355                Atom::ByteSequence {
356                    seq_start: 0,
357                    seq_end: 1,
358                },
359                Atom::CursorPush,
360                Atom::ByteSequence {
361                    seq_start: 1,
362                    seq_end: 3,
363                },
364            ],
365        );
366    }
367
368    #[test]
369    fn test_wildcard_ranges() {
370        test_optimize(
371            &[
372                Atom::WildcardRange { min: 0, max: 10 },
373                Atom::WildcardRange { min: 5, max: 7 },
374            ],
375            &[Atom::WildcardRange { min: 5, max: 17 }],
376        );
377
378        test_optimize(
379            &[
380                Atom::WildcardRange { min: 0, max: 10 },
381                Atom::WildcardRange { min: 5, max: 7 },
382                Atom::WildcardRange { min: 2, max: 3 },
383            ],
384            &[Atom::WildcardRange { min: 7, max: 20 }],
385        );
386    }
387
388    #[test]
389    fn test_wildcard_fixed() {
390        test_optimize(
391            &[Atom::WildcardFixed(8), Atom::WildcardFixed(2)],
392            &[Atom::WildcardFixed(10)],
393        );
394
395        test_optimize(
396            &[
397                Atom::WildcardFixed(8),
398                Atom::WildcardFixed(2),
399                Atom::WildcardFixed(8),
400            ],
401            &[Atom::WildcardFixed(18)],
402        );
403    }
404
405    #[test]
406    fn test_wildcard_fixed_ranges() {
407        test_optimize(
408            &[
409                Atom::WildcardFixed(8),
410                Atom::WildcardRange { min: 2, max: 10 },
411            ],
412            &[Atom::WildcardRange { min: 10, max: 18 }],
413        );
414        test_optimize(
415            &[
416                Atom::WildcardRange { min: 2, max: 10 },
417                Atom::WildcardFixed(8),
418            ],
419            &[Atom::WildcardRange { min: 10, max: 18 }],
420        );
421
422        test_optimize(
423            &[
424                Atom::WildcardFixed(8),
425                Atom::WildcardRange { min: 2, max: 10 },
426                Atom::WildcardFixed(2),
427            ],
428            &[Atom::WildcardRange { min: 12, max: 20 }],
429        );
430
431        test_optimize(
432            &[
433                Atom::WildcardRange { min: 2, max: 10 },
434                Atom::WildcardFixed(8),
435                Atom::WildcardRange { min: 2, max: 10 },
436            ],
437            &[Atom::WildcardRange { min: 12, max: 28 }],
438        );
439    }
440
441    #[test]
442    fn test_merge_keep_branch() {
443        test_optimize(
444            &[
445                Atom::Branch {
446                    left_len: 2,
447                    right_len: 1,
448                },
449                Atom::WildcardFixed(8),
450                Atom::WildcardFixed(8),
451                Atom::WildcardFixed(8),
452            ],
453            &[
454                Atom::Branch {
455                    left_len: 1,
456                    right_len: 1,
457                },
458                Atom::WildcardFixed(16),
459                Atom::WildcardFixed(8),
460            ],
461        );
462
463        /* nested right branch */
464        test_optimize(
465            &[
466                Atom::Branch {
467                    left_len: 2,
468                    right_len: 4,
469                },
470                Atom::WildcardFixed(8),
471                Atom::WildcardFixed(8),
472                Atom::Branch {
473                    left_len: 2,
474                    right_len: 1,
475                },
476                Atom::WildcardFixed(8),
477                Atom::WildcardFixed(8),
478                Atom::WildcardFixed(8),
479            ],
480            &[
481                Atom::Branch {
482                    left_len: 1,
483                    right_len: 3,
484                },
485                Atom::WildcardFixed(16),
486                Atom::Branch {
487                    left_len: 1,
488                    right_len: 1,
489                },
490                Atom::WildcardFixed(16),
491                Atom::WildcardFixed(8),
492            ],
493        );
494
495        /* nested left branch */
496        test_optimize(
497            &[
498                Atom::Branch {
499                    left_len: 4,
500                    right_len: 2,
501                },
502                // outer left
503                Atom::Branch {
504                    left_len: 1,
505                    right_len: 2,
506                },
507                // inner left
508                Atom::WildcardFixed(8),
509                // inner right
510                Atom::WildcardFixed(8),
511                Atom::WildcardFixed(8),
512                // outer right
513                Atom::WildcardFixed(8),
514                Atom::WildcardFixed(8),
515            ],
516            &[
517                Atom::Branch {
518                    left_len: 3,
519                    right_len: 1,
520                },
521                Atom::Branch {
522                    left_len: 1,
523                    right_len: 1,
524                },
525                Atom::WildcardFixed(8),
526                Atom::WildcardFixed(16),
527                Atom::WildcardFixed(16),
528            ],
529        );
530    }
531}