1use alloc::vec::Vec;
2
3use crate::{
4 pattern::BinaryPattern,
5 Atom,
6 GenericBinaryPattern,
7};
8
9struct Branch {
10 atom_index: usize,
12
13 right_index: usize,
15
16 end_index: usize,
18}
19
20struct Optimizer {
21 atoms: Vec<Atom>,
22 bytes: Vec<u8>,
23
24 branches: Vec<Branch>,
25}
26
27impl Optimizer {
28 pub fn new(pattern: &dyn BinaryPattern) -> Self {
29 let mut branches = Vec::with_capacity(8);
30
31 for (index, atom) in pattern.atoms().iter().enumerate() {
32 let Atom::Branch {
33 left_len,
34 right_len,
35 } = atom
36 else {
37 continue;
38 };
39
40 branches.push(Branch {
41 atom_index: index,
42 right_index: index + 1 + *left_len as usize,
43 end_index: index + 1 + *left_len as usize + *right_len as usize,
44 });
45 }
46
47 Self {
48 atoms: pattern.atoms().to_vec(),
49 bytes: pattern.byte_sequence().to_vec(),
50 branches,
51 }
52 }
53
54 pub fn optimize(mut self) -> GenericBinaryPattern<'static> {
55 while self.atoms.len() > 1 {
56 if self.join_byte_sequences() {
57 continue;
58 }
59
60 if self.join_wildcards_fixed() {
61 continue;
62 }
63
64 if self.join_wildcards_ranged() {
65 continue;
66 }
67
68 if self.join_wildcards() {
69 continue;
70 }
71
72 break;
74 }
75
76 self.fixup_branches();
77
78 GenericBinaryPattern::new(self.atoms, self.bytes)
79 }
80
81 fn fixup_branches(&mut self) {
82 for branch in self.branches.iter() {
83 self.atoms[branch.atom_index] = Atom::Branch {
84 left_len: (branch.right_index - branch.atom_index - 1) as u16,
85 right_len: (branch.end_index - branch.right_index) as u16,
86 };
87 }
88 }
89
90 fn iter_merge_pair(&mut self, merger: impl Fn(&Atom, &Atom) -> Option<Atom>) -> bool {
91 let mut updated = false;
92 let mut index = 0;
93 while index + 1 < self.atoms.len() {
94 let is_across_branch = self
95 .branches
96 .iter()
97 .any(|branch| index + 1 == branch.right_index || index + 1 == branch.end_index);
98 if is_across_branch {
99 index += 1;
100 continue;
101 }
102
103 let left = &self.atoms[index];
104 let right = &self.atoms[index + 1];
105
106 if let Some(new) = merger(left, right) {
107 self.atoms[index] = new;
108 self.atoms.remove(index + 1);
109
110 for branch in self.branches.iter_mut() {
111 if branch.atom_index > index {
112 branch.atom_index -= 1;
113 }
114
115 if branch.right_index > index {
116 branch.right_index -= 1;
117 }
118
119 if branch.end_index > index {
120 branch.end_index -= 1;
121 }
122 }
123
124 updated = true;
125 } else {
126 index += 1;
127 }
128 }
129
130 updated
131 }
132
133 fn join_byte_sequences(&mut self) -> bool {
134 self.iter_merge_pair(|left, right| {
135 let Atom::ByteSequence {
136 seq_start: left_start,
137 seq_end: left_end,
138 } = *left
139 else {
140 return None;
141 };
142
143 let Atom::ByteSequence {
144 seq_start: right_start,
145 seq_end: right_end,
146 } = *right
147 else {
148 return None;
149 };
150
151 if left_end == right_start {
152 Some(Atom::ByteSequence {
153 seq_start: left_start,
154 seq_end: right_end,
155 })
156 } else {
157 None
158 }
159 })
160 }
161
162 fn join_wildcards_fixed(&mut self) -> bool {
163 self.iter_merge_pair(|left, right| {
164 let Atom::WildcardFixed(left_value) = *left else {
165 return None;
166 };
167 let Atom::WildcardFixed(right_value) = *right else {
168 return None;
169 };
170 Some(Atom::WildcardFixed(left_value + right_value))
171 })
172 }
173
174 fn join_wildcards_ranged(&mut self) -> bool {
175 self.iter_merge_pair(|left, right| {
176 let Atom::WildcardRange {
177 min: left_min,
178 max: left_max,
179 } = *left
180 else {
181 return None;
182 };
183
184 let Atom::WildcardRange {
185 min: right_min,
186 max: right_max,
187 } = *right
188 else {
189 return None;
190 };
191
192 Some(Atom::WildcardRange {
193 min: left_min + right_min,
194 max: left_max + right_max,
195 })
196 })
197 }
198
199 fn join_wildcards(&mut self) -> bool {
200 self.iter_merge_pair(|left, right| {
201 if let Atom::WildcardFixed(fixed) = *left {
202 let Atom::WildcardRange { min, max } = *right else {
203 return None;
204 };
205
206 Some(Atom::WildcardRange {
207 min: min + fixed,
208 max: max + fixed,
209 })
210 } else if let Atom::WildcardRange { min, max } = *left {
211 let Atom::WildcardFixed(fixed) = *right else {
212 return None;
213 };
214
215 Some(Atom::WildcardRange {
216 min: min + fixed,
217 max: max + fixed,
218 })
219 } else {
220 None
221 }
222 })
223 }
224}
225
226pub fn optimize_pattern(pattern: &dyn BinaryPattern) -> GenericBinaryPattern<'static> {
233 let optimizer = Optimizer::new(pattern);
234 optimizer.optimize()
235}
236
237#[cfg(test)]
238mod test {
239 use crate::{
240 pattern::BinaryPattern,
241 Atom,
242 GenericBinaryPattern,
243 };
244
245 fn test_optimize(input: &[Atom], expected: &[Atom]) {
246 println!("Testing: {:?}", input);
247 let result = super::optimize_pattern(&GenericBinaryPattern::new(input, &[]));
248 assert_eq!(result.atoms(), expected);
249 println!(" -> success");
250 }
251
252 #[test]
253 fn test_empty() {
254 test_optimize(&[], &[]);
255 test_optimize(&[Atom::WildcardFixed(1)], &[Atom::WildcardFixed(1)]);
256 }
257
258 #[test]
259 fn test_byte_sequence() {
260 test_optimize(
261 &[
262 Atom::ByteSequence {
263 seq_start: 0,
264 seq_end: 1,
265 },
266 Atom::ByteSequence {
267 seq_start: 1,
268 seq_end: 3,
269 },
270 ],
271 &[Atom::ByteSequence {
272 seq_start: 0,
273 seq_end: 3,
274 }],
275 );
276
277 test_optimize(
278 &[
279 Atom::ByteSequence {
280 seq_start: 0,
281 seq_end: 1,
282 },
283 Atom::ByteSequence {
284 seq_start: 2,
285 seq_end: 4,
286 },
287 ],
288 &[
289 Atom::ByteSequence {
290 seq_start: 0,
291 seq_end: 1,
292 },
293 Atom::ByteSequence {
294 seq_start: 2,
295 seq_end: 4,
296 },
297 ],
298 );
299
300 test_optimize(
301 &[
302 Atom::ByteSequence {
303 seq_start: 2,
304 seq_end: 4,
305 },
306 Atom::ByteSequence {
307 seq_start: 4,
308 seq_end: 5,
309 },
310 ],
311 &[Atom::ByteSequence {
312 seq_start: 2,
313 seq_end: 5,
314 }],
315 );
316
317 test_optimize(
318 &[
319 Atom::ByteSequence {
320 seq_start: 2,
321 seq_end: 4,
322 },
323 Atom::ByteSequence {
324 seq_start: 4,
325 seq_end: 5,
326 },
327 Atom::ByteSequence {
328 seq_start: 5,
329 seq_end: 8,
330 },
331 ],
332 &[Atom::ByteSequence {
333 seq_start: 2,
334 seq_end: 8,
335 }],
336 );
337
338 test_optimize(
339 &[
340 Atom::ByteSequence {
341 seq_start: 0,
342 seq_end: 1,
343 },
344 Atom::CursorPush,
345 Atom::ByteSequence {
346 seq_start: 1,
347 seq_end: 3,
348 },
349 ],
350 &[
351 Atom::ByteSequence {
352 seq_start: 0,
353 seq_end: 1,
354 },
355 Atom::CursorPush,
356 Atom::ByteSequence {
357 seq_start: 1,
358 seq_end: 3,
359 },
360 ],
361 );
362 }
363
364 #[test]
365 fn test_wildcard_ranges() {
366 test_optimize(
367 &[
368 Atom::WildcardRange { min: 0, max: 10 },
369 Atom::WildcardRange { min: 5, max: 7 },
370 ],
371 &[Atom::WildcardRange { min: 5, max: 17 }],
372 );
373
374 test_optimize(
375 &[
376 Atom::WildcardRange { min: 0, max: 10 },
377 Atom::WildcardRange { min: 5, max: 7 },
378 Atom::WildcardRange { min: 2, max: 3 },
379 ],
380 &[Atom::WildcardRange { min: 7, max: 20 }],
381 );
382 }
383
384 #[test]
385 fn test_wildcard_fixed() {
386 test_optimize(
387 &[Atom::WildcardFixed(8), Atom::WildcardFixed(2)],
388 &[Atom::WildcardFixed(10)],
389 );
390
391 test_optimize(
392 &[
393 Atom::WildcardFixed(8),
394 Atom::WildcardFixed(2),
395 Atom::WildcardFixed(8),
396 ],
397 &[Atom::WildcardFixed(18)],
398 );
399 }
400
401 #[test]
402 fn test_wildcard_fixed_ranges() {
403 test_optimize(
404 &[
405 Atom::WildcardFixed(8),
406 Atom::WildcardRange { min: 2, max: 10 },
407 ],
408 &[Atom::WildcardRange { min: 10, max: 18 }],
409 );
410 test_optimize(
411 &[
412 Atom::WildcardRange { min: 2, max: 10 },
413 Atom::WildcardFixed(8),
414 ],
415 &[Atom::WildcardRange { min: 10, max: 18 }],
416 );
417
418 test_optimize(
419 &[
420 Atom::WildcardFixed(8),
421 Atom::WildcardRange { min: 2, max: 10 },
422 Atom::WildcardFixed(2),
423 ],
424 &[Atom::WildcardRange { min: 12, max: 20 }],
425 );
426
427 test_optimize(
428 &[
429 Atom::WildcardRange { min: 2, max: 10 },
430 Atom::WildcardFixed(8),
431 Atom::WildcardRange { min: 2, max: 10 },
432 ],
433 &[Atom::WildcardRange { min: 12, max: 28 }],
434 );
435 }
436
437 #[test]
438 fn test_merge_keep_branch() {
439 test_optimize(
440 &[
441 Atom::Branch {
442 left_len: 2,
443 right_len: 1,
444 },
445 Atom::WildcardFixed(8),
446 Atom::WildcardFixed(8),
447 Atom::WildcardFixed(8),
448 ],
449 &[
450 Atom::Branch {
451 left_len: 1,
452 right_len: 1,
453 },
454 Atom::WildcardFixed(16),
455 Atom::WildcardFixed(8),
456 ],
457 );
458
459 test_optimize(
461 &[
462 Atom::Branch {
463 left_len: 2,
464 right_len: 4,
465 },
466 Atom::WildcardFixed(8),
467 Atom::WildcardFixed(8),
468 Atom::Branch {
469 left_len: 2,
470 right_len: 1,
471 },
472 Atom::WildcardFixed(8),
473 Atom::WildcardFixed(8),
474 Atom::WildcardFixed(8),
475 ],
476 &[
477 Atom::Branch {
478 left_len: 1,
479 right_len: 3,
480 },
481 Atom::WildcardFixed(16),
482 Atom::Branch {
483 left_len: 1,
484 right_len: 1,
485 },
486 Atom::WildcardFixed(16),
487 Atom::WildcardFixed(8),
488 ],
489 );
490
491 test_optimize(
493 &[
494 Atom::Branch {
495 left_len: 4,
496 right_len: 2,
497 },
498 Atom::Branch {
500 left_len: 1,
501 right_len: 2,
502 },
503 Atom::WildcardFixed(8),
505 Atom::WildcardFixed(8),
507 Atom::WildcardFixed(8),
508 Atom::WildcardFixed(8),
510 Atom::WildcardFixed(8),
511 ],
512 &[
513 Atom::Branch {
514 left_len: 3,
515 right_len: 1,
516 },
517 Atom::Branch {
518 left_len: 1,
519 right_len: 1,
520 },
521 Atom::WildcardFixed(8),
522 Atom::WildcardFixed(16),
523 Atom::WildcardFixed(16),
524 ],
525 );
526 }
527}