1use alloc::vec::Vec;
2
3use crate::{
4 pattern::{
5 BinaryPattern,
6 OwnedBinaryPattern,
7 },
8 Atom,
9};
10
11struct Branch {
12 atom_index: usize,
14
15 right_index: usize,
17
18 end_index: usize,
20}
21
22struct Optimizer {
23 atoms: Vec<Atom>,
24 bytes: Vec<u8>,
25
26 branches: Vec<Branch>,
27}
28
29impl Optimizer {
30 pub fn new(pattern: &dyn BinaryPattern) -> Self {
31 let mut branches = Vec::with_capacity(8);
32
33 for (index, atom) in pattern.atoms().iter().enumerate() {
34 let Atom::Branch {
35 left_len,
36 right_len,
37 } = atom
38 else {
39 continue;
40 };
41
42 branches.push(Branch {
43 atom_index: index,
44 right_index: index + 1 + *left_len as usize,
45 end_index: index + 1 + *left_len as usize + *right_len as usize,
46 });
47 }
48
49 Self {
50 atoms: pattern.atoms().to_vec(),
51 bytes: pattern.byte_sequence().to_vec(),
52 branches,
53 }
54 }
55
56 pub fn optimize(mut self) -> OwnedBinaryPattern {
57 while self.atoms.len() > 1 {
58 if self.join_byte_sequences() {
59 continue;
60 }
61
62 if self.join_wildcards_fixed() {
63 continue;
64 }
65
66 if self.join_wildcards_ranged() {
67 continue;
68 }
69
70 if self.join_wildcards() {
71 continue;
72 }
73
74 break;
76 }
77
78 self.fixup_branches();
79
80 OwnedBinaryPattern::new(self.atoms, self.bytes)
81 }
82
83 fn fixup_branches(&mut self) {
84 for branch in self.branches.iter() {
85 self.atoms[branch.atom_index] = Atom::Branch {
86 left_len: (branch.right_index - branch.atom_index - 1) as u16,
87 right_len: (branch.end_index - branch.right_index) as u16,
88 };
89 }
90 }
91
92 fn iter_merge_pair(&mut self, merger: impl Fn(&Atom, &Atom) -> Option<Atom>) -> bool {
93 let mut updated = false;
94 let mut index = 0;
95 while index + 1 < self.atoms.len() {
96 let is_across_branch = self
97 .branches
98 .iter()
99 .any(|branch| index + 1 == branch.right_index || index + 1 == branch.end_index);
100 if is_across_branch {
101 index += 1;
102 continue;
103 }
104
105 let left = &self.atoms[index];
106 let right = &self.atoms[index + 1];
107
108 if let Some(new) = merger(left, right) {
109 self.atoms[index] = new;
110 self.atoms.remove(index + 1);
111
112 for branch in self.branches.iter_mut() {
113 if branch.atom_index > index {
114 branch.atom_index -= 1;
115 }
116
117 if branch.right_index > index {
118 branch.right_index -= 1;
119 }
120
121 if branch.end_index > index {
122 branch.end_index -= 1;
123 }
124 }
125
126 updated = true;
127 } else {
128 index += 1;
129 }
130 }
131
132 updated
133 }
134
135 fn join_byte_sequences(&mut self) -> bool {
136 self.iter_merge_pair(|left, right| {
137 let Atom::ByteSequence {
138 seq_start: left_start,
139 seq_end: left_end,
140 } = *left
141 else {
142 return None;
143 };
144
145 let Atom::ByteSequence {
146 seq_start: right_start,
147 seq_end: right_end,
148 } = *right
149 else {
150 return None;
151 };
152
153 if left_end == right_start {
154 Some(Atom::ByteSequence {
155 seq_start: left_start,
156 seq_end: right_end,
157 })
158 } else {
159 None
160 }
161 })
162 }
163
164 fn join_wildcards_fixed(&mut self) -> bool {
165 self.iter_merge_pair(|left, right| {
166 let Atom::WildcardFixed(left_value) = *left else {
167 return None;
168 };
169 let Atom::WildcardFixed(right_value) = *right else {
170 return None;
171 };
172 Some(Atom::WildcardFixed(left_value + right_value))
173 })
174 }
175
176 fn join_wildcards_ranged(&mut self) -> bool {
177 self.iter_merge_pair(|left, right| {
178 let Atom::WildcardRange {
179 min: left_min,
180 max: left_max,
181 } = *left
182 else {
183 return None;
184 };
185
186 let Atom::WildcardRange {
187 min: right_min,
188 max: right_max,
189 } = *right
190 else {
191 return None;
192 };
193
194 Some(Atom::WildcardRange {
195 min: left_min + right_min,
196 max: left_max + right_max,
197 })
198 })
199 }
200
201 fn join_wildcards(&mut self) -> bool {
202 self.iter_merge_pair(|left, right| {
203 if let Atom::WildcardFixed(fixed) = *left {
204 let Atom::WildcardRange { min, max } = *right else {
205 return None;
206 };
207
208 Some(Atom::WildcardRange {
209 min: min + fixed,
210 max: max + fixed,
211 })
212 } else if let Atom::WildcardRange { min, max } = *left {
213 let Atom::WildcardFixed(fixed) = *right else {
214 return None;
215 };
216
217 Some(Atom::WildcardRange {
218 min: min + fixed,
219 max: max + fixed,
220 })
221 } else {
222 None
223 }
224 })
225 }
226}
227
228pub fn optimize_pattern(pattern: &dyn BinaryPattern) -> OwnedBinaryPattern {
235 let optimizer = Optimizer::new(pattern);
236 optimizer.optimize()
237}
238
239#[cfg(test)]
240mod test {
241 use crate::{
242 pattern::{
243 BinaryPattern,
244 OwnedBinaryPattern,
245 },
246 Atom,
247 };
248
249 fn test_optimize(input: &[Atom], expected: &[Atom]) {
250 println!("Testing: {:?}", input);
251 let result = super::optimize_pattern(&OwnedBinaryPattern::new(input.to_vec(), vec![]));
252 assert_eq!(result.atoms(), expected);
253 println!(" -> success");
254 }
255
256 #[test]
257 fn test_empty() {
258 test_optimize(&[], &[]);
259 test_optimize(&[Atom::WildcardFixed(1)], &[Atom::WildcardFixed(1)]);
260 }
261
262 #[test]
263 fn test_byte_sequence() {
264 test_optimize(
265 &[
266 Atom::ByteSequence {
267 seq_start: 0,
268 seq_end: 1,
269 },
270 Atom::ByteSequence {
271 seq_start: 1,
272 seq_end: 3,
273 },
274 ],
275 &[Atom::ByteSequence {
276 seq_start: 0,
277 seq_end: 3,
278 }],
279 );
280
281 test_optimize(
282 &[
283 Atom::ByteSequence {
284 seq_start: 0,
285 seq_end: 1,
286 },
287 Atom::ByteSequence {
288 seq_start: 2,
289 seq_end: 4,
290 },
291 ],
292 &[
293 Atom::ByteSequence {
294 seq_start: 0,
295 seq_end: 1,
296 },
297 Atom::ByteSequence {
298 seq_start: 2,
299 seq_end: 4,
300 },
301 ],
302 );
303
304 test_optimize(
305 &[
306 Atom::ByteSequence {
307 seq_start: 2,
308 seq_end: 4,
309 },
310 Atom::ByteSequence {
311 seq_start: 4,
312 seq_end: 5,
313 },
314 ],
315 &[Atom::ByteSequence {
316 seq_start: 2,
317 seq_end: 5,
318 }],
319 );
320
321 test_optimize(
322 &[
323 Atom::ByteSequence {
324 seq_start: 2,
325 seq_end: 4,
326 },
327 Atom::ByteSequence {
328 seq_start: 4,
329 seq_end: 5,
330 },
331 Atom::ByteSequence {
332 seq_start: 5,
333 seq_end: 8,
334 },
335 ],
336 &[Atom::ByteSequence {
337 seq_start: 2,
338 seq_end: 8,
339 }],
340 );
341
342 test_optimize(
343 &[
344 Atom::ByteSequence {
345 seq_start: 0,
346 seq_end: 1,
347 },
348 Atom::CursorPush,
349 Atom::ByteSequence {
350 seq_start: 1,
351 seq_end: 3,
352 },
353 ],
354 &[
355 Atom::ByteSequence {
356 seq_start: 0,
357 seq_end: 1,
358 },
359 Atom::CursorPush,
360 Atom::ByteSequence {
361 seq_start: 1,
362 seq_end: 3,
363 },
364 ],
365 );
366 }
367
368 #[test]
369 fn test_wildcard_ranges() {
370 test_optimize(
371 &[
372 Atom::WildcardRange { min: 0, max: 10 },
373 Atom::WildcardRange { min: 5, max: 7 },
374 ],
375 &[Atom::WildcardRange { min: 5, max: 17 }],
376 );
377
378 test_optimize(
379 &[
380 Atom::WildcardRange { min: 0, max: 10 },
381 Atom::WildcardRange { min: 5, max: 7 },
382 Atom::WildcardRange { min: 2, max: 3 },
383 ],
384 &[Atom::WildcardRange { min: 7, max: 20 }],
385 );
386 }
387
388 #[test]
389 fn test_wildcard_fixed() {
390 test_optimize(
391 &[Atom::WildcardFixed(8), Atom::WildcardFixed(2)],
392 &[Atom::WildcardFixed(10)],
393 );
394
395 test_optimize(
396 &[
397 Atom::WildcardFixed(8),
398 Atom::WildcardFixed(2),
399 Atom::WildcardFixed(8),
400 ],
401 &[Atom::WildcardFixed(18)],
402 );
403 }
404
405 #[test]
406 fn test_wildcard_fixed_ranges() {
407 test_optimize(
408 &[
409 Atom::WildcardFixed(8),
410 Atom::WildcardRange { min: 2, max: 10 },
411 ],
412 &[Atom::WildcardRange { min: 10, max: 18 }],
413 );
414 test_optimize(
415 &[
416 Atom::WildcardRange { min: 2, max: 10 },
417 Atom::WildcardFixed(8),
418 ],
419 &[Atom::WildcardRange { min: 10, max: 18 }],
420 );
421
422 test_optimize(
423 &[
424 Atom::WildcardFixed(8),
425 Atom::WildcardRange { min: 2, max: 10 },
426 Atom::WildcardFixed(2),
427 ],
428 &[Atom::WildcardRange { min: 12, max: 20 }],
429 );
430
431 test_optimize(
432 &[
433 Atom::WildcardRange { min: 2, max: 10 },
434 Atom::WildcardFixed(8),
435 Atom::WildcardRange { min: 2, max: 10 },
436 ],
437 &[Atom::WildcardRange { min: 12, max: 28 }],
438 );
439 }
440
441 #[test]
442 fn test_merge_keep_branch() {
443 test_optimize(
444 &[
445 Atom::Branch {
446 left_len: 2,
447 right_len: 1,
448 },
449 Atom::WildcardFixed(8),
450 Atom::WildcardFixed(8),
451 Atom::WildcardFixed(8),
452 ],
453 &[
454 Atom::Branch {
455 left_len: 1,
456 right_len: 1,
457 },
458 Atom::WildcardFixed(16),
459 Atom::WildcardFixed(8),
460 ],
461 );
462
463 test_optimize(
465 &[
466 Atom::Branch {
467 left_len: 2,
468 right_len: 4,
469 },
470 Atom::WildcardFixed(8),
471 Atom::WildcardFixed(8),
472 Atom::Branch {
473 left_len: 2,
474 right_len: 1,
475 },
476 Atom::WildcardFixed(8),
477 Atom::WildcardFixed(8),
478 Atom::WildcardFixed(8),
479 ],
480 &[
481 Atom::Branch {
482 left_len: 1,
483 right_len: 3,
484 },
485 Atom::WildcardFixed(16),
486 Atom::Branch {
487 left_len: 1,
488 right_len: 1,
489 },
490 Atom::WildcardFixed(16),
491 Atom::WildcardFixed(8),
492 ],
493 );
494
495 test_optimize(
497 &[
498 Atom::Branch {
499 left_len: 4,
500 right_len: 2,
501 },
502 Atom::Branch {
504 left_len: 1,
505 right_len: 2,
506 },
507 Atom::WildcardFixed(8),
509 Atom::WildcardFixed(8),
511 Atom::WildcardFixed(8),
512 Atom::WildcardFixed(8),
514 Atom::WildcardFixed(8),
515 ],
516 &[
517 Atom::Branch {
518 left_len: 3,
519 right_len: 1,
520 },
521 Atom::Branch {
522 left_len: 1,
523 right_len: 1,
524 },
525 Atom::WildcardFixed(8),
526 Atom::WildcardFixed(16),
527 Atom::WildcardFixed(16),
528 ],
529 );
530 }
531}