1use std::ops::Range;
2
3use crate::pattern::{Atom, ParsePatError, save_len};
4use memchr::memchr_iter;
5
/// Mapped (virtual) address / offset type used throughout the scanner.
pub type Offset = u64;
/// Hard cap on pending backtrack states; exceeding it aborts the match so
/// memory stays bounded on pathological patterns.
const MAX_BACKTRACK_STATES: usize = 1_000_000;
/// NOTE(review): not referenced in this chunk — presumably used elsewhere in
/// the file; confirm before removing.
const PREFIX_BUF_LEN: usize = 16;
/// Maximum number of literal anchor bytes extracted from a pattern for the
/// fast pre-search.
const ANCHOR_MAX_LEN: usize = 4;
10
/// A saved execution point for the backtracking matcher: enough state to
/// resume at `pc`/`cursor` and to roll the shared scratch buffers back to
/// the lengths recorded here.
#[derive(Copy, Clone, Debug)]
struct BacktrackState {
    // Input position to resume at.
    cursor: Offset,
    // Pattern program counter to resume at.
    pc: usize,
    // Pending one-shot byte mask armed by `Atom::Fuzzy`, if any.
    fuzzy: Option<u8>,
    // Truncate `ExecScratch::calls` to this length on restore.
    calls_len: usize,
    // Roll `ExecScratch::save_log` back to this length on restore.
    save_log_len: usize,
}
19
/// Reusable buffers for one pattern execution, kept across attempts to
/// avoid reallocating per scan position.
#[derive(Clone, Debug, Default)]
struct ExecScratch {
    // Working copy of the caller's save slots; committed back only on match.
    work_save: Vec<Offset>,
    // Return cursors pushed by `Atom::Push` and consumed by `Atom::Pop`.
    calls: Vec<Offset>,
    // Undo log of (slot, previous value) for rolling back save writes.
    save_log: Vec<(usize, Offset)>,
    // Pending alternatives to try when the current thread of execution fails.
    stack: Vec<BacktrackState>,
}
27
28impl ExecScratch {
29 fn reset_from_save(&mut self, save: &[Offset]) {
30 self.work_save.clear();
31 self.work_save.extend_from_slice(save);
32 }
33
34 fn commit_to_save(&self, save: &mut [Offset]) {
35 debug_assert!(
36 self.work_save.len() >= save.len(),
37 "scratch save buffer must cover caller save length"
38 );
39 save.copy_from_slice(&self.work_save[..save.len()]);
40 }
41}
42
/// Pre-computed execution strategy for a pattern, derived once by
/// `analyze_pattern` and reused across scans.
#[derive(Copy, Clone, Debug)]
struct PatternPlan {
    // Number of save slots the pattern can write.
    required_slots: usize,
    // True when the pattern contains no backtracking/control-flow atoms.
    linear_exec: bool,
    // Literal byte prefix usable for a fast pre-search.
    anchor: [u8; ANCHOR_MAX_LEN],
    // Number of valid bytes in `anchor`; 0 means "no anchor, brute force".
    anchor_len: usize,
    // Distance from the match start to where the anchor bytes appear.
    anchor_offset: u64,
    // Bad-character jump table for the anchor (Horspool-style search).
    anchor_jumps: [u8; 256],
}
52
/// A pattern whose atoms have been analyzed once up front, so repeated
/// scans skip re-deriving the execution plan.
#[derive(Clone, Debug)]
pub struct PreparedPattern {
    // Compiled pattern atoms.
    atoms: Vec<Atom>,
    // Cached copy of `PatternPlan::required_slots`.
    required_slots: usize,
    // Cached copy of `PatternPlan::linear_exec`.
    linear_exec: bool,
    // Cached anchor bytes for the fast pre-search.
    anchor: [u8; ANCHOR_MAX_LEN],
    // Valid byte count in `anchor`.
    anchor_len: usize,
    // Match-start-to-anchor distance.
    anchor_offset: u64,
    // Cached bad-character jump table.
    anchor_jumps: [u8; 256],
}
64
65impl PreparedPattern {
66 pub fn from_atoms(atoms: Vec<Atom>) -> Self {
68 let plan = analyze_pattern(&atoms);
69 Self {
70 atoms,
71 required_slots: plan.required_slots,
72 linear_exec: plan.linear_exec,
73 anchor: plan.anchor,
74 anchor_len: plan.anchor_len,
75 anchor_offset: plan.anchor_offset,
76 anchor_jumps: plan.anchor_jumps,
77 }
78 }
79
80 pub fn atoms(&self) -> &[Atom] {
82 &self.atoms
83 }
84
85 pub fn required_slots(&self) -> usize {
87 self.required_slots
88 }
89}
90
/// Correspondence between a mapped (virtual) address range and the file
/// byte range in the raw image that backs it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CodeSpan {
    /// Mapped/virtual address range of this span.
    pub mapped: Range<Offset>,
    /// Backing byte range inside the raw image.
    pub file: Range<usize>,
}
96
97pub trait BinaryView {
99 fn image(&self) -> &[u8];
100 fn code_spans(&self) -> &[CodeSpan];
101 fn mapped_to_file_offset(&self, offset: Offset) -> Option<usize>;
102
103 #[inline]
104 fn pointer_size_bytes(&self) -> u8 {
105 8
106 }
107
108 #[inline]
109 fn follow_pointer_target(&self, raw: Offset) -> Option<Offset> {
110 self.mapped_to_file_offset(raw).map(|_| raw)
111 }
112
113 #[inline]
114 fn code_ranges(&self) -> impl Iterator<Item = &Range<Offset>> + '_ {
115 self.code_spans().iter().map(|span| &span.mapped)
116 }
117
118 #[inline]
119 fn is_in_code(&self, mapped: Offset) -> bool {
120 span_index_for_offset(self.code_spans(), mapped).is_some()
121 }
122
123 #[inline]
124 fn read_u8(&self, offset: Offset) -> Option<u8> {
125 self.image()
126 .get(self.mapped_to_file_offset(offset)?)
127 .copied()
128 }
129
130 #[inline]
131 fn read_i16(&self, offset: Offset) -> Option<i16> {
132 Some(i16::from_le_bytes(self.read_array::<2>(offset)?))
133 }
134
135 #[inline]
136 fn read_u16(&self, offset: Offset) -> Option<u16> {
137 Some(u16::from_le_bytes(self.read_array::<2>(offset)?))
138 }
139
140 #[inline]
141 fn read_i32(&self, offset: Offset) -> Option<i32> {
142 Some(i32::from_le_bytes(self.read_array::<4>(offset)?))
143 }
144
145 #[inline]
146 fn read_u32(&self, offset: Offset) -> Option<u32> {
147 Some(u32::from_le_bytes(self.read_array::<4>(offset)?))
148 }
149
150 #[inline]
151 fn read_pointer_raw(&self, offset: Offset) -> Option<Offset> {
152 match self.pointer_size_bytes() {
153 4 => self.read_u32(offset).map(Offset::from),
154 8 => Some(u64::from_le_bytes(self.read_array::<8>(offset)?)),
155 _ => None,
156 }
157 }
158
159 #[inline]
160 fn read_array<const N: usize>(&self, offset: Offset) -> Option<[u8; N]> {
161 let file_offset = self.mapped_to_file_offset(offset)?;
162 let end = file_offset.checked_add(N)?;
163 let bytes = self.image().get(file_offset..end)?;
164 let mut out = [0u8; N];
165 out.copy_from_slice(bytes);
166 Some(out)
167 }
168}
169
/// A `BinaryView` reader that caches the index of the code span the cursor
/// last touched, so sequential reads skip the per-read span search.
struct ExecReader<'a, B: BinaryView> {
    view: &'a B,
    // Index of the span last known to contain the cursor, if any.
    span_index: Option<usize>,
}
174
impl<'a, B: BinaryView> ExecReader<'a, B> {
    /// Creates a reader positioned at `start`, pre-resolving its span so the
    /// first read hits the cache.
    fn new(view: &'a B, start: Offset) -> Self {
        let mut reader = Self {
            view,
            span_index: None,
        };
        reader.span_index = reader.find_span(start);
        reader
    }

    /// Reads one byte, preferring the cached span's file mapping and falling
    /// back to the view's generic lookup when the cache misses.
    #[inline]
    fn read_u8(&mut self, offset: Offset) -> Option<u8> {
        let Some(file_offset) = self.span_file_offset(offset) else {
            return self.view.read_u8(offset);
        };
        self.view
            .image()
            .get(file_offset)
            .copied()
            .or_else(|| self.view.read_u8(offset))
    }

    /// Reads a little-endian `i16` at `offset`.
    #[inline]
    fn read_i16(&mut self, offset: Offset) -> Option<i16> {
        Some(i16::from_le_bytes(self.read_array::<2>(offset)?))
    }

    /// Reads a little-endian `u16` at `offset`.
    #[inline]
    fn read_u16(&mut self, offset: Offset) -> Option<u16> {
        Some(u16::from_le_bytes(self.read_array::<2>(offset)?))
    }

    /// Reads a little-endian `i32` at `offset`.
    #[inline]
    fn read_i32(&mut self, offset: Offset) -> Option<i32> {
        Some(i32::from_le_bytes(self.read_array::<4>(offset)?))
    }

    /// Reads a little-endian `u32` at `offset`.
    #[inline]
    fn read_u32(&mut self, offset: Offset) -> Option<u32> {
        Some(u32::from_le_bytes(self.read_array::<4>(offset)?))
    }

    /// Reads a pointer of the given byte `width`; `None` for unsupported widths.
    #[inline]
    fn read_pointer_raw(&mut self, offset: Offset, width: u8) -> Option<Offset> {
        match width {
            4 => self.read_u32(offset).map(Offset::from),
            8 => Some(u64::from_le_bytes(self.read_array::<8>(offset)?)),
            _ => None,
        }
    }

    /// Reads `N` bytes, trying the cached span mapping first and falling back
    /// to the view's own `read_array` (which may cross span boundaries).
    fn read_array<const N: usize>(&mut self, offset: Offset) -> Option<[u8; N]> {
        if let Some(file_offset) = self.span_file_offset(offset)
            && let Some(end) = file_offset.checked_add(N)
            && let Some(bytes) = self.view.image().get(file_offset..end)
        {
            let mut out = [0u8; N];
            out.copy_from_slice(bytes);
            return Some(out);
        }

        self.view.read_array::<N>(offset)
    }

    /// Maps `offset` to a file offset via its containing span, if any.
    fn span_file_offset(&mut self, offset: Offset) -> Option<usize> {
        let index = self.find_span(offset)?;
        let span = self.view.code_spans().get(index)?;
        let delta = offset.checked_sub(span.mapped.start)?;
        let delta_usize = usize::try_from(delta).ok()?;
        span.file.start.checked_add(delta_usize)
    }

    /// Finds the span containing `offset`, in three stages: cached span hit,
    /// cheap advance to the immediately-following span (the common case for a
    /// forward-moving cursor), then a full table search.
    fn find_span(&mut self, offset: Offset) -> Option<usize> {
        // Stage 1: cursor still inside the cached span.
        if let Some(index) = self.span_index
            && self
                .view
                .code_spans()
                .get(index)
                .is_some_and(|span| span.mapped.contains(&offset))
        {
            return Some(index);
        }

        // Stage 2: cursor moved just past the cached span into its successor.
        if let Some(index) = self.span_index
            && let Some(current) = self.view.code_spans().get(index)
            && offset >= current.mapped.end
            && let Some(next_index) = index.checked_add(1)
            && self
                .view
                .code_spans()
                .get(next_index)
                .is_some_and(|span| span.mapped.contains(&offset))
        {
            self.span_index = Some(next_index);
            return Some(next_index);
        }

        // Stage 3: full lookup; refresh the cache on success.
        let index = span_index_for_offset(self.view.code_spans(), offset)?;
        self.span_index = Some(index);
        Some(index)
    }
}
277
/// Pattern scanner over a `BinaryView`. Cheap to copy; borrows the view.
#[derive(Copy, Clone, Debug)]
pub struct Scanner<'a, B: BinaryView> {
    view: &'a B,
}
283
284impl<'a, B: BinaryView> Scanner<'a, B> {
    /// Creates a scanner borrowing `view`.
    pub fn new(view: &'a B) -> Self {
        Self { view }
    }
289
290 pub fn finds_code(&self, pat: &[Atom], save: &mut [Offset]) -> bool {
292 let plan = analyze_pattern(pat);
293 let required_slots = plan.required_slots;
294 debug_assert!(
295 save.len() >= required_slots,
296 "caller-provided save buffer must cover all slots referenced by the pattern"
297 );
298 self.finds_unique_direct(
299 pat,
300 plan.linear_exec,
301 plan.required_slots,
302 plan.anchor,
303 plan.anchor_len,
304 plan.anchor_offset,
305 save,
306 )
307 }
308
    /// Number of save slots `pat` can write; size your save buffer to this.
    pub fn required_slots(&self, pat: &[Atom]) -> usize {
        save_len(pat)
    }
319
    /// Pre-analyzes `pat` for repeated scanning (clones the atoms).
    pub fn prepare_pattern(&self, pat: &[Atom]) -> PreparedPattern {
        PreparedPattern::from_atoms(pat.to_vec())
    }
324
325 pub fn prepare_pattern_str(&self, source: &str) -> Result<PreparedPattern, ParsePatError> {
330 let atoms = crate::pattern::parse(source)?;
331 Ok(PreparedPattern::from_atoms(atoms))
332 }
333
    /// Like `finds_code`, but reuses the plan cached in a `PreparedPattern`.
    /// Returns true only for a unique match across all code spans.
    pub fn finds_prepared(&self, pat: &PreparedPattern, save: &mut [Offset]) -> bool {
        debug_assert!(
            save.len() >= pat.required_slots,
            "caller-provided save buffer must cover all slots referenced by the prepared pattern"
        );
        self.finds_unique_direct(
            &pat.atoms,
            pat.linear_exec,
            pat.required_slots,
            pat.anchor,
            pat.anchor_len,
            pat.anchor_offset,
            save,
        )
    }
350
    /// Creates a streaming match iterator from a prepared pattern, reusing
    /// its cached plan.
    pub fn matches_prepared<'p>(&self, pat: &'p PreparedPattern) -> Matches<'a, 'p, B> {
        Matches {
            scanner: Scanner { view: self.view },
            pat: &pat.atoms,
            required_slots: pat.required_slots,
            linear_exec: pat.linear_exec,
            range_index: 0,
            cursor: None,
            anchor: pat.anchor,
            anchor_len: pat.anchor_len,
            anchor_offset: pat.anchor_offset,
            anchor_jumps: pat.anchor_jumps,
            scratch: ExecScratch::default(),
        }
    }
367
    /// Creates a streaming match iterator from raw atoms, analyzing the
    /// pattern once up front.
    pub fn matches_code<'p>(&self, pat: &'p [Atom]) -> Matches<'a, 'p, B> {
        let plan = analyze_pattern(pat);
        Matches {
            scanner: Scanner { view: self.view },
            pat,
            required_slots: plan.required_slots,
            linear_exec: plan.linear_exec,
            range_index: 0,
            cursor: None,
            anchor: plan.anchor,
            anchor_len: plan.anchor_len,
            anchor_offset: plan.anchor_offset,
            anchor_jumps: plan.anchor_jumps,
            scratch: ExecScratch::default(),
        }
    }
388
389 fn exec(
390 &self,
391 start: Offset,
392 pat: &[Atom],
393 save: &mut [Offset],
394 linear_exec: bool,
395 scratch: &mut ExecScratch,
396 ) -> bool {
397 if linear_exec {
398 return self.exec_linear(start, pat, save, scratch);
399 }
400 self.exec_backtracking(start, pat, save, scratch)
401 }
402
    /// Searches every code span and returns true only when exactly one match
    /// exists. The first match's slots land in `save`; after that, further
    /// probes write into a throwaway buffer so `save` keeps the first match
    /// even though scanning continues to detect duplicates.
    #[allow(clippy::too_many_arguments)]
    fn finds_unique_direct(
        &self,
        pat: &[Atom],
        linear_exec: bool,
        required_slots: usize,
        anchor: [u8; ANCHOR_MAX_LEN],
        anchor_len: usize,
        anchor_offset: u64,
        save: &mut [Offset],
    ) -> bool {
        let mut exec_scratch = ExecScratch::default();
        // Throwaway save buffer used after the first match is recorded.
        let mut scratch = vec![0; required_slots];
        let mut found_once = false;

        for span in self.view.code_spans() {
            let mut cursor = span.mapped.start;
            loop {
                // First match writes to the caller's buffer; later probes
                // only need to know *whether* a second match exists.
                let save_buf: &mut [Offset] = if found_once {
                    &mut scratch
                } else {
                    &mut save[..required_slots]
                };
                let matched = self.find_next_direct_in_span(
                    span,
                    cursor,
                    pat,
                    save_buf,
                    linear_exec,
                    &anchor,
                    anchor_len,
                    anchor_offset,
                    &mut exec_scratch,
                );
                let Some(found_at) = matched else {
                    break;
                };

                // Second match anywhere => not unique.
                if found_once {
                    return false;
                }
                found_once = true;

                // Resume one byte past the match to look for duplicates.
                let Some(next) = found_at.checked_add(1) else {
                    break;
                };
                cursor = next;
            }
        }

        found_once
    }
455
456 #[allow(clippy::too_many_arguments)]
457 fn find_next_direct_in_span(
458 &self,
459 span: &CodeSpan,
460 start: Offset,
461 pat: &[Atom],
462 save: &mut [Offset],
463 linear_exec: bool,
464 anchor: &[u8; ANCHOR_MAX_LEN],
465 anchor_len: usize,
466 anchor_offset: u64,
467 scratch: &mut ExecScratch,
468 ) -> Option<Offset> {
469 if start >= span.mapped.end {
470 return None;
471 }
472
473 if anchor_len == 0 {
474 return self.scan_range_linear_direct(
475 span.mapped.clone(),
476 start,
477 pat,
478 save,
479 linear_exec,
480 scratch,
481 );
482 }
483 if anchor_len < 4 {
484 return self.scan_span_first_byte_direct(
485 span,
486 start,
487 pat,
488 save,
489 linear_exec,
490 anchor,
491 anchor_len,
492 anchor_offset,
493 scratch,
494 );
495 }
496 self.scan_span_quick_direct(
497 span,
498 start,
499 pat,
500 save,
501 linear_exec,
502 anchor,
503 anchor_len,
504 anchor_offset,
505 scratch,
506 )
507 }
508
509 fn scan_range_linear_direct(
510 &self,
511 range: Range<Offset>,
512 mut cursor: Offset,
513 pat: &[Atom],
514 save: &mut [Offset],
515 linear_exec: bool,
516 scratch: &mut ExecScratch,
517 ) -> Option<Offset> {
518 while cursor < range.end {
519 if self.exec(cursor, pat, save, linear_exec, scratch) {
520 return Some(cursor);
521 }
522 cursor = cursor.checked_add(1)?;
523 }
524 None
525 }
526
    /// Short-anchor (1..4 byte) search: memchr for the anchor's first byte in
    /// the span's backing file bytes, verify the remaining anchor bytes, then
    /// run `exec` at the corresponding match start. Each of the four early
    /// `else` branches falls back to the slower byte-at-a-time scan when the
    /// span's file range cannot be sliced directly.
    #[allow(clippy::too_many_arguments)]
    fn scan_span_first_byte_direct(
        &self,
        span: &CodeSpan,
        start: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        linear_exec: bool,
        anchor: &[u8; ANCHOR_MAX_LEN],
        anchor_len: usize,
        anchor_offset: u64,
        scratch: &mut ExecScratch,
    ) -> Option<Offset> {
        // Fallback 1: the span's file range does not fit in the image.
        let Some(bytes) = self.view.image().get(span.file.clone()) else {
            return self.scan_range_first_byte_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_offset,
                scratch,
            );
        };
        let anchor_start = start.checked_add(anchor_offset)?;
        // Fallback 2: the anchor position is not mapped inside this span.
        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
            return self.scan_range_first_byte_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_offset,
                scratch,
            );
        };
        // Fallback 3: the file offset precedes the span's file range.
        let Some(start_index) = start_file.checked_sub(span.file.start) else {
            return self.scan_range_first_byte_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_offset,
                scratch,
            );
        };
        // Fallback 4: the starting index lies past the span's bytes.
        let Some(haystack) = bytes.get(start_index..) else {
            return self.scan_range_first_byte_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_offset,
                scratch,
            );
        };

        let needle = anchor[0];
        let anchor_window = &anchor[..anchor_len];
        for delta in memchr_iter(needle, haystack) {
            // For multi-byte anchors, confirm the full window before exec.
            if anchor_len > 1
                && haystack
                    .get(delta..delta + anchor_len)
                    .is_none_or(|window| window != anchor_window)
            {
                continue;
            }
            // Translate the file hit back to a mapped match-start cursor.
            let anchor_index = start_index.checked_add(delta)?;
            let mapped_delta = Offset::try_from(anchor_index).ok()?;
            let anchor_cursor = span.mapped.start.checked_add(mapped_delta)?;
            let cursor = anchor_cursor.checked_sub(anchor_offset)?;
            if self.exec(cursor, pat, save, linear_exec, scratch) {
                return Some(cursor);
            }
        }
        None
    }
610
    /// Slow-path short-anchor search: probe the anchor's first byte through
    /// the view one offset at a time (used when the span's file bytes cannot
    /// be sliced contiguously), running `exec` on each first-byte hit.
    #[allow(clippy::too_many_arguments)]
    fn scan_range_first_byte_direct(
        &self,
        range: Range<Offset>,
        mut cursor: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        linear_exec: bool,
        anchor: &[u8; ANCHOR_MAX_LEN],
        anchor_offset: u64,
        scratch: &mut ExecScratch,
    ) -> Option<Offset> {
        let needle = anchor[0];
        // `probe` tracks the anchor byte's position; `cursor` the match start.
        let mut probe = cursor.checked_add(anchor_offset)?;
        while probe < range.end {
            if self.view.read_u8(probe) == Some(needle)
                && self.exec(cursor, pat, save, linear_exec, scratch)
            {
                return Some(cursor);
            }
            cursor = cursor.checked_add(1)?;
            probe = probe.checked_add(1)?;
        }
        None
    }
636
    /// Full-width-anchor search over the span's backing bytes using a
    /// Horspool-style bad-character table, running `exec` on each anchor hit.
    /// Falls back to the mapped-offset scan when the span's file bytes cannot
    /// be sliced. NOTE(review): the jump table is rebuilt here per call even
    /// though `PatternPlan`/`Matches` carry a precomputed `anchor_jumps` —
    /// likely a missed reuse; confirm against `Matches::next`.
    #[allow(clippy::too_many_arguments)]
    fn scan_span_quick_direct(
        &self,
        span: &CodeSpan,
        start: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        linear_exec: bool,
        anchor: &[u8; ANCHOR_MAX_LEN],
        anchor_len: usize,
        anchor_offset: u64,
        scratch: &mut ExecScratch,
    ) -> Option<Offset> {
        // Fallback 1: the span's file range does not fit in the image.
        let Some(bytes) = self.view.image().get(span.file.clone()) else {
            return self.scan_range_quick_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_len,
                anchor_offset,
                scratch,
            );
        };
        let anchor_start = start.checked_add(anchor_offset)?;
        // Fallback 2: the anchor position is not mapped inside this span.
        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
            return self.scan_range_quick_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_len,
                anchor_offset,
                scratch,
            );
        };
        // Fallback 3: the file offset precedes the span's file range.
        let Some(start_index) = start_file.checked_sub(span.file.start) else {
            return self.scan_range_quick_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_len,
                anchor_offset,
                scratch,
            );
        };
        let prefix = &anchor[..anchor_len];
        // Fallback 4: the starting index lies past the span's bytes.
        let Some(haystack) = bytes.get(start_index..) else {
            return self.scan_range_quick_direct(
                span.mapped.clone(),
                start,
                pat,
                save,
                linear_exec,
                anchor,
                anchor_len,
                anchor_offset,
                scratch,
            );
        };
        if haystack.len() < anchor_len {
            return None;
        }

        // Bad-character table: distance from each prefix byte to the end.
        let mut jumps = [anchor_len as u8; 256];
        for (index, byte) in prefix.iter().take(anchor_len.saturating_sub(1)).enumerate() {
            jumps[usize::from(*byte)] = (anchor_len - index - 1) as u8;
        }

        let last = prefix[anchor_len - 1];
        let mut index = 0usize;
        let max_index = haystack.len() - anchor_len;
        while index <= max_index {
            // Probe the window's last byte first (classic Horspool).
            let probe = haystack[index + anchor_len - 1];
            let jump = usize::from(jumps[usize::from(probe)].max(1));
            if probe == last
                && haystack
                    .get(index..index + anchor_len)
                    .is_some_and(|window| window == prefix)
            {
                // Translate the file hit back to a mapped match-start cursor.
                let total_index = start_index.checked_add(index)?;
                let mapped_delta = Offset::try_from(total_index).ok()?;
                let cursor = span.mapped.start.checked_add(mapped_delta)?;
                let start_cursor = cursor.checked_sub(anchor_offset)?;
                if self.exec(start_cursor, pat, save, linear_exec, scratch) {
                    return Some(start_cursor);
                }
            }
            index = index.checked_add(jump)?;
        }

        None
    }
737
    /// Slow-path full-width-anchor search over mapped offsets, reading probe
    /// bytes through the view one at a time; Horspool-style shifting where
    /// possible, single-byte steps when a probe byte is unreadable.
    #[allow(clippy::too_many_arguments)]
    fn scan_range_quick_direct(
        &self,
        range: Range<Offset>,
        start: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        linear_exec: bool,
        anchor: &[u8; ANCHOR_MAX_LEN],
        anchor_len: usize,
        anchor_offset: u64,
        scratch: &mut ExecScratch,
    ) -> Option<Offset> {
        let prefix = &anchor[..anchor_len];
        let window = u64::try_from(anchor_len).ok()?;
        // Shadow `start` with the anchor's position (match start + offset).
        let start = start.checked_add(anchor_offset)?;
        if start >= range.end {
            return None;
        }
        let total = range.end.checked_sub(start)?;
        if total < window {
            return None;
        }

        // Bad-character table: distance from each prefix byte to the end.
        let mut jumps = [anchor_len as u8; 256];
        for (index, byte) in prefix.iter().take(anchor_len.saturating_sub(1)).enumerate() {
            jumps[usize::from(*byte)] = (anchor_len - index - 1) as u8;
        }

        let last = prefix[anchor_len - 1];
        let mut index = 0u64;
        let max_index = total - window;
        while index <= max_index {
            let cursor = start.checked_add(index)?;
            let probe_at = cursor.checked_add(window - 1)?;
            // Unreadable probe byte: creep forward one offset at a time.
            let Some(probe) = self.view.read_u8(probe_at) else {
                index = index.checked_add(1)?;
                continue;
            };

            let jump = u64::from(jumps[usize::from(probe)].max(1));
            if probe == last
                && prefix_matches_mapped(self.view, cursor, prefix)
                && self.exec(
                    cursor.checked_sub(anchor_offset)?,
                    pat,
                    save,
                    linear_exec,
                    scratch,
                )
            {
                // Report the match start (anchor position minus the offset).
                return cursor.checked_sub(anchor_offset);
            }
            index = index.checked_add(jump)?;
        }

        None
    }
796
    /// Executes `pat` at `start` without backtracking. Only valid for
    /// patterns that `analyze_pattern` marked `linear_exec` (no SkipRange /
    /// Push / Pop / Case / Break atoms). On success, copies the working save
    /// slots back into `save` and returns true; any failure returns false
    /// without touching `save`.
    fn exec_linear(
        &self,
        start: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        scratch: &mut ExecScratch,
    ) -> bool {
        scratch.reset_from_save(save);
        let work_save = &mut scratch.work_save;
        let mut cursor = start;
        let mut pc = 0usize;
        // One-shot mask armed by Atom::Fuzzy, consumed by the next Byte.
        let mut fuzzy = None;
        let mut reader = ExecReader::new(self.view, cursor);

        loop {
            // Running off the end of the pattern means a complete match.
            let Some(atom) = pat.get(pc) else {
                scratch.commit_to_save(save);
                return true;
            };

            match *atom {
                // Compare one input byte under the pending fuzzy mask, if any.
                Atom::Byte(expected) => {
                    let Some(actual) = reader.read_u8(cursor) else {
                        return false;
                    };
                    let mask = fuzzy.take().unwrap_or(u8::MAX);
                    if (actual & mask) != (expected & mask) {
                        return false;
                    }
                    cursor = match cursor.checked_add(1) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Arm a mask for the next Byte comparison.
                Atom::Fuzzy(mask) => {
                    fuzzy = Some(mask);
                    pc += 1;
                }
                // Record the current cursor into a save slot (out-of-range
                // slots are silently ignored).
                Atom::Save(slot) => {
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = cursor;
                    }
                    pc += 1;
                }
                // Advance by n bytes; n == 0 means one pointer width.
                Atom::Skip(n) => {
                    let skip = if n == 0 {
                        u64::from(self.view.pointer_size_bytes())
                    } else {
                        u64::from(n)
                    };
                    cursor = match cursor.checked_add(skip) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Follow a rel8 displacement (base = byte after the disp).
                Atom::Jump1 => {
                    let Some(byte) = reader.read_u8(cursor) else {
                        return false;
                    };
                    let base = match cursor.checked_add(1) {
                        Some(next) => next,
                        None => return false,
                    };
                    let delta = i64::from(byte as i8);
                    cursor = if delta >= 0 {
                        match base.checked_add(delta as u64) {
                            Some(next) => next,
                            None => return false,
                        }
                    } else {
                        match base.checked_sub((-delta) as u64) {
                            Some(next) => next,
                            None => return false,
                        }
                    };
                    pc += 1;
                }
                // Follow a rel32 displacement (base = dword after the disp).
                Atom::Jump4 => {
                    let Some(disp) = reader.read_i32(cursor) else {
                        return false;
                    };
                    let base = match cursor.checked_add(4) {
                        Some(next) => next,
                        None => return false,
                    };
                    let delta = i64::from(disp);
                    cursor = if delta >= 0 {
                        match base.checked_add(delta as u64) {
                            Some(next) => next,
                            None => return false,
                        }
                    } else {
                        match base.checked_sub((-delta) as u64) {
                            Some(next) => next,
                            None => return false,
                        }
                    };
                    pc += 1;
                }
                // Dereference a pointer-sized value and continue at its target.
                Atom::Ptr => {
                    let Some(raw) = reader.read_pointer_raw(cursor, self.view.pointer_size_bytes())
                    else {
                        return false;
                    };
                    let Some(next) = self.view.follow_pointer_target(raw) else {
                        return false;
                    };
                    cursor = next;
                    pc += 1;
                }
                // rel32 displacement applied to a saved base (or the cursor
                // itself when the slot is out of range). NOTE(review): the
                // 4 displacement bytes are not consumed before the jump —
                // matches exec_backtracking, so presumably intentional.
                Atom::Pir(slot) => {
                    let Some(disp) = reader.read_i32(cursor) else {
                        return false;
                    };
                    let base = work_save.get(usize::from(slot)).copied().unwrap_or(cursor);
                    let delta = i64::from(disp);
                    cursor = if delta >= 0 {
                        match base.checked_add(delta as u64) {
                            Some(next) => next,
                            None => return false,
                        }
                    } else {
                        match base.checked_sub((-delta) as u64) {
                            Some(next) => next,
                            None => return false,
                        }
                    };
                    pc += 1;
                }
                // Read a sign-extended i8 into a save slot, advance 1.
                Atom::ReadI8(slot) => {
                    let Some(value) = reader.read_u8(cursor) else {
                        return false;
                    };
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = (value as i8) as i64 as u64;
                    }
                    cursor = match cursor.checked_add(1) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Read a zero-extended u8 into a save slot, advance 1.
                Atom::ReadU8(slot) => {
                    let Some(value) = reader.read_u8(cursor) else {
                        return false;
                    };
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = u64::from(value);
                    }
                    cursor = match cursor.checked_add(1) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Read a sign-extended i16 into a save slot, advance 2.
                Atom::ReadI16(slot) => {
                    let Some(value) = reader.read_i16(cursor) else {
                        return false;
                    };
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = value as i64 as u64;
                    }
                    cursor = match cursor.checked_add(2) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Read a zero-extended u16 into a save slot, advance 2.
                Atom::ReadU16(slot) => {
                    let Some(value) = reader.read_u16(cursor) else {
                        return false;
                    };
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = u64::from(value);
                    }
                    cursor = match cursor.checked_add(2) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Read a sign-extended i32 into a save slot, advance 4.
                Atom::ReadI32(slot) => {
                    let Some(value) = reader.read_i32(cursor) else {
                        return false;
                    };
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = value as i64 as u64;
                    }
                    cursor = match cursor.checked_add(4) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Read a zero-extended u32 into a save slot, advance 4.
                Atom::ReadU32(slot) => {
                    let Some(value) = reader.read_u32(cursor) else {
                        return false;
                    };
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = u64::from(value);
                    }
                    cursor = match cursor.checked_add(4) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Clear a save slot without touching the cursor.
                Atom::Zero(slot) => {
                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
                        *dst = 0;
                    }
                    pc += 1;
                }
                // Move the cursor backwards by n bytes.
                Atom::Back(n) => {
                    cursor = match cursor.checked_sub(u64::from(n)) {
                        Some(next) => next,
                        None => return false,
                    };
                    pc += 1;
                }
                // Require the cursor to be aligned to 2^align bytes.
                // NOTE(review): align >= 64 would overflow the shift — the
                // parser presumably bounds it; confirm.
                Atom::Aligned(align) => {
                    let mask = (1u64 << u64::from(align)).wrapping_sub(1);
                    if cursor & mask != 0 {
                        return false;
                    }
                    pc += 1;
                }
                // Require the cursor to equal a previously saved value
                // (out-of-range slots compare against 0).
                Atom::Check(slot) => {
                    let expected = work_save.get(usize::from(slot)).copied().unwrap_or(0);
                    if cursor != expected {
                        return false;
                    }
                    pc += 1;
                }
                Atom::Nop => {
                    pc += 1;
                }
                // Control-flow atoms require the backtracking engine; hitting
                // one here means analyze_pattern misclassified the pattern.
                Atom::SkipRange(_, _)
                | Atom::Push(_)
                | Atom::Pop
                | Atom::Case(_)
                | Atom::Break(_) => {
                    debug_assert!(
                        false,
                        "linear exec must only run on patterns without backtracking/control-flow atoms"
                    );
                    return false;
                }
            }
        }
    }
1050
    /// Executes `pat` at `start` with full backtracking support. Alternatives
    /// (SkipRange widths, Case branches) are pushed as `BacktrackState`s; on
    /// failure (`break` out of the inner loop) the next state is popped, the
    /// call stack truncated, and save-slot writes rolled back via the undo
    /// log. Returns true on the first complete match, committing the working
    /// slots into `save`.
    fn exec_backtracking(
        &self,
        start: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        scratch: &mut ExecScratch,
    ) -> bool {
        scratch.reset_from_save(save);
        let work_save = &mut scratch.work_save;
        scratch.calls.clear();
        scratch.save_log.clear();
        scratch.stack.clear();

        // Seed with the single initial thread of execution.
        scratch.stack.push(BacktrackState {
            cursor: start,
            pc: 0,
            fuzzy: None,
            calls_len: 0,
            save_log_len: 0,
        });

        // Undo save-slot writes down to `target_len` log entries.
        #[inline]
        fn rollback(save: &mut [Offset], log: &mut Vec<(usize, Offset)>, target_len: usize) {
            while log.len() > target_len {
                let (slot, old) = log.pop().expect("save log length checked before pop");
                save[slot] = old;
            }
        }

        // Write a slot and record its previous value for rollback
        // (out-of-range slots are silently ignored, as in exec_linear).
        #[inline]
        fn assign_save(
            save: &mut [Offset],
            log: &mut Vec<(usize, Offset)>,
            slot: usize,
            value: Offset,
        ) {
            if let Some(dst) = save.get_mut(slot) {
                log.push((slot, *dst));
                *dst = value;
            }
        }

        while let Some(state) = scratch.stack.pop() {
            // Restore the call stack and save slots to this state's snapshot.
            scratch.calls.truncate(state.calls_len);
            rollback(work_save, &mut scratch.save_log, state.save_log_len);

            let mut cursor = state.cursor;
            let mut pc = state.pc;
            let mut fuzzy = state.fuzzy;
            let mut reader = ExecReader::new(self.view, cursor);
            loop {
                // Running off the end of the pattern means a complete match.
                let Some(atom) = pat.get(pc) else {
                    scratch.commit_to_save(save);
                    return true;
                };

                match *atom {
                    // Compare one byte under the pending fuzzy mask; any
                    // failure `break`s to the next backtrack state.
                    Atom::Byte(expected) => {
                        let Some(actual) = reader.read_u8(cursor) else {
                            break;
                        };
                        let mask = fuzzy.take().unwrap_or(u8::MAX);
                        if (actual & mask) != (expected & mask) {
                            break;
                        }
                        let Some(next) = cursor.checked_add(1) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Arm a mask for the next Byte comparison.
                    Atom::Fuzzy(mask) => {
                        fuzzy = Some(mask);
                        pc += 1;
                    }
                    // Record the cursor into a save slot (undo-logged).
                    Atom::Save(slot) => {
                        assign_save(work_save, &mut scratch.save_log, usize::from(slot), cursor);
                        pc += 1;
                    }
                    // Advance by n bytes; n == 0 means one pointer width.
                    Atom::Skip(n) => {
                        let skip = if n == 0 {
                            u64::from(self.view.pointer_size_bytes())
                        } else {
                            u64::from(n)
                        };
                        let Some(next) = cursor.checked_add(skip) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Try every skip width in [min, max]: alternatives are
                    // pushed largest-first so the LIFO pop order tries the
                    // smallest width first, continuing inline with `min`.
                    Atom::SkipRange(min, max) => {
                        debug_assert!(
                            min <= max,
                            "pattern parser enforces inclusive skip ranges with min <= max"
                        );
                        let min = u64::from(min);
                        let max = u64::from(max);
                        for delta in ((min + 1)..=max).rev() {
                            if let Some(next_cursor) = cursor.checked_add(delta) {
                                if scratch.stack.len() >= MAX_BACKTRACK_STATES {
                                    debug_assert!(
                                        false,
                                        "scanner backtracking stack must stay below MAX_BACKTRACK_STATES for bounded memory"
                                    );
                                    return false;
                                }
                                scratch.stack.push(BacktrackState {
                                    cursor: next_cursor,
                                    pc: pc + 1,
                                    fuzzy,
                                    calls_len: scratch.calls.len(),
                                    save_log_len: scratch.save_log.len(),
                                });
                            }
                        }
                        let Some(next) = cursor.checked_add(min) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Push a return cursor (current position plus skip) for a
                    // later Pop; skip == 0 means one pointer width.
                    Atom::Push(skip) => {
                        let skip = if skip == 0 {
                            u64::from(self.view.pointer_size_bytes())
                        } else {
                            u64::from(skip)
                        };
                        let Some(resume_cursor) = cursor.checked_add(skip) else {
                            break;
                        };
                        scratch.calls.push(resume_cursor);
                        pc += 1;
                    }
                    // Return to the most recent pushed cursor.
                    Atom::Pop => {
                        let Some(resume_cursor) = scratch.calls.pop() else {
                            break;
                        };
                        cursor = resume_cursor;
                        pc += 1;
                    }
                    // Follow a rel8 displacement (base = byte after the disp).
                    Atom::Jump1 => {
                        let Some(byte) = reader.read_u8(cursor) else {
                            break;
                        };
                        let disp = byte as i8;
                        let Some(base) = cursor.checked_add(1) else {
                            break;
                        };
                        let delta = i64::from(disp);
                        if delta >= 0 {
                            let Some(next) = base.checked_add(delta as u64) else {
                                break;
                            };
                            cursor = next;
                        } else {
                            let Some(next) = base.checked_sub((-delta) as u64) else {
                                break;
                            };
                            cursor = next;
                        }
                        pc += 1;
                    }
                    // Follow a rel32 displacement (base = dword after disp).
                    Atom::Jump4 => {
                        let Some(disp) = reader.read_i32(cursor) else {
                            break;
                        };
                        let Some(base) = cursor.checked_add(4) else {
                            break;
                        };
                        let delta = i64::from(disp);
                        if delta >= 0 {
                            let Some(next) = base.checked_add(delta as u64) else {
                                break;
                            };
                            cursor = next;
                        } else {
                            let Some(next) = base.checked_sub((-delta) as u64) else {
                                break;
                            };
                            cursor = next;
                        }
                        pc += 1;
                    }
                    // Dereference a pointer-sized value; continue at target.
                    Atom::Ptr => {
                        let Some(raw) =
                            reader.read_pointer_raw(cursor, self.view.pointer_size_bytes())
                        else {
                            break;
                        };
                        let Some(next) = self.view.follow_pointer_target(raw) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // rel32 displacement applied to a saved base (or cursor
                    // when the slot is out of range); disp bytes not consumed
                    // — mirrors exec_linear.
                    Atom::Pir(slot) => {
                        let Some(disp) = reader.read_i32(cursor) else {
                            break;
                        };
                        let base = work_save.get(usize::from(slot)).copied().unwrap_or(cursor);
                        let delta = i64::from(disp);
                        if delta >= 0 {
                            let Some(next) = base.checked_add(delta as u64) else {
                                break;
                            };
                            cursor = next;
                        } else {
                            let Some(next) = base.checked_sub((-delta) as u64) else {
                                break;
                            };
                            cursor = next;
                        }
                        pc += 1;
                    }
                    // Read a sign-extended i8 into a slot (undo-logged), +1.
                    Atom::ReadI8(slot) => {
                        let Some(value) = reader.read_u8(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            (value as i8) as i64 as u64,
                        );
                        let Some(next) = cursor.checked_add(1) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Read a zero-extended u8 into a slot (undo-logged), +1.
                    Atom::ReadU8(slot) => {
                        let Some(value) = reader.read_u8(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            u64::from(value),
                        );
                        let Some(next) = cursor.checked_add(1) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Read a sign-extended i16 into a slot (undo-logged), +2.
                    Atom::ReadI16(slot) => {
                        let Some(value) = reader.read_i16(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            value as i64 as u64,
                        );
                        let Some(next) = cursor.checked_add(2) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Read a zero-extended u16 into a slot (undo-logged), +2.
                    Atom::ReadU16(slot) => {
                        let Some(value) = reader.read_u16(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            u64::from(value),
                        );
                        let Some(next) = cursor.checked_add(2) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Read a sign-extended i32 into a slot (undo-logged), +4.
                    Atom::ReadI32(slot) => {
                        let Some(value) = reader.read_i32(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            value as i64 as u64,
                        );
                        let Some(next) = cursor.checked_add(4) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Read a zero-extended u32 into a slot (undo-logged), +4.
                    Atom::ReadU32(slot) => {
                        let Some(value) = reader.read_u32(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            u64::from(value),
                        );
                        let Some(next) = cursor.checked_add(4) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Clear a save slot (undo-logged).
                    Atom::Zero(slot) => {
                        assign_save(work_save, &mut scratch.save_log, usize::from(slot), 0);
                        pc += 1;
                    }
                    // Move the cursor backwards by n bytes.
                    Atom::Back(n) => {
                        let Some(next) = cursor.checked_sub(u64::from(n)) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    // Require the cursor to be aligned to 2^align bytes.
                    Atom::Aligned(align) => {
                        let mask = (1u64 << u64::from(align)).wrapping_sub(1);
                        if cursor & mask != 0 {
                            break;
                        }
                        pc += 1;
                    }
                    // Require the cursor to equal a previously saved value.
                    Atom::Check(slot) => {
                        let expected = work_save.get(usize::from(slot)).copied().unwrap_or(0);
                        if cursor != expected {
                            break;
                        }
                        pc += 1;
                    }
                    // Alternation: push the branch at pc+skip as a fallback,
                    // then continue with the inline branch.
                    Atom::Case(skip) => {
                        let Some(next_pc) = pc.checked_add(usize::from(skip)) else {
                            break;
                        };
                        if scratch.stack.len() >= MAX_BACKTRACK_STATES {
                            debug_assert!(
                                false,
                                "scanner backtracking stack must stay below MAX_BACKTRACK_STATES for bounded memory"
                            );
                            return false;
                        }
                        scratch.stack.push(BacktrackState {
                            cursor,
                            pc: next_pc,
                            fuzzy,
                            calls_len: scratch.calls.len(),
                            save_log_len: scratch.save_log.len(),
                        });
                        pc += 1;
                    }
                    // Unconditionally jump forward past skip+1 atoms (exits a
                    // Case branch without failing).
                    Atom::Break(skip) => {
                        let Some(next_pc) = pc
                            .checked_add(usize::from(skip))
                            .and_then(|value| value.checked_add(1))
                        else {
                            break;
                        };
                        pc = next_pc;
                    }
                    Atom::Nop => {
                        pc += 1;
                    }
                }
            }
        }

        // All alternatives exhausted without a match.
        false
    }
1425}
1426
/// Streaming match iterator returned by the scanner.
///
/// Holds the pattern, the precomputed anchor/jump-table metadata from pattern
/// analysis, and the resume position (`range_index` + `cursor`) so repeated
/// `next` calls enumerate successive matches across the view's code spans.
#[derive(Clone, Debug)]
pub struct Matches<'a, 'p, B: BinaryView> {
    /// Underlying scanner (view access + atom interpreter).
    scanner: Scanner<'a, B>,
    /// Compiled pattern atoms executed at each candidate offset.
    pat: &'p [Atom],
    /// Number of save slots the pattern writes; `next` only touches this many.
    required_slots: usize,
    /// True when the pattern has no backtracking atoms (fast interpreter path).
    linear_exec: bool,
    /// Index of the code span currently being scanned.
    range_index: usize,
    /// Mapped offset to resume from inside the current span; `None` means
    /// "start of span".
    cursor: Option<Offset>,
    /// Literal anchor bytes extracted from the pattern prefix.
    anchor: [u8; ANCHOR_MAX_LEN],
    /// Number of valid bytes in `anchor` (0 disables anchored scanning).
    anchor_len: usize,
    /// Byte distance from the pattern start to the anchor window.
    anchor_offset: u64,
    /// Horspool-style bad-character shift table for the anchor.
    anchor_jumps: [u8; 256],
    /// Reusable interpreter scratch (backtrack stack, save log, call stack).
    scratch: ExecScratch,
}
1442
impl<'a, 'p, B: BinaryView> Matches<'a, 'p, B> {
    /// Advances to the next match within the view's code spans.
    ///
    /// On success returns `true` and writes the pattern's captured offsets
    /// into `save` (only the first `required_slots` entries are touched); the
    /// iterator resumes one byte past the match start on the next call.
    pub fn next(&mut self, save: &mut [Offset]) -> bool {
        debug_assert!(
            save.len() >= self.required_slots,
            "caller-provided save buffer must cover all slots referenced by the pattern"
        );
        // Trim to the slots the pattern actually uses.
        let save = &mut save[..self.required_slots];
        while let Some(span) = self.scanner.view.code_spans().get(self.range_index) {
            // Resume from the stored cursor, or the span start on first entry.
            let start = self.cursor.unwrap_or(span.mapped.start);
            if start >= span.mapped.end {
                // Span exhausted: move on to the next code span.
                self.range_index += 1;
                self.cursor = None;
                continue;
            }
            // Dispatch on how much literal anchor material pattern analysis
            // extracted: none -> plain linear scan, short anchor -> memchr on
            // the first anchor byte, full-length anchor -> Horspool-style
            // skip scan.
            let matched_at = if self.anchor_len == 0 {
                self.scan_range_linear(span.mapped.clone(), start, save)
            } else if self.anchor_len < 4 {
                // NOTE(review): literal `4` appears to mirror ANCHOR_MAX_LEN;
                // keep them in sync if the constant ever changes.
                self.scan_span_first_byte(span, start, save)
            } else {
                self.scan_span_quick(span, start, save)
            };

            if let Some(cursor) = matched_at {
                // NOTE(review): if `cursor + 1` overflowed (None), the next
                // call would rescan this span from its start; unreachable for
                // realistic offsets but worth confirming.
                self.cursor = cursor.checked_add(1);
                return true;
            }

            self.range_index += 1;
            self.cursor = None;
        }

        false
    }

    /// Fallback scan: run the full pattern at every offset of `range` until
    /// it matches or the range is exhausted.
    fn scan_range_linear(
        &mut self,
        range: Range<Offset>,
        mut cursor: Offset,
        save: &mut [Offset],
    ) -> Option<Offset> {
        while cursor < range.end {
            if self
                .scanner
                .exec(cursor, self.pat, save, self.linear_exec, &mut self.scratch)
            {
                return Some(cursor);
            }
            let next = cursor.checked_add(1)?;
            cursor = next;
        }
        None
    }

    /// memchr-accelerated scan over the span's file-image bytes for the first
    /// anchor byte; confirms the remaining anchor bytes before running the
    /// full pattern. Falls back to `scan_range_first_byte` whenever the span
    /// bytes or any offset translation is unavailable.
    fn scan_span_first_byte(
        &mut self,
        span: &CodeSpan,
        start: Offset,
        save: &mut [Offset],
    ) -> Option<Offset> {
        let Some(bytes) = self.scanner.view.image().get(span.file.clone()) else {
            return self.scan_range_first_byte(span.mapped.clone(), start, save);
        };
        // The anchor window sits `anchor_offset` bytes past the pattern start.
        let Some(anchor_start) = start.checked_add(self.anchor_offset) else {
            return self.scan_range_first_byte(span.mapped.clone(), start, save);
        };
        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
            return self.scan_range_first_byte(span.mapped.clone(), start, save);
        };
        let Some(start_index) = start_file.checked_sub(span.file.start) else {
            return self.scan_range_first_byte(span.mapped.clone(), start, save);
        };

        debug_assert_eq!(
            span.mapped.end.checked_sub(span.mapped.start),
            u64::try_from(span.file.end.saturating_sub(span.file.start)).ok(),
            "code span mapped/file ranges must have identical lengths"
        );

        let Some(haystack) = bytes.get(start_index..) else {
            return self.scan_range_first_byte(span.mapped.clone(), start, save);
        };
        let needle = self.anchor[0];
        let anchor = &self.anchor[..self.anchor_len];
        let anchor_offset = usize::try_from(self.anchor_offset).ok()?;
        for delta in memchr_iter(needle, haystack) {
            // Verify the whole anchor window before paying for pattern
            // execution; a window running past the haystack is a mismatch.
            if self.anchor_len > 1
                && haystack
                    .get(delta..delta + self.anchor_len)
                    .is_none_or(|window| window != anchor)
            {
                continue;
            }
            let anchor_index = start_index.checked_add(delta)?;
            // Back up from the anchor hit to the pattern's start offset;
            // hits too close to the span start cannot host the pattern.
            let Some(start_index_in_span) = anchor_index.checked_sub(anchor_offset) else {
                continue;
            };
            let mapped_delta = Offset::try_from(start_index_in_span).ok()?;
            let cursor = span.mapped.start.checked_add(mapped_delta)?;
            if self
                .scanner
                .exec(cursor, self.pat, save, self.linear_exec, &mut self.scratch)
            {
                return Some(cursor);
            }
        }
        None
    }

    /// Byte-at-a-time fallback: probe the mapped address where the first
    /// anchor byte should sit and run the pattern only on probe hits.
    fn scan_range_first_byte(
        &mut self,
        range: Range<Offset>,
        mut cursor: Offset,
        save: &mut [Offset],
    ) -> Option<Offset> {
        let needle = self.anchor[0];
        // `probe` tracks `cursor + anchor_offset` without re-adding each step.
        let mut probe = cursor.checked_add(self.anchor_offset)?;
        while probe < range.end {
            if self.scanner.view.read_u8(probe) == Some(needle)
                && self
                    .scanner
                    .exec(cursor, self.pat, save, self.linear_exec, &mut self.scratch)
            {
                return Some(cursor);
            }
            cursor = cursor.checked_add(1)?;
            probe = probe.checked_add(1)?;
        }
        None
    }

    /// Horspool-style skip scan over mapped addresses (used when the span's
    /// file bytes are unavailable): probe each window's last byte, shift by
    /// the jump table on mismatch, and verify + execute on candidates.
    fn scan_range_quick(
        &mut self,
        range: Range<Offset>,
        start: Offset,
        save: &mut [Offset],
    ) -> Option<Offset> {
        let prefix = &self.anchor[..self.anchor_len];
        let window = u64::try_from(self.anchor_len).ok()?;
        // Shift `start` so the scan walks anchor-window positions.
        let start = start.checked_add(self.anchor_offset)?;
        if start >= range.end {
            return None;
        }
        let total = range.end.checked_sub(start)?;
        if total < window {
            // Not enough bytes left for a full anchor window.
            return None;
        }

        let last = prefix[self.anchor_len - 1];
        let mut index = 0u64;
        let max_index = total - window;
        while index <= max_index {
            let cursor = start.checked_add(index)?;
            let probe_at = cursor.checked_add(window - 1)?;
            let Some(probe) = self.scanner.view.read_u8(probe_at) else {
                // Unreadable byte: degrade to single-step advancement.
                index = index.checked_add(1)?;
                continue;
            };

            // `.max(1)` guarantees forward progress even for a zero entry.
            let jump = u64::from(self.anchor_jumps[usize::from(probe)].max(1));
            if probe == last
                && self.prefix_matches_mapped(cursor)
                && self.scanner.exec(
                    cursor.checked_sub(self.anchor_offset)?,
                    self.pat,
                    save,
                    self.linear_exec,
                    &mut self.scratch,
                )
            {
                // Report the pattern start, not the anchor position.
                return cursor.checked_sub(self.anchor_offset);
            }
            index = index.checked_add(jump)?;
        }

        None
    }

    /// Horspool-style skip scan over the span's file-image bytes; falls back
    /// to `scan_range_quick` whenever the bytes or any offset translation is
    /// unavailable.
    fn scan_span_quick(
        &mut self,
        span: &CodeSpan,
        start: Offset,
        save: &mut [Offset],
    ) -> Option<Offset> {
        let Some(bytes) = self.scanner.view.image().get(span.file.clone()) else {
            return self.scan_range_quick(span.mapped.clone(), start, save);
        };
        let Some(anchor_start) = start.checked_add(self.anchor_offset) else {
            return self.scan_range_quick(span.mapped.clone(), start, save);
        };
        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
            return self.scan_range_quick(span.mapped.clone(), start, save);
        };
        let Some(start_index) = start_file.checked_sub(span.file.start) else {
            return self.scan_range_quick(span.mapped.clone(), start, save);
        };

        let prefix = &self.anchor[..self.anchor_len];
        let Some(haystack) = bytes.get(start_index..) else {
            return self.scan_range_quick(span.mapped.clone(), start, save);
        };
        if haystack.len() < self.anchor_len {
            return None;
        }

        let last = prefix[self.anchor_len - 1];
        let mut index = 0usize;
        let max_index = haystack.len() - self.anchor_len;
        while index <= max_index {
            // Probe the last byte of the candidate window first.
            let probe = haystack[index + self.anchor_len - 1];

            let jump = usize::from(self.anchor_jumps[usize::from(probe)].max(1));
            if probe == last
                && haystack
                    .get(index..index + self.anchor_len)
                    .is_some_and(|window| window == prefix)
            {
                // Translate the in-span index back to a mapped pattern start.
                let total_index = start_index.checked_add(index)?;
                let mapped_delta = Offset::try_from(total_index).ok()?;
                let cursor = span.mapped.start.checked_add(mapped_delta)?;
                let start_cursor = cursor.checked_sub(self.anchor_offset)?;
                if self.scanner.exec(
                    start_cursor,
                    self.pat,
                    save,
                    self.linear_exec,
                    &mut self.scratch,
                ) {
                    return Some(start_cursor);
                }
            }
            index = index.checked_add(jump)?;
        }

        None
    }

    /// Verifies the anchor bytes at `cursor` via mapped reads (used by the
    /// mapped-address quick scan, which cannot slice the file image).
    fn prefix_matches_mapped(&self, cursor: Offset) -> bool {
        for (index, expected) in self.anchor[..self.anchor_len].iter().enumerate() {
            let delta = index as u64;
            let Some(offset) = cursor.checked_add(delta) else {
                return false;
            };
            if self.scanner.view.read_u8(offset) != Some(*expected) {
                return false;
            }
        }
        true
    }
}
1703
1704fn build_prefix(pat: &[Atom]) -> ([u8; PREFIX_BUF_LEN], usize) {
1705 let mut prefix = [0u8; PREFIX_BUF_LEN];
1706 let mut len = 0usize;
1707 for atom in pat {
1708 match *atom {
1709 Atom::Byte(byte) => {
1710 if len >= PREFIX_BUF_LEN {
1711 break;
1712 }
1713 prefix[len] = byte;
1714 len += 1;
1715 }
1716 Atom::Save(_) | Atom::Aligned(_) | Atom::Nop => {}
1717 _ => break,
1718 }
1719 }
1720 (prefix, len)
1721}
1722
1723fn analyze_pattern(pat: &[Atom]) -> PatternPlan {
1724 let required_slots = save_len(pat);
1725 let linear_exec = is_linear_pattern(pat);
1726 let (prefix, prefix_len) = build_prefix(pat);
1727 let (anchor, anchor_len, anchor_offset) = select_anchor(&prefix, prefix_len);
1728 let anchor_jumps = build_anchor_jumps(&anchor, anchor_len);
1729 PatternPlan {
1730 required_slots,
1731 linear_exec,
1732 anchor,
1733 anchor_len,
1734 anchor_offset,
1735 anchor_jumps,
1736 }
1737}
1738
1739fn build_anchor_jumps(anchor: &[u8; ANCHOR_MAX_LEN], anchor_len: usize) -> [u8; 256] {
1740 let default_jump = anchor_len.max(1) as u8;
1741 let mut jumps = [default_jump; 256];
1742 for (index, byte) in anchor.iter().take(anchor_len.saturating_sub(1)).enumerate() {
1743 jumps[usize::from(*byte)] = (anchor_len - index - 1) as u8;
1744 }
1745 jumps
1746}
1747
1748fn select_anchor(
1755 prefix: &[u8; PREFIX_BUF_LEN],
1756 prefix_len: usize,
1757) -> ([u8; ANCHOR_MAX_LEN], usize, u64) {
1758 let mut anchor = [0u8; ANCHOR_MAX_LEN];
1759 if prefix_len == 0 {
1760 return (anchor, 0, 0);
1761 }
1762
1763 let anchor_len = prefix_len.min(ANCHOR_MAX_LEN);
1764 let mut best_start = 0usize;
1765 let mut best_score = 0u32;
1766 for start in 0..=prefix_len - anchor_len {
1767 let score = anchor_window_score(&prefix[start..start + anchor_len]);
1768 if score > best_score || (score == best_score && start > best_start) {
1769 best_score = score;
1770 best_start = start;
1771 }
1772 }
1773
1774 for (index, byte) in prefix[best_start..best_start + anchor_len]
1775 .iter()
1776 .copied()
1777 .enumerate()
1778 {
1779 anchor[index] = byte;
1780 }
1781 (anchor, anchor_len, best_start as u64)
1782}
1783
/// Scores a candidate anchor window; higher is a better anchor.
///
/// Rewards byte diversity (`distinct`), rare bytes (`anchor_byte_weight`),
/// and a rare final byte (the scanner probes it first), while penalizing
/// duplicated bytes. The penalty is applied with `saturating_sub` so that
/// pathological windows (longer than ANCHOR_MAX_LEN, dominated by duplicate
/// low-weight bytes) clamp to 0 instead of underflowing `u32` — the previous
/// plain subtraction would panic in debug builds for such inputs.
fn anchor_window_score(window: &[u8]) -> u32 {
    let mut seen = [false; 256];
    let mut distinct = 0u32;
    let mut byte_score = 0u32;
    for byte in window.iter().copied() {
        let idx = usize::from(byte);
        if !seen[idx] {
            seen[idx] = true;
            distinct += 1;
        }
        byte_score += anchor_byte_weight(byte);
    }

    let duplicate_count = window.len() as u32 - distinct;
    // The last byte drives the Horspool probe, so weight it twice.
    let last_weight = window.last().copied().map(anchor_byte_weight).unwrap_or(0);
    ((distinct * 8) + byte_score + (last_weight * 2)).saturating_sub(duplicate_count * 3)
}

/// Relative rarity weight of a byte when used as an anchor.
///
/// The low-weight groups look like bytes that are extremely common in x86-64
/// machine code (zero/padding bytes, frequent opcodes, REX prefixes,
/// push/pop, short jumps, size-override prefixes); everything else is treated
/// as comparatively rare. NOTE(review): grouping inferred from the byte
/// values — confirm against the encoder this scanner targets.
fn anchor_byte_weight(byte: u8) -> u32 {
    match byte {
        0x00 | 0x48 | 0x8b | 0x89 | 0x90 | 0xcc | 0xe8 | 0xe9 | 0xff => 1,
        0x40..=0x4f | 0x50..=0x5f | 0x70..=0x7f => 2,
        0x66 | 0x67 => 2,
        _ => 4,
    }
}
1818
1819fn is_linear_pattern(pat: &[Atom]) -> bool {
1820 !pat.iter().any(|atom| {
1821 matches!(
1822 atom,
1823 Atom::SkipRange(_, _) | Atom::Push(_) | Atom::Pop | Atom::Case(_) | Atom::Break(_)
1824 )
1825 })
1826}
1827
1828fn span_index_for_offset(spans: &[CodeSpan], offset: Offset) -> Option<usize> {
1829 let mut low = 0usize;
1830 let mut high = spans.len();
1831 while low < high {
1832 let mid = low + (high - low) / 2;
1833 let span = &spans[mid];
1834 if span.mapped.end <= offset {
1835 low = mid + 1;
1836 } else {
1837 high = mid;
1838 }
1839 }
1840
1841 spans.get(low).and_then(|span| {
1842 if span.mapped.contains(&offset) {
1843 Some(low)
1844 } else {
1845 None
1846 }
1847 })
1848}
1849
1850fn mapped_to_file_offset(span: &CodeSpan, mapped: Offset) -> Option<usize> {
1851 let delta = mapped.checked_sub(span.mapped.start)?;
1852 if mapped >= span.mapped.end {
1853 return None;
1854 }
1855 let delta_usize = usize::try_from(delta).ok()?;
1856 span.file.start.checked_add(delta_usize)
1857}
1858
1859fn prefix_matches_mapped<B: BinaryView>(view: &B, cursor: Offset, prefix: &[u8]) -> bool {
1860 for (index, expected) in prefix.iter().enumerate() {
1861 let Some(offset) = cursor.checked_add(index as u64) else {
1862 return false;
1863 };
1864 if view.read_u8(offset) != Some(*expected) {
1865 return false;
1866 }
1867 }
1868 true
1869}
1870
#[cfg(test)]
mod tests {
    //! Scanner unit tests covering atom semantics, pattern-analysis helpers,
    //! span lookup, and prepared-pattern parity, plus a proptest comparing
    //! single-byte scans against a manual search.

    use proptest::prelude::*;

    use super::{
        BinaryView, CodeSpan, Offset, PreparedPattern, Scanner, build_prefix, is_linear_pattern,
        select_anchor, span_index_for_offset,
    };
    use crate::pattern::Atom;

    // Minimal in-memory BinaryView fixture backed by a byte vector.
    #[derive(Debug)]
    struct TestView {
        bytes: Vec<u8>,
        spans: Vec<CodeSpan>,
    }

    impl TestView {
        // One code span covering all bytes with an identity mapped/file layout.
        fn new(bytes: &[u8]) -> Self {
            let end = bytes.len() as Offset;
            Self {
                bytes: bytes.to_vec(),
                spans: vec![CodeSpan {
                    mapped: 0..end,
                    file: 0..bytes.len(),
                }],
            }
        }
    }

    impl BinaryView for TestView {
        fn image(&self) -> &[u8] {
            &self.bytes
        }

        fn code_spans(&self) -> &[CodeSpan] {
            &self.spans
        }

        fn mapped_to_file_offset(&self, offset: Offset) -> Option<usize> {
            // Identity translation, valid only while in bounds.
            usize::try_from(offset)
                .ok()
                .filter(|index| *index < self.bytes.len())
        }
    }

    // SkipRange matches the shortest distance first: with 0x41 at offsets 1
    // and 2, a skip of 0 from offset 1 wins (save[1] == 1).
    #[test]
    fn skip_range_tries_shorter_distances_first() {
        let view = TestView::new(&[0x00, 0x41, 0x41]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::SkipRange(0, 2),
            Atom::Save(1),
            Atom::Byte(0x41),
        ];
        let mut matches = scanner.matches_code(&pat);
        let mut save = [0u64; 2];

        assert!(matches.next(&mut save));
        assert_eq!(save[1], 1);
    }

    // When the short distance fails, SkipRange backtracks to longer ones.
    #[test]
    fn skip_range_backtracks_to_later_distances() {
        let view = TestView::new(&[0x00, 0x00, 0x41]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::SkipRange(0, 2),
            Atom::Save(1),
            Atom::Byte(0x41),
        ];
        let mut matches = scanner.matches_code(&pat);
        let mut save = [0u64; 2];

        assert!(matches.next(&mut save));
        assert_eq!(save[1], 2);
    }

    // Fuzzy(mask) applies only to the immediately following Byte comparison;
    // the byte after that must match exactly.
    #[test]
    fn fuzzy_masks_only_the_next_byte_match() {
        let view = TestView::new(&[0xab, 0x0f]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Fuzzy(0xf0),
            Atom::Byte(0xa0),
            Atom::Byte(0x0f),
        ];
        let mut save = [0u64; 1];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
    }

    // Nop is a pure no-op in the interpreter.
    #[test]
    fn nop_does_not_change_matching_behavior() {
        let view = TestView::new(&[0x41]);
        let scanner = Scanner::new(&view);
        let pat = [Atom::Save(0), Atom::Nop, Atom::Byte(0x41)];
        let mut save = [0u64; 1];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
    }

    // Ptr reads a pointer-width target and continues matching there; Save
    // after it captures the destination offset.
    #[test]
    fn ptr_follows_mapped_target_and_captures_destination() {
        let mut bytes = vec![0x68];
        bytes.extend_from_slice(&12u64.to_le_bytes());
        bytes.extend_from_slice(&[0, 0, 0]);
        bytes.extend_from_slice(&[0x31, 0xc0, 0xc3]);
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0x68),
            Atom::Ptr,
            Atom::Save(1),
            Atom::Byte(0x31),
            Atom::Byte(0xc0),
            Atom::Byte(0xc3),
        ];
        let mut save = [0u64; 2];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
        assert_eq!(save[1], 12);
    }

    // A Ptr target outside every code span must fail the match.
    #[test]
    fn ptr_fails_when_target_is_not_mapped() {
        let mut bytes = vec![0x68];
        bytes.extend_from_slice(&1024u64.to_le_bytes());
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [Atom::Save(0), Atom::Byte(0x68), Atom::Ptr, Atom::Byte(0x90)];
        let mut save = [0u64; 1];

        assert!(!scanner.matches_code(&pat).next(&mut save));
    }

    // Push(0) defaults to pointer width (8 bytes): after Pop, the cursor
    // resumes 8 bytes past the push point.
    #[test]
    fn push_zero_uses_pointer_width_for_resume_cursor() {
        let view = TestView::new(&[0xaa, 0, 0, 0, 0, 0, 0, 0, 0x55]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Push(0),
            Atom::Byte(0xaa),
            Atom::Pop,
            Atom::Save(1),
            Atom::Byte(0x55),
        ];
        let mut save = [0u64; 2];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[1], 8);
    }

    // Pir(slot) adds a signed disp32 to the saved base: here disp32 = -4
    // from a base of 4, landing back on the 0x90 at offset 0.
    #[test]
    fn pir_follows_saved_base_slot_with_signed_disp32() {
        let bytes = [0x90, 0x11, 0x22, 0x33, 0xfc, 0xff, 0xff, 0xff];
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Skip(4),
            Atom::Save(1),
            Atom::Pir(1),
            Atom::Byte(0x90),
        ];
        let mut save = [0u64; 2];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
        assert_eq!(save[1], 4);
    }

    // Pir must fail cleanly when fewer than 4 bytes remain for the disp32.
    #[test]
    fn pir_fails_when_disp32_cannot_be_read() {
        let bytes = [0x90, 0x01, 0x02, 0x03];
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0x90),
            Atom::Pir(0),
            Atom::Byte(0x90),
        ];
        let mut save = [0u64; 1];

        assert!(!scanner.finds_code(&pat, &mut save));
    }

    // finds_code demands a unique match; two 0xaa hits must report failure.
    #[test]
    fn finds_code_uses_consistent_save_semantics_for_uniqueness() {
        let view = TestView::new(&[0x00, 0xaa, 0xaa]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0xaa),
            Atom::Save(1),
            Atom::Check(1),
        ];
        let mut save = [0u64; 2];

        assert!(!scanner.finds_code(&pat, &mut save));
    }

    // required_slots is highest-slot + 1, counting gaps (slot 2 -> 3 slots).
    #[test]
    fn prepared_pattern_exposes_required_slots() {
        let pat = vec![
            Atom::Save(0),
            Atom::Byte(0xaa),
            Atom::Save(2),
            Atom::Byte(0xbb),
        ];
        let prepared = PreparedPattern::from_atoms(pat);
        assert_eq!(prepared.required_slots(), 3);
    }

    // Prepared and ad-hoc execution must produce identical captures.
    #[test]
    fn matches_prepared_matches_runtime_behavior() {
        let view = TestView::new(&[0x00, 0xaa, 0xbb]);
        let scanner = Scanner::new(&view);
        let pat = [Atom::Save(0), Atom::Byte(0xaa), Atom::Byte(0xbb)];
        let prepared = scanner.prepare_pattern(&pat);

        let mut save_runtime = [0u64; 1];
        let mut save_prepared = [0u64; 1];
        assert!(scanner.matches_code(&pat).next(&mut save_runtime));
        assert!(scanner.matches_prepared(&prepared).next(&mut save_prepared));
        assert_eq!(save_runtime, save_prepared);
    }

    // Hex pattern text parses and scans like hand-built atoms.
    #[test]
    fn prepare_pattern_str_parses_runtime_text() {
        let view = TestView::new(&[0x00, 0xaa, 0xbb]);
        let scanner = Scanner::new(&view);
        let prepared = scanner
            .prepare_pattern_str("AA BB")
            .expect("runtime pattern text should parse");

        let mut save = vec![0u64; prepared.required_slots()];
        assert!(scanner.matches_prepared(&prepared).next(&mut save));
    }

    // Malformed pattern text surfaces as a parse error, not a panic.
    #[test]
    fn prepare_pattern_str_reports_parse_errors() {
        let view = TestView::new(&[]);
        let scanner = Scanner::new(&view);
        assert!(scanner.prepare_pattern_str("A?").is_err());
    }

    // The Horspool quick path must still find a match whose window ends on
    // the very last byte of the range.
    #[test]
    fn quick_prefix_strategy_finds_match_near_range_end() {
        let view = TestView::new(&[0x00, 0x11, 0x22, 0x33, 0x44]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0x11),
            Atom::Byte(0x22),
            Atom::Byte(0x33),
            Atom::Byte(0x44),
        ];
        let mut save = [0u64; 1];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 1);
    }

    // Save/Aligned/Nop are transparent to prefix collection; Check stops it.
    #[test]
    fn prefix_builder_keeps_optimizable_leading_atoms() {
        let (prefix, len) = build_prefix(&[
            Atom::Save(0),
            Atom::Aligned(0),
            Atom::Nop,
            Atom::Byte(0xaa),
            Atom::Save(1),
            Atom::Byte(0xbb),
            Atom::Check(1),
            Atom::Byte(0xcc),
        ]);

        assert_eq!(len, 2);
        assert_eq!(&prefix[..len], &[0xaa, 0xbb]);
    }

    // The distinctive 0xdeadbeef window should beat later windows that
    // contain common x86 bytes (0x48, 0x8b).
    #[test]
    fn anchor_selection_prefers_stronger_window_over_common_suffix() {
        let mut prefix = [0u8; super::PREFIX_BUF_LEN];
        let bytes = [0xde, 0xad, 0xbe, 0xef, 0x48, 0x8b, 0x05, 0x48];
        for (index, byte) in bytes.iter().copied().enumerate() {
            prefix[index] = byte;
        }

        let (anchor, len, offset) = select_anchor(&prefix, bytes.len());
        assert_eq!(len, 4);
        assert_eq!(offset, 0);
        assert_eq!(&anchor[..len], &[0xde, 0xad, 0xbe, 0xef]);
    }

    // code_ranges projects spans to their mapped ranges, preserving order.
    #[test]
    fn code_ranges_yield_mapped_ranges_in_order() {
        let view = TestView {
            bytes: vec![0u8; 16],
            spans: vec![
                CodeSpan {
                    mapped: 10..13,
                    file: 0..3,
                },
                CodeSpan {
                    mapped: 30..35,
                    file: 8..13,
                },
            ],
        };

        let ranges = view.code_ranges().cloned().collect::<Vec<_>>();
        assert_eq!(ranges, vec![10..13, 30..35]);
    }

    // is_in_code honors half-open span bounds and the gap between spans.
    #[test]
    fn is_in_code_detects_hits_and_gaps() {
        let view = TestView {
            bytes: vec![0u8; 16],
            spans: vec![
                CodeSpan {
                    mapped: 5..8,
                    file: 0..3,
                },
                CodeSpan {
                    mapped: 12..15,
                    file: 8..11,
                },
            ],
        };

        assert!(view.is_in_code(5));
        assert!(view.is_in_code(7));
        assert!(!view.is_in_code(8));
        assert!(!view.is_in_code(11));
        assert!(view.is_in_code(14));
        assert!(!view.is_in_code(15));
    }

    // Only backtracking/control-flow atoms disable the linear interpreter.
    #[test]
    fn linear_exec_selector_rejects_backtracking_atoms() {
        assert!(is_linear_pattern(&[
            Atom::Save(0),
            Atom::Byte(0x48),
            Atom::Skip(3)
        ]));
        assert!(!is_linear_pattern(&[Atom::Save(0), Atom::SkipRange(1, 3)]));
        assert!(!is_linear_pattern(&[Atom::Push(1), Atom::Pop]));
        assert!(!is_linear_pattern(&[Atom::Case(1), Atom::Break(0)]));
    }

    // Binary search hits each span's interior and rejects gaps/past-the-end.
    #[test]
    fn span_index_binary_search_locates_offsets() {
        let spans = vec![
            CodeSpan {
                mapped: 5..8,
                file: 0..3,
            },
            CodeSpan {
                mapped: 12..15,
                file: 8..11,
            },
            CodeSpan {
                mapped: 30..35,
                file: 20..25,
            },
        ];

        assert_eq!(span_index_for_offset(&spans, 5), Some(0));
        assert_eq!(span_index_for_offset(&spans, 14), Some(1));
        assert_eq!(span_index_for_offset(&spans, 34), Some(2));
        assert_eq!(span_index_for_offset(&spans, 8), None);
        assert_eq!(span_index_for_offset(&spans, 100), None);
    }

    proptest! {
        // For a parsed single-byte pattern, the scanner must agree with a
        // manual linear search on both first-hit offset and uniqueness.
        #[test]
        fn parsed_single_byte_scan_matches_manual_search(
            haystack in prop::collection::vec(any::<u8>(), 0..128),
            needle in any::<u8>(),
        ) {
            let source = format!("{needle:02X}");
            let atoms = crate::pattern::parse(&source).expect("hex byte pattern should parse");
            let required_slots = crate::pattern::save_len(&atoms);

            let view = TestView::new(&haystack);
            let scanner = Scanner::new(&view);

            let expected_first = haystack
                .iter()
                .position(|byte| *byte == needle)
                .map(|index| index as u64);
            let expected_unique = haystack.iter().filter(|byte| **byte == needle).count() == 1;

            let mut iter_save = vec![0u64; required_slots];
            let mut matches = scanner.matches_code(&atoms);
            let found = matches.next(&mut iter_save);
            prop_assert_eq!(found, expected_first.is_some());
            if let Some(expected_start) = expected_first {
                prop_assert_eq!(iter_save[0], expected_start);
            }

            let mut unique_save = vec![0u64; required_slots];
            let unique = scanner.finds_code(&atoms, &mut unique_save);
            prop_assert_eq!(unique, expected_unique);
            if unique {
                prop_assert_eq!(Some(unique_save[0]), expected_first);
            }
        }
    }
}