//! Pattern scanner for `goblin_sigscan` (scan.rs).
1use std::ops::Range;
2
3use crate::pattern::{Atom, ParsePatError, save_len};
4use memchr::memchr_iter;
5
/// Mapped (virtual) offset within a scanned binary image.
pub type Offset = u64;
/// Cap on queued backtrack states; presumably enforced by the backtracking
/// executor, which is outside this chunk — TODO confirm.
const MAX_BACKTRACK_STATES: usize = 1_000_000;
/// Fixed prefix buffer length; its consumer is not visible in this chunk —
/// TODO confirm.
const PREFIX_BUF_LEN: usize = 16;
/// Maximum number of literal anchor bytes a pattern plan can carry.
const ANCHOR_MAX_LEN: usize = 4;
10
/// Snapshot of executor state recorded at a choice point so a failed match
/// attempt can resume from the alternative.
#[derive(Copy, Clone, Debug)]
struct BacktrackState {
    // Mapped offset the executor had reached at snapshot time.
    cursor: Offset,
    // Index of the next atom to execute.
    pc: usize,
    // Pending fuzzy mask for the next byte comparison, if any.
    fuzzy: Option<u8>,
    // Call-stack length at snapshot time; presumably truncated back to this
    // on restore (backtracking executor not visible here) — TODO confirm.
    calls_len: usize,
    // Save-log length at snapshot time; presumably used to undo slot writes
    // on restore — TODO confirm.
    save_log_len: usize,
}
19
/// Reusable executor buffers, kept across match attempts to avoid
/// per-attempt allocation.
#[derive(Clone, Debug, Default)]
struct ExecScratch {
    // Working copy of the caller's save slots; committed back only on match.
    work_save: Vec<Offset>,
    // Call offsets; consumed by the backtracking executor (not visible here).
    calls: Vec<Offset>,
    // (slot, value) log; presumably an undo log for slot writes during
    // backtracking — TODO confirm.
    save_log: Vec<(usize, Offset)>,
    // Pending backtrack choice points.
    stack: Vec<BacktrackState>,
}
27
28impl ExecScratch {
29    fn reset_from_save(&mut self, save: &[Offset]) {
30        self.work_save.clear();
31        self.work_save.extend_from_slice(save);
32    }
33
34    fn commit_to_save(&self, save: &mut [Offset]) {
35        debug_assert!(
36            self.work_save.len() >= save.len(),
37            "scratch save buffer must cover caller save length"
38        );
39        save.copy_from_slice(&self.work_save[..save.len()]);
40    }
41}
42
/// Precomputed scan strategy derived from a pattern's atoms.
#[derive(Copy, Clone, Debug)]
struct PatternPlan {
    // Minimum save-slot buffer length the pattern writes to.
    required_slots: usize,
    // True when the non-backtracking linear executor suffices.
    linear_exec: bool,
    // Leading literal bytes usable to skip ahead (first `anchor_len` valid).
    anchor: [u8; ANCHOR_MAX_LEN],
    anchor_len: usize,
    // Position of the anchor bytes relative to the match start.
    anchor_offset: u64,
    // Presumably a precomputed Horspool-style shift table built by
    // `analyze_pattern`; not consumed by the direct scans in this chunk —
    // TODO confirm.
    anchor_jumps: [u8; 256],
}
52
/// Reusable scanner metadata and atoms for repeated scans.
///
/// Caches the plan computed by `analyze_pattern` so repeated scans skip
/// re-analysis.
#[derive(Clone, Debug)]
pub struct PreparedPattern {
    // Parsed pattern atoms to execute.
    atoms: Vec<Atom>,
    // Minimum save-slot buffer length the pattern writes to.
    required_slots: usize,
    // True when the non-backtracking linear executor suffices.
    linear_exec: bool,
    // Literal anchor bytes (first `anchor_len` valid).
    anchor: [u8; ANCHOR_MAX_LEN],
    anchor_len: usize,
    // Position of the anchor bytes relative to the match start.
    anchor_offset: u64,
    // Precomputed shift table from the plan; see `PatternPlan::anchor_jumps`.
    anchor_jumps: [u8; 256],
}
64
65impl PreparedPattern {
66    /// Builds a prepared pattern from parsed atoms.
67    pub fn from_atoms(atoms: Vec<Atom>) -> Self {
68        let plan = analyze_pattern(&atoms);
69        Self {
70            atoms,
71            required_slots: plan.required_slots,
72            linear_exec: plan.linear_exec,
73            anchor: plan.anchor,
74            anchor_len: plan.anchor_len,
75            anchor_offset: plan.anchor_offset,
76            anchor_jumps: plan.anchor_jumps,
77        }
78    }
79
80    /// Returns the parsed atoms backing this prepared pattern.
81    pub fn atoms(&self) -> &[Atom] {
82        &self.atoms
83    }
84
85    /// Returns the minimum save-slot buffer length required for scanning.
86    pub fn required_slots(&self) -> usize {
87        self.required_slots
88    }
89}
90
/// Pairing of a mapped (virtual) address range with its backing file range.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CodeSpan {
    /// Mapped address range of this span.
    pub mapped: Range<Offset>,
    /// Corresponding byte range within the file image.
    pub file: Range<usize>,
}
96
/// Read-only view over a mapped binary image for scanner execution.
pub trait BinaryView {
    /// Raw bytes of the backing file image.
    fn image(&self) -> &[u8];
    /// Mapped-to-file span table; the caching reader assumes spans are
    /// ordered so the "next span" heuristic works — implementors should
    /// return them sorted by mapped start (TODO confirm in implementors).
    fn code_spans(&self) -> &[CodeSpan];
    /// Translates a mapped offset to a file-image index, if it is mapped.
    fn mapped_to_file_offset(&self, offset: Offset) -> Option<usize>;

    /// Pointer width in bytes used by pointer-sized reads/skips (default 8).
    #[inline]
    fn pointer_size_bytes(&self) -> u8 {
        8
    }

    /// Resolves a raw pointer value to a followable mapped offset; the
    /// default accepts any value that maps into the file image.
    #[inline]
    fn follow_pointer_target(&self, raw: Offset) -> Option<Offset> {
        self.mapped_to_file_offset(raw).map(|_| raw)
    }

    /// Iterates the mapped ranges of all code spans.
    #[inline]
    fn code_ranges(&self) -> impl Iterator<Item = &Range<Offset>> + '_ {
        self.code_spans().iter().map(|span| &span.mapped)
    }

    /// Returns `true` when `mapped` falls inside any code span.
    #[inline]
    fn is_in_code(&self, mapped: Offset) -> bool {
        span_index_for_offset(self.code_spans(), mapped).is_some()
    }

    /// Reads one byte at a mapped offset.
    #[inline]
    fn read_u8(&self, offset: Offset) -> Option<u8> {
        self.image()
            .get(self.mapped_to_file_offset(offset)?)
            .copied()
    }

    /// Reads a little-endian `i16` at a mapped offset.
    #[inline]
    fn read_i16(&self, offset: Offset) -> Option<i16> {
        Some(i16::from_le_bytes(self.read_array::<2>(offset)?))
    }

    /// Reads a little-endian `u16` at a mapped offset.
    #[inline]
    fn read_u16(&self, offset: Offset) -> Option<u16> {
        Some(u16::from_le_bytes(self.read_array::<2>(offset)?))
    }

    /// Reads a little-endian `i32` at a mapped offset.
    #[inline]
    fn read_i32(&self, offset: Offset) -> Option<i32> {
        Some(i32::from_le_bytes(self.read_array::<4>(offset)?))
    }

    /// Reads a little-endian `u32` at a mapped offset.
    #[inline]
    fn read_u32(&self, offset: Offset) -> Option<u32> {
        Some(u32::from_le_bytes(self.read_array::<4>(offset)?))
    }

    /// Reads a pointer-sized little-endian value; `None` for unsupported
    /// pointer widths (only 4 and 8 are handled).
    #[inline]
    fn read_pointer_raw(&self, offset: Offset) -> Option<Offset> {
        match self.pointer_size_bytes() {
            4 => self.read_u32(offset).map(Offset::from),
            8 => Some(u64::from_le_bytes(self.read_array::<8>(offset)?)),
            _ => None,
        }
    }

    /// Reads `N` bytes starting at a mapped offset.
    ///
    /// NOTE(review): the bytes are taken contiguously from the file image
    /// starting at the translation of `offset`, which assumes the whole
    /// window lies within one mapping — confirm for multi-span reads.
    #[inline]
    fn read_array<const N: usize>(&self, offset: Offset) -> Option<[u8; N]> {
        let file_offset = self.mapped_to_file_offset(offset)?;
        let end = file_offset.checked_add(N)?;
        let bytes = self.image().get(file_offset..end)?;
        let mut out = [0u8; N];
        out.copy_from_slice(bytes);
        Some(out)
    }
}
169
/// Span-caching reader: remembers the last code span hit so sequential
/// reads avoid repeating the span lookup.
struct ExecReader<'a, B: BinaryView> {
    view: &'a B,
    // Index into `view.code_spans()` of the most recently used span, if any.
    span_index: Option<usize>,
}
174
impl<'a, B: BinaryView> ExecReader<'a, B> {
    /// Creates a reader with the span cache primed for `start`.
    fn new(view: &'a B, start: Offset) -> Self {
        let mut reader = Self {
            view,
            span_index: None,
        };
        reader.span_index = reader.find_span(start);
        reader
    }

    /// Reads one byte, preferring the cached span's file bytes; any miss
    /// falls back to the view's generic `read_u8` path.
    #[inline]
    fn read_u8(&mut self, offset: Offset) -> Option<u8> {
        let Some(file_offset) = self.span_file_offset(offset) else {
            return self.view.read_u8(offset);
        };
        self.view
            .image()
            .get(file_offset)
            .copied()
            .or_else(|| self.view.read_u8(offset))
    }

    /// Reads a little-endian `i16` at a mapped offset.
    #[inline]
    fn read_i16(&mut self, offset: Offset) -> Option<i16> {
        Some(i16::from_le_bytes(self.read_array::<2>(offset)?))
    }

    /// Reads a little-endian `u16` at a mapped offset.
    #[inline]
    fn read_u16(&mut self, offset: Offset) -> Option<u16> {
        Some(u16::from_le_bytes(self.read_array::<2>(offset)?))
    }

    /// Reads a little-endian `i32` at a mapped offset.
    #[inline]
    fn read_i32(&mut self, offset: Offset) -> Option<i32> {
        Some(i32::from_le_bytes(self.read_array::<4>(offset)?))
    }

    /// Reads a little-endian `u32` at a mapped offset.
    #[inline]
    fn read_u32(&mut self, offset: Offset) -> Option<u32> {
        Some(u32::from_le_bytes(self.read_array::<4>(offset)?))
    }

    /// Reads a little-endian value of the given pointer `width` (4 or 8
    /// bytes); `None` for any other width.
    #[inline]
    fn read_pointer_raw(&mut self, offset: Offset, width: u8) -> Option<Offset> {
        match width {
            4 => self.read_u32(offset).map(Offset::from),
            8 => Some(u64::from_le_bytes(self.read_array::<8>(offset)?)),
            _ => None,
        }
    }

    /// Reads `N` bytes: fast path slices the cached span's file bytes,
    /// otherwise defers to `BinaryView::read_array`.
    fn read_array<const N: usize>(&mut self, offset: Offset) -> Option<[u8; N]> {
        if let Some(file_offset) = self.span_file_offset(offset)
            && let Some(end) = file_offset.checked_add(N)
            && let Some(bytes) = self.view.image().get(file_offset..end)
        {
            let mut out = [0u8; N];
            out.copy_from_slice(bytes);
            return Some(out);
        }

        self.view.read_array::<N>(offset)
    }

    /// Translates a mapped offset to a file index through the (possibly
    /// updated) cached span.
    fn span_file_offset(&mut self, offset: Offset) -> Option<usize> {
        let index = self.find_span(offset)?;
        let span = self.view.code_spans().get(index)?;
        let delta = offset.checked_sub(span.mapped.start)?;
        let delta_usize = usize::try_from(delta).ok()?;
        span.file.start.checked_add(delta_usize)
    }

    /// Finds the span containing `offset`, trying in order: the cached span,
    /// the span immediately after it (sequential-read heuristic), then a
    /// full lookup via `span_index_for_offset`.
    fn find_span(&mut self, offset: Offset) -> Option<usize> {
        // Fast path: still inside the cached span.
        if let Some(index) = self.span_index
            && self
                .view
                .code_spans()
                .get(index)
                .is_some_and(|span| span.mapped.contains(&offset))
        {
            return Some(index);
        }

        // Sequential reads usually step into the adjacent next span.
        if let Some(index) = self.span_index
            && let Some(current) = self.view.code_spans().get(index)
            && offset >= current.mapped.end
            && let Some(next_index) = index.checked_add(1)
            && self
                .view
                .code_spans()
                .get(next_index)
                .is_some_and(|span| span.mapped.contains(&offset))
        {
            self.span_index = Some(next_index);
            return Some(next_index);
        }

        // Slow path: full lookup, then refresh the cache.
        let index = span_index_for_offset(self.view.code_spans(), offset)?;
        self.span_index = Some(index);
        Some(index)
    }
}
277
278#[derive(Copy, Clone, Debug)]
279/// Pattern scanner over a [`BinaryView`].
280pub struct Scanner<'a, B: BinaryView> {
281    view: &'a B,
282}
283
284impl<'a, B: BinaryView> Scanner<'a, B> {
285    /// Creates a scanner for a binary view.
286    pub fn new(view: &'a B) -> Self {
287        Self { view }
288    }
289
290    /// Returns `true` only when the pattern has exactly one code match.
291    pub fn finds_code(&self, pat: &[Atom], save: &mut [Offset]) -> bool {
292        let plan = analyze_pattern(pat);
293        let required_slots = plan.required_slots;
294        debug_assert!(
295            save.len() >= required_slots,
296            "caller-provided save buffer must cover all slots referenced by the pattern"
297        );
298        self.finds_unique_direct(
299            pat,
300            plan.linear_exec,
301            plan.required_slots,
302            plan.anchor,
303            plan.anchor_len,
304            plan.anchor_offset,
305            save,
306        )
307    }
308
309    /// Returns the minimum required save-slot buffer length for `pat`.
310    ///
311    /// Allocate at least this many elements before calling [`Self::finds_code`]
312    /// or [`Self::matches_code`]/[`Matches::next`].
313    ///
314    /// Patterns produced by `pattern::parse` and `pattern!` always require at
315    /// least one slot because they include an implicit `Save(0)` base capture.
316    pub fn required_slots(&self, pat: &[Atom]) -> usize {
317        save_len(pat)
318    }
319
320    /// Prepares reusable scanner metadata for a parsed pattern.
321    pub fn prepare_pattern(&self, pat: &[Atom]) -> PreparedPattern {
322        PreparedPattern::from_atoms(pat.to_vec())
323    }
324
325    /// Parses and prepares a pattern string for scanning.
326    ///
327    /// This is slower than [`Self::prepare_pattern`] because it performs
328    /// runtime text parsing and allocates atom storage on each call.
329    pub fn prepare_pattern_str(&self, source: &str) -> Result<PreparedPattern, ParsePatError> {
330        let atoms = crate::pattern::parse(source)?;
331        Ok(PreparedPattern::from_atoms(atoms))
332    }
333
334    /// Returns `true` only when a prepared pattern has exactly one code match.
335    pub fn finds_prepared(&self, pat: &PreparedPattern, save: &mut [Offset]) -> bool {
336        debug_assert!(
337            save.len() >= pat.required_slots,
338            "caller-provided save buffer must cover all slots referenced by the prepared pattern"
339        );
340        self.finds_unique_direct(
341            &pat.atoms,
342            pat.linear_exec,
343            pat.required_slots,
344            pat.anchor,
345            pat.anchor_len,
346            pat.anchor_offset,
347            save,
348        )
349    }
350
351    /// Returns an iterator-like matcher for a prepared pattern.
352    pub fn matches_prepared<'p>(&self, pat: &'p PreparedPattern) -> Matches<'a, 'p, B> {
353        Matches {
354            scanner: Scanner { view: self.view },
355            pat: &pat.atoms,
356            required_slots: pat.required_slots,
357            linear_exec: pat.linear_exec,
358            range_index: 0,
359            cursor: None,
360            anchor: pat.anchor,
361            anchor_len: pat.anchor_len,
362            anchor_offset: pat.anchor_offset,
363            anchor_jumps: pat.anchor_jumps,
364            scratch: ExecScratch::default(),
365        }
366    }
367
368    /// Returns an iterator-like matcher for all code-range matches.
369    ///
370    /// `save` buffers passed to [`Matches::next`] must be at least
371    /// `self.required_slots(pat)` elements long.
372    pub fn matches_code<'p>(&self, pat: &'p [Atom]) -> Matches<'a, 'p, B> {
373        let plan = analyze_pattern(pat);
374        Matches {
375            scanner: Scanner { view: self.view },
376            pat,
377            required_slots: plan.required_slots,
378            linear_exec: plan.linear_exec,
379            range_index: 0,
380            cursor: None,
381            anchor: plan.anchor,
382            anchor_len: plan.anchor_len,
383            anchor_offset: plan.anchor_offset,
384            anchor_jumps: plan.anchor_jumps,
385            scratch: ExecScratch::default(),
386        }
387    }
388
389    fn exec(
390        &self,
391        start: Offset,
392        pat: &[Atom],
393        save: &mut [Offset],
394        linear_exec: bool,
395        scratch: &mut ExecScratch,
396    ) -> bool {
397        if linear_exec {
398            return self.exec_linear(start, pat, save, scratch);
399        }
400        self.exec_backtracking(start, pat, save, scratch)
401    }
402
    /// Scans all code spans and returns `true` only if exactly one match
    /// exists.
    ///
    /// The first match's captures are written to `save`; any second match
    /// goes into a throwaway buffer and makes the whole call return `false`
    /// (the caller's `save` keeps the first match's slots in that case).
    #[allow(clippy::too_many_arguments)]
    fn finds_unique_direct(
        &self,
        pat: &[Atom],
        linear_exec: bool,
        required_slots: usize,
        anchor: [u8; ANCHOR_MAX_LEN],
        anchor_len: usize,
        anchor_offset: u64,
        save: &mut [Offset],
    ) -> bool {
        let mut exec_scratch = ExecScratch::default();
        // Throwaway capture buffer used once a first match has been found.
        let mut scratch = vec![0; required_slots];
        let mut found_once = false;

        for span in self.view.code_spans() {
            let mut cursor = span.mapped.start;
            loop {
                // Before the first match, write captures into the caller's
                // buffer; afterwards, only probe for a second match.
                let save_buf: &mut [Offset] = if found_once {
                    &mut scratch
                } else {
                    &mut save[..required_slots]
                };
                let matched = self.find_next_direct_in_span(
                    span,
                    cursor,
                    pat,
                    save_buf,
                    linear_exec,
                    &anchor,
                    anchor_len,
                    anchor_offset,
                    &mut exec_scratch,
                );
                let Some(found_at) = matched else {
                    break;
                };

                // A second match anywhere means the pattern is not unique.
                if found_once {
                    return false;
                }
                found_once = true;

                // Resume one byte past the match start to detect duplicates.
                let Some(next) = found_at.checked_add(1) else {
                    break;
                };
                cursor = next;
            }
        }

        found_once
    }
455
456    #[allow(clippy::too_many_arguments)]
457    fn find_next_direct_in_span(
458        &self,
459        span: &CodeSpan,
460        start: Offset,
461        pat: &[Atom],
462        save: &mut [Offset],
463        linear_exec: bool,
464        anchor: &[u8; ANCHOR_MAX_LEN],
465        anchor_len: usize,
466        anchor_offset: u64,
467        scratch: &mut ExecScratch,
468    ) -> Option<Offset> {
469        if start >= span.mapped.end {
470            return None;
471        }
472
473        if anchor_len == 0 {
474            return self.scan_range_linear_direct(
475                span.mapped.clone(),
476                start,
477                pat,
478                save,
479                linear_exec,
480                scratch,
481            );
482        }
483        if anchor_len < 4 {
484            return self.scan_span_first_byte_direct(
485                span,
486                start,
487                pat,
488                save,
489                linear_exec,
490                anchor,
491                anchor_len,
492                anchor_offset,
493                scratch,
494            );
495        }
496        self.scan_span_quick_direct(
497            span,
498            start,
499            pat,
500            save,
501            linear_exec,
502            anchor,
503            anchor_len,
504            anchor_offset,
505            scratch,
506        )
507    }
508
509    fn scan_range_linear_direct(
510        &self,
511        range: Range<Offset>,
512        mut cursor: Offset,
513        pat: &[Atom],
514        save: &mut [Offset],
515        linear_exec: bool,
516        scratch: &mut ExecScratch,
517    ) -> Option<Offset> {
518        while cursor < range.end {
519            if self.exec(cursor, pat, save, linear_exec, scratch) {
520                return Some(cursor);
521            }
522            cursor = cursor.checked_add(1)?;
523        }
524        None
525    }
526
527    #[allow(clippy::too_many_arguments)]
528    fn scan_span_first_byte_direct(
529        &self,
530        span: &CodeSpan,
531        start: Offset,
532        pat: &[Atom],
533        save: &mut [Offset],
534        linear_exec: bool,
535        anchor: &[u8; ANCHOR_MAX_LEN],
536        anchor_len: usize,
537        anchor_offset: u64,
538        scratch: &mut ExecScratch,
539    ) -> Option<Offset> {
540        let Some(bytes) = self.view.image().get(span.file.clone()) else {
541            return self.scan_range_first_byte_direct(
542                span.mapped.clone(),
543                start,
544                pat,
545                save,
546                linear_exec,
547                anchor,
548                anchor_offset,
549                scratch,
550            );
551        };
552        let anchor_start = start.checked_add(anchor_offset)?;
553        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
554            return self.scan_range_first_byte_direct(
555                span.mapped.clone(),
556                start,
557                pat,
558                save,
559                linear_exec,
560                anchor,
561                anchor_offset,
562                scratch,
563            );
564        };
565        let Some(start_index) = start_file.checked_sub(span.file.start) else {
566            return self.scan_range_first_byte_direct(
567                span.mapped.clone(),
568                start,
569                pat,
570                save,
571                linear_exec,
572                anchor,
573                anchor_offset,
574                scratch,
575            );
576        };
577        let Some(haystack) = bytes.get(start_index..) else {
578            return self.scan_range_first_byte_direct(
579                span.mapped.clone(),
580                start,
581                pat,
582                save,
583                linear_exec,
584                anchor,
585                anchor_offset,
586                scratch,
587            );
588        };
589
590        let needle = anchor[0];
591        let anchor_window = &anchor[..anchor_len];
592        for delta in memchr_iter(needle, haystack) {
593            if anchor_len > 1
594                && haystack
595                    .get(delta..delta + anchor_len)
596                    .is_none_or(|window| window != anchor_window)
597            {
598                continue;
599            }
600            let anchor_index = start_index.checked_add(delta)?;
601            let mapped_delta = Offset::try_from(anchor_index).ok()?;
602            let anchor_cursor = span.mapped.start.checked_add(mapped_delta)?;
603            let cursor = anchor_cursor.checked_sub(anchor_offset)?;
604            if self.exec(cursor, pat, save, linear_exec, scratch) {
605                return Some(cursor);
606            }
607        }
608        None
609    }
610
611    #[allow(clippy::too_many_arguments)]
612    fn scan_range_first_byte_direct(
613        &self,
614        range: Range<Offset>,
615        mut cursor: Offset,
616        pat: &[Atom],
617        save: &mut [Offset],
618        linear_exec: bool,
619        anchor: &[u8; ANCHOR_MAX_LEN],
620        anchor_offset: u64,
621        scratch: &mut ExecScratch,
622    ) -> Option<Offset> {
623        let needle = anchor[0];
624        let mut probe = cursor.checked_add(anchor_offset)?;
625        while probe < range.end {
626            if self.view.read_u8(probe) == Some(needle)
627                && self.exec(cursor, pat, save, linear_exec, scratch)
628            {
629                return Some(cursor);
630            }
631            cursor = cursor.checked_add(1)?;
632            probe = probe.checked_add(1)?;
633        }
634        None
635    }
636
637    #[allow(clippy::too_many_arguments)]
638    fn scan_span_quick_direct(
639        &self,
640        span: &CodeSpan,
641        start: Offset,
642        pat: &[Atom],
643        save: &mut [Offset],
644        linear_exec: bool,
645        anchor: &[u8; ANCHOR_MAX_LEN],
646        anchor_len: usize,
647        anchor_offset: u64,
648        scratch: &mut ExecScratch,
649    ) -> Option<Offset> {
650        let Some(bytes) = self.view.image().get(span.file.clone()) else {
651            return self.scan_range_quick_direct(
652                span.mapped.clone(),
653                start,
654                pat,
655                save,
656                linear_exec,
657                anchor,
658                anchor_len,
659                anchor_offset,
660                scratch,
661            );
662        };
663        let anchor_start = start.checked_add(anchor_offset)?;
664        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
665            return self.scan_range_quick_direct(
666                span.mapped.clone(),
667                start,
668                pat,
669                save,
670                linear_exec,
671                anchor,
672                anchor_len,
673                anchor_offset,
674                scratch,
675            );
676        };
677        let Some(start_index) = start_file.checked_sub(span.file.start) else {
678            return self.scan_range_quick_direct(
679                span.mapped.clone(),
680                start,
681                pat,
682                save,
683                linear_exec,
684                anchor,
685                anchor_len,
686                anchor_offset,
687                scratch,
688            );
689        };
690        let prefix = &anchor[..anchor_len];
691        let Some(haystack) = bytes.get(start_index..) else {
692            return self.scan_range_quick_direct(
693                span.mapped.clone(),
694                start,
695                pat,
696                save,
697                linear_exec,
698                anchor,
699                anchor_len,
700                anchor_offset,
701                scratch,
702            );
703        };
704        if haystack.len() < anchor_len {
705            return None;
706        }
707
708        let mut jumps = [anchor_len as u8; 256];
709        for (index, byte) in prefix.iter().take(anchor_len.saturating_sub(1)).enumerate() {
710            jumps[usize::from(*byte)] = (anchor_len - index - 1) as u8;
711        }
712
713        let last = prefix[anchor_len - 1];
714        let mut index = 0usize;
715        let max_index = haystack.len() - anchor_len;
716        while index <= max_index {
717            let probe = haystack[index + anchor_len - 1];
718            let jump = usize::from(jumps[usize::from(probe)].max(1));
719            if probe == last
720                && haystack
721                    .get(index..index + anchor_len)
722                    .is_some_and(|window| window == prefix)
723            {
724                let total_index = start_index.checked_add(index)?;
725                let mapped_delta = Offset::try_from(total_index).ok()?;
726                let cursor = span.mapped.start.checked_add(mapped_delta)?;
727                let start_cursor = cursor.checked_sub(anchor_offset)?;
728                if self.exec(start_cursor, pat, save, linear_exec, scratch) {
729                    return Some(start_cursor);
730                }
731            }
732            index = index.checked_add(jump)?;
733        }
734
735        None
736    }
737
    /// Horspool-style skip search performed through mapped `read_u8` calls,
    /// for spans whose file bytes could not be sliced directly.
    ///
    /// Unreadable probe positions advance by one byte rather than aborting,
    /// so partially unmapped ranges are tolerated.
    #[allow(clippy::too_many_arguments)]
    fn scan_range_quick_direct(
        &self,
        range: Range<Offset>,
        start: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        linear_exec: bool,
        anchor: &[u8; ANCHOR_MAX_LEN],
        anchor_len: usize,
        anchor_offset: u64,
        scratch: &mut ExecScratch,
    ) -> Option<Offset> {
        let prefix = &anchor[..anchor_len];
        let window = u64::try_from(anchor_len).ok()?;
        // Shadow `start` with the position of the anchor itself.
        let start = start.checked_add(anchor_offset)?;
        if start >= range.end {
            return None;
        }
        let total = range.end.checked_sub(start)?;
        if total < window {
            return None;
        }

        // Horspool bad-character table (same construction as the span-local
        // fast path).
        let mut jumps = [anchor_len as u8; 256];
        for (index, byte) in prefix.iter().take(anchor_len.saturating_sub(1)).enumerate() {
            jumps[usize::from(*byte)] = (anchor_len - index - 1) as u8;
        }

        let last = prefix[anchor_len - 1];
        let mut index = 0u64;
        let max_index = total - window;
        while index <= max_index {
            let cursor = start.checked_add(index)?;
            let probe_at = cursor.checked_add(window - 1)?;
            // Unreadable byte: step one forward instead of giving up.
            let Some(probe) = self.view.read_u8(probe_at) else {
                index = index.checked_add(1)?;
                continue;
            };

            let jump = u64::from(jumps[usize::from(probe)].max(1));
            if probe == last
                && prefix_matches_mapped(self.view, cursor, prefix)
                && self.exec(
                    cursor.checked_sub(anchor_offset)?,
                    pat,
                    save,
                    linear_exec,
                    scratch,
                )
            {
                // Report the match start (anchor position minus its offset).
                return cursor.checked_sub(anchor_offset);
            }
            index = index.checked_add(jump)?;
        }

        None
    }
796
797    fn exec_linear(
798        &self,
799        start: Offset,
800        pat: &[Atom],
801        save: &mut [Offset],
802        scratch: &mut ExecScratch,
803    ) -> bool {
804        scratch.reset_from_save(save);
805        let work_save = &mut scratch.work_save;
806        let mut cursor = start;
807        let mut pc = 0usize;
808        let mut fuzzy = None;
809        let mut reader = ExecReader::new(self.view, cursor);
810
811        loop {
812            let Some(atom) = pat.get(pc) else {
813                scratch.commit_to_save(save);
814                return true;
815            };
816
817            match *atom {
818                Atom::Byte(expected) => {
819                    let Some(actual) = reader.read_u8(cursor) else {
820                        return false;
821                    };
822                    let mask = fuzzy.take().unwrap_or(u8::MAX);
823                    if (actual & mask) != (expected & mask) {
824                        return false;
825                    }
826                    cursor = match cursor.checked_add(1) {
827                        Some(next) => next,
828                        None => return false,
829                    };
830                    pc += 1;
831                }
832                Atom::Fuzzy(mask) => {
833                    fuzzy = Some(mask);
834                    pc += 1;
835                }
836                Atom::Save(slot) => {
837                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
838                        *dst = cursor;
839                    }
840                    pc += 1;
841                }
842                Atom::Skip(n) => {
843                    let skip = if n == 0 {
844                        u64::from(self.view.pointer_size_bytes())
845                    } else {
846                        u64::from(n)
847                    };
848                    cursor = match cursor.checked_add(skip) {
849                        Some(next) => next,
850                        None => return false,
851                    };
852                    pc += 1;
853                }
854                Atom::Jump1 => {
855                    let Some(byte) = reader.read_u8(cursor) else {
856                        return false;
857                    };
858                    let base = match cursor.checked_add(1) {
859                        Some(next) => next,
860                        None => return false,
861                    };
862                    let delta = i64::from(byte as i8);
863                    cursor = if delta >= 0 {
864                        match base.checked_add(delta as u64) {
865                            Some(next) => next,
866                            None => return false,
867                        }
868                    } else {
869                        match base.checked_sub((-delta) as u64) {
870                            Some(next) => next,
871                            None => return false,
872                        }
873                    };
874                    pc += 1;
875                }
876                Atom::Jump4 => {
877                    let Some(disp) = reader.read_i32(cursor) else {
878                        return false;
879                    };
880                    let base = match cursor.checked_add(4) {
881                        Some(next) => next,
882                        None => return false,
883                    };
884                    let delta = i64::from(disp);
885                    cursor = if delta >= 0 {
886                        match base.checked_add(delta as u64) {
887                            Some(next) => next,
888                            None => return false,
889                        }
890                    } else {
891                        match base.checked_sub((-delta) as u64) {
892                            Some(next) => next,
893                            None => return false,
894                        }
895                    };
896                    pc += 1;
897                }
898                Atom::Ptr => {
899                    let Some(raw) = reader.read_pointer_raw(cursor, self.view.pointer_size_bytes())
900                    else {
901                        return false;
902                    };
903                    let Some(next) = self.view.follow_pointer_target(raw) else {
904                        return false;
905                    };
906                    cursor = next;
907                    pc += 1;
908                }
909                Atom::Pir(slot) => {
910                    let Some(disp) = reader.read_i32(cursor) else {
911                        return false;
912                    };
913                    let base = work_save.get(usize::from(slot)).copied().unwrap_or(cursor);
914                    let delta = i64::from(disp);
915                    cursor = if delta >= 0 {
916                        match base.checked_add(delta as u64) {
917                            Some(next) => next,
918                            None => return false,
919                        }
920                    } else {
921                        match base.checked_sub((-delta) as u64) {
922                            Some(next) => next,
923                            None => return false,
924                        }
925                    };
926                    pc += 1;
927                }
928                Atom::ReadI8(slot) => {
929                    let Some(value) = reader.read_u8(cursor) else {
930                        return false;
931                    };
932                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
933                        *dst = (value as i8) as i64 as u64;
934                    }
935                    cursor = match cursor.checked_add(1) {
936                        Some(next) => next,
937                        None => return false,
938                    };
939                    pc += 1;
940                }
941                Atom::ReadU8(slot) => {
942                    let Some(value) = reader.read_u8(cursor) else {
943                        return false;
944                    };
945                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
946                        *dst = u64::from(value);
947                    }
948                    cursor = match cursor.checked_add(1) {
949                        Some(next) => next,
950                        None => return false,
951                    };
952                    pc += 1;
953                }
954                Atom::ReadI16(slot) => {
955                    let Some(value) = reader.read_i16(cursor) else {
956                        return false;
957                    };
958                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
959                        *dst = value as i64 as u64;
960                    }
961                    cursor = match cursor.checked_add(2) {
962                        Some(next) => next,
963                        None => return false,
964                    };
965                    pc += 1;
966                }
967                Atom::ReadU16(slot) => {
968                    let Some(value) = reader.read_u16(cursor) else {
969                        return false;
970                    };
971                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
972                        *dst = u64::from(value);
973                    }
974                    cursor = match cursor.checked_add(2) {
975                        Some(next) => next,
976                        None => return false,
977                    };
978                    pc += 1;
979                }
980                Atom::ReadI32(slot) => {
981                    let Some(value) = reader.read_i32(cursor) else {
982                        return false;
983                    };
984                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
985                        *dst = value as i64 as u64;
986                    }
987                    cursor = match cursor.checked_add(4) {
988                        Some(next) => next,
989                        None => return false,
990                    };
991                    pc += 1;
992                }
993                Atom::ReadU32(slot) => {
994                    let Some(value) = reader.read_u32(cursor) else {
995                        return false;
996                    };
997                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
998                        *dst = u64::from(value);
999                    }
1000                    cursor = match cursor.checked_add(4) {
1001                        Some(next) => next,
1002                        None => return false,
1003                    };
1004                    pc += 1;
1005                }
1006                Atom::Zero(slot) => {
1007                    if let Some(dst) = work_save.get_mut(usize::from(slot)) {
1008                        *dst = 0;
1009                    }
1010                    pc += 1;
1011                }
1012                Atom::Back(n) => {
1013                    cursor = match cursor.checked_sub(u64::from(n)) {
1014                        Some(next) => next,
1015                        None => return false,
1016                    };
1017                    pc += 1;
1018                }
1019                Atom::Aligned(align) => {
1020                    let mask = (1u64 << u64::from(align)).wrapping_sub(1);
1021                    if cursor & mask != 0 {
1022                        return false;
1023                    }
1024                    pc += 1;
1025                }
1026                Atom::Check(slot) => {
1027                    let expected = work_save.get(usize::from(slot)).copied().unwrap_or(0);
1028                    if cursor != expected {
1029                        return false;
1030                    }
1031                    pc += 1;
1032                }
1033                Atom::Nop => {
1034                    pc += 1;
1035                }
1036                Atom::SkipRange(_, _)
1037                | Atom::Push(_)
1038                | Atom::Pop
1039                | Atom::Case(_)
1040                | Atom::Break(_) => {
1041                    debug_assert!(
1042                        false,
1043                        "linear exec must only run on patterns without backtracking/control-flow atoms"
1044                    );
1045                    return false;
1046                }
1047            }
1048        }
1049    }
1050
    /// Executes `pat` at `start` with full backtracking support and, on a
    /// match, commits the captured slot values into `save`.
    ///
    /// Alternatives (`SkipRange` widths, `Case`/`Break` groups) are explored
    /// depth-first: each choice point pushes a `BacktrackState` snapshot onto
    /// `scratch.stack`. Save-slot writes are journaled in `scratch.save_log`
    /// so they can be rolled back when a branch fails, and `scratch.calls`
    /// holds resume cursors for `Push`/`Pop`. On failure (`false`) the
    /// caller's `save` buffer is left untouched.
    fn exec_backtracking(
        &self,
        start: Offset,
        pat: &[Atom],
        save: &mut [Offset],
        scratch: &mut ExecScratch,
    ) -> bool {
        // Work on a scratch copy of `save`; only a successful match commits
        // back to the caller's buffer.
        scratch.reset_from_save(save);
        let work_save = &mut scratch.work_save;
        scratch.calls.clear();
        scratch.save_log.clear();
        scratch.stack.clear();

        // Seed the depth-first search with the initial machine state.
        scratch.stack.push(BacktrackState {
            cursor: start,
            pc: 0,
            fuzzy: None,
            calls_len: 0,
            save_log_len: 0,
        });

        // Undoes journaled save-slot writes until the log shrinks back to
        // `target_len`, restoring the snapshot taken at a choice point.
        #[inline]
        fn rollback(save: &mut [Offset], log: &mut Vec<(usize, Offset)>, target_len: usize) {
            while log.len() > target_len {
                let (slot, old) = log.pop().expect("save log length checked before pop");
                save[slot] = old;
            }
        }

        // Writes `value` into `save[slot]` (silently ignored when the slot is
        // out of range), journaling the previous value for rollback.
        #[inline]
        fn assign_save(
            save: &mut [Offset],
            log: &mut Vec<(usize, Offset)>,
            slot: usize,
            value: Offset,
        ) {
            if let Some(dst) = save.get_mut(slot) {
                log.push((slot, *dst));
                *dst = value;
            }
        }

        while let Some(state) = scratch.stack.pop() {
            // Restore the call stack and save slots to the snapshot captured
            // when this alternative was pushed.
            scratch.calls.truncate(state.calls_len);
            rollback(work_save, &mut scratch.save_log, state.save_log_len);

            let mut cursor = state.cursor;
            let mut pc = state.pc;
            let mut fuzzy = state.fuzzy;
            let mut reader = ExecReader::new(self.view, cursor);
            // Run atoms until the pattern ends (match), an atom fails
            // (`break` pops the next stacked alternative), or we hard-bail.
            loop {
                let Some(atom) = pat.get(pc) else {
                    // Ran off the end of the pattern: success.
                    scratch.commit_to_save(save);
                    return true;
                };

                match *atom {
                    Atom::Byte(expected) => {
                        let Some(actual) = reader.read_u8(cursor) else {
                            break;
                        };
                        // A pending `Fuzzy` mask applies to exactly one byte
                        // comparison, then resets to full-width.
                        let mask = fuzzy.take().unwrap_or(u8::MAX);
                        if (actual & mask) != (expected & mask) {
                            break;
                        }
                        let Some(next) = cursor.checked_add(1) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::Fuzzy(mask) => {
                        // Arm a bit mask for the next `Byte` comparison.
                        fuzzy = Some(mask);
                        pc += 1;
                    }
                    Atom::Save(slot) => {
                        // Capture the current cursor into a save slot.
                        assign_save(work_save, &mut scratch.save_log, usize::from(slot), cursor);
                        pc += 1;
                    }
                    Atom::Skip(n) => {
                        // `Skip(0)` means "skip one pointer width".
                        let skip = if n == 0 {
                            u64::from(self.view.pointer_size_bytes())
                        } else {
                            u64::from(n)
                        };
                        let Some(next) = cursor.checked_add(skip) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::SkipRange(min, max) => {
                        debug_assert!(
                            min <= max,
                            "pattern parser enforces inclusive skip ranges with min <= max"
                        );
                        let min = u64::from(min);
                        let max = u64::from(max);
                        // Push the wider skips (max down to min+1) as
                        // fallbacks; because the stack is LIFO they will be
                        // retried in increasing order after the in-line `min`
                        // attempt below fails (lazy matching).
                        for delta in ((min + 1)..=max).rev() {
                            if let Some(next_cursor) = cursor.checked_add(delta) {
                                if scratch.stack.len() >= MAX_BACKTRACK_STATES {
                                    debug_assert!(
                                        false,
                                        "scanner backtracking stack must stay below MAX_BACKTRACK_STATES for bounded memory"
                                    );
                                    return false;
                                }
                                scratch.stack.push(BacktrackState {
                                    cursor: next_cursor,
                                    pc: pc + 1,
                                    fuzzy,
                                    calls_len: scratch.calls.len(),
                                    save_log_len: scratch.save_log.len(),
                                });
                            }
                        }
                        let Some(next) = cursor.checked_add(min) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::Push(skip) => {
                        // Remember a resume cursor `skip` bytes ahead
                        // (pointer width when `skip == 0`); `Pop` returns
                        // there.
                        let skip = if skip == 0 {
                            u64::from(self.view.pointer_size_bytes())
                        } else {
                            u64::from(skip)
                        };
                        let Some(resume_cursor) = cursor.checked_add(skip) else {
                            break;
                        };
                        scratch.calls.push(resume_cursor);
                        pc += 1;
                    }
                    Atom::Pop => {
                        let Some(resume_cursor) = scratch.calls.pop() else {
                            break;
                        };
                        cursor = resume_cursor;
                        pc += 1;
                    }
                    Atom::Jump1 => {
                        // Follow a rel8 displacement: target = cursor + 1 + disp.
                        let Some(byte) = reader.read_u8(cursor) else {
                            break;
                        };
                        let disp = byte as i8;
                        let Some(base) = cursor.checked_add(1) else {
                            break;
                        };
                        let delta = i64::from(disp);
                        if delta >= 0 {
                            let Some(next) = base.checked_add(delta as u64) else {
                                break;
                            };
                            cursor = next;
                        } else {
                            let Some(next) = base.checked_sub((-delta) as u64) else {
                                break;
                            };
                            cursor = next;
                        }
                        pc += 1;
                    }
                    Atom::Jump4 => {
                        // Follow a rel32 displacement: target = cursor + 4 + disp.
                        let Some(disp) = reader.read_i32(cursor) else {
                            break;
                        };
                        let Some(base) = cursor.checked_add(4) else {
                            break;
                        };
                        let delta = i64::from(disp);
                        if delta >= 0 {
                            let Some(next) = base.checked_add(delta as u64) else {
                                break;
                            };
                            cursor = next;
                        } else {
                            let Some(next) = base.checked_sub((-delta) as u64) else {
                                break;
                            };
                            cursor = next;
                        }
                        pc += 1;
                    }
                    Atom::Ptr => {
                        // Dereference a pointer-sized value and continue at
                        // whatever target the view resolves it to.
                        let Some(raw) =
                            reader.read_pointer_raw(cursor, self.view.pointer_size_bytes())
                        else {
                            break;
                        };
                        let Some(next) = self.view.follow_pointer_target(raw) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::Pir(slot) => {
                        // Position-independent reference: add the i32
                        // displacement at `cursor` to the base stored in
                        // `slot` (falling back to `cursor` for unset slots).
                        let Some(disp) = reader.read_i32(cursor) else {
                            break;
                        };
                        let base = work_save.get(usize::from(slot)).copied().unwrap_or(cursor);
                        let delta = i64::from(disp);
                        if delta >= 0 {
                            let Some(next) = base.checked_add(delta as u64) else {
                                break;
                            };
                            cursor = next;
                        } else {
                            let Some(next) = base.checked_sub((-delta) as u64) else {
                                break;
                            };
                            cursor = next;
                        }
                        pc += 1;
                    }
                    Atom::ReadI8(slot) => {
                        let Some(value) = reader.read_u8(cursor) else {
                            break;
                        };
                        // Sign-extend through i8 -> i64 before widening.
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            (value as i8) as i64 as u64,
                        );
                        let Some(next) = cursor.checked_add(1) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::ReadU8(slot) => {
                        let Some(value) = reader.read_u8(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            u64::from(value),
                        );
                        let Some(next) = cursor.checked_add(1) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::ReadI16(slot) => {
                        let Some(value) = reader.read_i16(cursor) else {
                            break;
                        };
                        // Sign-extend via i64 before storing as u64.
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            value as i64 as u64,
                        );
                        let Some(next) = cursor.checked_add(2) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::ReadU16(slot) => {
                        let Some(value) = reader.read_u16(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            u64::from(value),
                        );
                        let Some(next) = cursor.checked_add(2) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::ReadI32(slot) => {
                        let Some(value) = reader.read_i32(cursor) else {
                            break;
                        };
                        // Sign-extend via i64 before storing as u64.
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            value as i64 as u64,
                        );
                        let Some(next) = cursor.checked_add(4) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::ReadU32(slot) => {
                        let Some(value) = reader.read_u32(cursor) else {
                            break;
                        };
                        assign_save(
                            work_save,
                            &mut scratch.save_log,
                            usize::from(slot),
                            u64::from(value),
                        );
                        let Some(next) = cursor.checked_add(4) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::Zero(slot) => {
                        assign_save(work_save, &mut scratch.save_log, usize::from(slot), 0);
                        pc += 1;
                    }
                    Atom::Back(n) => {
                        // Rewind the cursor; fails on underflow below 0.
                        let Some(next) = cursor.checked_sub(u64::from(n)) else {
                            break;
                        };
                        cursor = next;
                        pc += 1;
                    }
                    Atom::Aligned(align) => {
                        // `align` is a power-of-two exponent.
                        // NOTE(review): assumes align < 64 (shift would
                        // overflow otherwise) — presumably enforced by the
                        // pattern parser; confirm.
                        let mask = (1u64 << u64::from(align)).wrapping_sub(1);
                        if cursor & mask != 0 {
                            break;
                        }
                        pc += 1;
                    }
                    Atom::Check(slot) => {
                        // Fails unless the cursor equals the value previously
                        // captured into `slot` (0 for unset/out-of-range).
                        let expected = work_save.get(usize::from(slot)).copied().unwrap_or(0);
                        if cursor != expected {
                            break;
                        }
                        pc += 1;
                    }
                    Atom::Case(skip) => {
                        // Fork: push a fallback that resumes at `pc + skip`
                        // (the next case), then fall through into this case's
                        // body at `pc + 1`.
                        let Some(next_pc) = pc.checked_add(usize::from(skip)) else {
                            break;
                        };
                        // Hard cap on stored alternatives; in release builds
                        // this aborts the whole match rather than growing
                        // unboundedly.
                        if scratch.stack.len() >= MAX_BACKTRACK_STATES {
                            debug_assert!(
                                false,
                                "scanner backtracking stack must stay below MAX_BACKTRACK_STATES for bounded memory"
                            );
                            return false;
                        }
                        scratch.stack.push(BacktrackState {
                            cursor,
                            pc: next_pc,
                            fuzzy,
                            calls_len: scratch.calls.len(),
                            save_log_len: scratch.save_log.len(),
                        });
                        pc += 1;
                    }
                    Atom::Break(skip) => {
                        // Unconditionally jump past the remaining cases to
                        // `pc + skip + 1`.
                        let Some(next_pc) = pc
                            .checked_add(usize::from(skip))
                            .and_then(|value| value.checked_add(1))
                        else {
                            break;
                        };
                        pc = next_pc;
                    }
                    Atom::Nop => {
                        pc += 1;
                    }
                }
            }
        }

        // Every alternative exhausted without reaching the pattern end.
        false
    }
1425}
1426
#[derive(Clone, Debug)]
/// Stateful matcher produced by [`Scanner::matches_code`].
pub struct Matches<'a, 'p, B: BinaryView> {
    // Underlying scanner (binary view plus exec machinery).
    scanner: Scanner<'a, B>,
    // Atom program being matched.
    pat: &'p [Atom],
    // Minimum number of save slots callers must provide to `next`.
    required_slots: usize,
    // Passed to `Scanner::exec` to select the non-backtracking exec path.
    linear_exec: bool,
    // Resume state: index into `code_spans()` of the span being scanned...
    range_index: usize,
    // ...and the next offset to try inside it (`None` restarts at span start).
    cursor: Option<Offset>,
    // Literal byte anchor derived from the pattern prefix; only the first
    // `anchor_len` bytes are meaningful.
    anchor: [u8; ANCHOR_MAX_LEN],
    anchor_len: usize,
    // Offset of the anchor bytes relative to the match start.
    anchor_offset: u64,
    // Per-byte jump table used by the quick (full-anchor) scan.
    anchor_jumps: [u8; 256],
    // Reusable exec buffers (backtracking stack, save journal, call stack).
    scratch: ExecScratch,
}
1442
1443impl<'a, 'p, B: BinaryView> Matches<'a, 'p, B> {
1444    /// Advances to the next match and writes save-slot values into `save`.
1445    ///
1446    /// # Save buffer contract
1447    ///
1448    /// - `save.len()` must be at least the pattern's required slot count.
1449    /// - Only the required prefix is written; extra tail elements are untouched.
1450    /// - On `true`, `save` contains captures for the returned match.
1451    /// - On `false`, `save` is left unchanged.
1452    ///
1453    /// For parsed patterns (`pattern::parse` / `pattern!`), slot `0` is the
1454    /// match start and corresponds to the parser-inserted `Save(0)`.
1455    pub fn next(&mut self, save: &mut [Offset]) -> bool {
1456        debug_assert!(
1457            save.len() >= self.required_slots,
1458            "caller-provided save buffer must cover all slots referenced by the pattern"
1459        );
1460        let save = &mut save[..self.required_slots];
1461        while let Some(span) = self.scanner.view.code_spans().get(self.range_index) {
1462            let start = self.cursor.unwrap_or(span.mapped.start);
1463            if start >= span.mapped.end {
1464                self.range_index += 1;
1465                self.cursor = None;
1466                continue;
1467            }
1468            let matched_at = if self.anchor_len == 0 {
1469                self.scan_range_linear(span.mapped.clone(), start, save)
1470            } else if self.anchor_len < 4 {
1471                self.scan_span_first_byte(span, start, save)
1472            } else {
1473                self.scan_span_quick(span, start, save)
1474            };
1475
1476            if let Some(cursor) = matched_at {
1477                self.cursor = cursor.checked_add(1);
1478                return true;
1479            }
1480
1481            self.range_index += 1;
1482            self.cursor = None;
1483        }
1484
1485        false
1486    }
1487
1488    fn scan_range_linear(
1489        &mut self,
1490        range: Range<Offset>,
1491        mut cursor: Offset,
1492        save: &mut [Offset],
1493    ) -> Option<Offset> {
1494        while cursor < range.end {
1495            if self
1496                .scanner
1497                .exec(cursor, self.pat, save, self.linear_exec, &mut self.scratch)
1498            {
1499                return Some(cursor);
1500            }
1501            let next = cursor.checked_add(1)?;
1502            cursor = next;
1503        }
1504        None
1505    }
1506
1507    fn scan_span_first_byte(
1508        &mut self,
1509        span: &CodeSpan,
1510        start: Offset,
1511        save: &mut [Offset],
1512    ) -> Option<Offset> {
1513        let Some(bytes) = self.scanner.view.image().get(span.file.clone()) else {
1514            return self.scan_range_first_byte(span.mapped.clone(), start, save);
1515        };
1516        let Some(anchor_start) = start.checked_add(self.anchor_offset) else {
1517            return self.scan_range_first_byte(span.mapped.clone(), start, save);
1518        };
1519        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
1520            return self.scan_range_first_byte(span.mapped.clone(), start, save);
1521        };
1522        let Some(start_index) = start_file.checked_sub(span.file.start) else {
1523            return self.scan_range_first_byte(span.mapped.clone(), start, save);
1524        };
1525
1526        debug_assert_eq!(
1527            span.mapped.end.checked_sub(span.mapped.start),
1528            u64::try_from(span.file.end.saturating_sub(span.file.start)).ok(),
1529            "code span mapped/file ranges must have identical lengths"
1530        );
1531
1532        let Some(haystack) = bytes.get(start_index..) else {
1533            return self.scan_range_first_byte(span.mapped.clone(), start, save);
1534        };
1535        let needle = self.anchor[0];
1536        let anchor = &self.anchor[..self.anchor_len];
1537        let anchor_offset = usize::try_from(self.anchor_offset).ok()?;
1538        for delta in memchr_iter(needle, haystack) {
1539            if self.anchor_len > 1
1540                && haystack
1541                    .get(delta..delta + self.anchor_len)
1542                    .is_none_or(|window| window != anchor)
1543            {
1544                continue;
1545            }
1546            let anchor_index = start_index.checked_add(delta)?;
1547            let Some(start_index_in_span) = anchor_index.checked_sub(anchor_offset) else {
1548                continue;
1549            };
1550            let mapped_delta = Offset::try_from(start_index_in_span).ok()?;
1551            let cursor = span.mapped.start.checked_add(mapped_delta)?;
1552            if self
1553                .scanner
1554                .exec(cursor, self.pat, save, self.linear_exec, &mut self.scratch)
1555            {
1556                return Some(cursor);
1557            }
1558        }
1559        None
1560    }
1561
1562    fn scan_range_first_byte(
1563        &mut self,
1564        range: Range<Offset>,
1565        mut cursor: Offset,
1566        save: &mut [Offset],
1567    ) -> Option<Offset> {
1568        let needle = self.anchor[0];
1569        let mut probe = cursor.checked_add(self.anchor_offset)?;
1570        while probe < range.end {
1571            if self.scanner.view.read_u8(probe) == Some(needle)
1572                && self
1573                    .scanner
1574                    .exec(cursor, self.pat, save, self.linear_exec, &mut self.scratch)
1575            {
1576                return Some(cursor);
1577            }
1578            cursor = cursor.checked_add(1)?;
1579            probe = probe.checked_add(1)?;
1580        }
1581        None
1582    }
1583
1584    fn scan_range_quick(
1585        &mut self,
1586        range: Range<Offset>,
1587        start: Offset,
1588        save: &mut [Offset],
1589    ) -> Option<Offset> {
1590        let prefix = &self.anchor[..self.anchor_len];
1591        let window = u64::try_from(self.anchor_len).ok()?;
1592        let start = start.checked_add(self.anchor_offset)?;
1593        if start >= range.end {
1594            return None;
1595        }
1596        let total = range.end.checked_sub(start)?;
1597        if total < window {
1598            return None;
1599        }
1600
1601        let last = prefix[self.anchor_len - 1];
1602        let mut index = 0u64;
1603        let max_index = total - window;
1604        while index <= max_index {
1605            let cursor = start.checked_add(index)?;
1606            let probe_at = cursor.checked_add(window - 1)?;
1607            let Some(probe) = self.scanner.view.read_u8(probe_at) else {
1608                index = index.checked_add(1)?;
1609                continue;
1610            };
1611
1612            let jump = u64::from(self.anchor_jumps[usize::from(probe)].max(1));
1613            if probe == last
1614                && self.prefix_matches_mapped(cursor)
1615                && self.scanner.exec(
1616                    cursor.checked_sub(self.anchor_offset)?,
1617                    self.pat,
1618                    save,
1619                    self.linear_exec,
1620                    &mut self.scratch,
1621                )
1622            {
1623                return cursor.checked_sub(self.anchor_offset);
1624            }
1625            index = index.checked_add(jump)?;
1626        }
1627
1628        None
1629    }
1630
1631    fn scan_span_quick(
1632        &mut self,
1633        span: &CodeSpan,
1634        start: Offset,
1635        save: &mut [Offset],
1636    ) -> Option<Offset> {
1637        let Some(bytes) = self.scanner.view.image().get(span.file.clone()) else {
1638            return self.scan_range_quick(span.mapped.clone(), start, save);
1639        };
1640        let Some(anchor_start) = start.checked_add(self.anchor_offset) else {
1641            return self.scan_range_quick(span.mapped.clone(), start, save);
1642        };
1643        let Some(start_file) = mapped_to_file_offset(span, anchor_start) else {
1644            return self.scan_range_quick(span.mapped.clone(), start, save);
1645        };
1646        let Some(start_index) = start_file.checked_sub(span.file.start) else {
1647            return self.scan_range_quick(span.mapped.clone(), start, save);
1648        };
1649
1650        let prefix = &self.anchor[..self.anchor_len];
1651        let Some(haystack) = bytes.get(start_index..) else {
1652            return self.scan_range_quick(span.mapped.clone(), start, save);
1653        };
1654        if haystack.len() < self.anchor_len {
1655            return None;
1656        }
1657
1658        let last = prefix[self.anchor_len - 1];
1659        let mut index = 0usize;
1660        let max_index = haystack.len() - self.anchor_len;
1661        while index <= max_index {
1662            let probe = haystack[index + self.anchor_len - 1];
1663
1664            let jump = usize::from(self.anchor_jumps[usize::from(probe)].max(1));
1665            if probe == last
1666                && haystack
1667                    .get(index..index + self.anchor_len)
1668                    .is_some_and(|window| window == prefix)
1669            {
1670                let total_index = start_index.checked_add(index)?;
1671                let mapped_delta = Offset::try_from(total_index).ok()?;
1672                let cursor = span.mapped.start.checked_add(mapped_delta)?;
1673                let start_cursor = cursor.checked_sub(self.anchor_offset)?;
1674                if self.scanner.exec(
1675                    start_cursor,
1676                    self.pat,
1677                    save,
1678                    self.linear_exec,
1679                    &mut self.scratch,
1680                ) {
1681                    return Some(start_cursor);
1682                }
1683            }
1684            index = index.checked_add(jump)?;
1685        }
1686
1687        None
1688    }
1689
1690    fn prefix_matches_mapped(&self, cursor: Offset) -> bool {
1691        for (index, expected) in self.anchor[..self.anchor_len].iter().enumerate() {
1692            let delta = index as u64;
1693            let Some(offset) = cursor.checked_add(delta) else {
1694                return false;
1695            };
1696            if self.scanner.view.read_u8(offset) != Some(*expected) {
1697                return false;
1698            }
1699        }
1700        true
1701    }
1702}
1703
1704fn build_prefix(pat: &[Atom]) -> ([u8; PREFIX_BUF_LEN], usize) {
1705    let mut prefix = [0u8; PREFIX_BUF_LEN];
1706    let mut len = 0usize;
1707    for atom in pat {
1708        match *atom {
1709            Atom::Byte(byte) => {
1710                if len >= PREFIX_BUF_LEN {
1711                    break;
1712                }
1713                prefix[len] = byte;
1714                len += 1;
1715            }
1716            Atom::Save(_) | Atom::Aligned(_) | Atom::Nop => {}
1717            _ => break,
1718        }
1719    }
1720    (prefix, len)
1721}
1722
1723fn analyze_pattern(pat: &[Atom]) -> PatternPlan {
1724    let required_slots = save_len(pat);
1725    let linear_exec = is_linear_pattern(pat);
1726    let (prefix, prefix_len) = build_prefix(pat);
1727    let (anchor, anchor_len, anchor_offset) = select_anchor(&prefix, prefix_len);
1728    let anchor_jumps = build_anchor_jumps(&anchor, anchor_len);
1729    PatternPlan {
1730        required_slots,
1731        linear_exec,
1732        anchor,
1733        anchor_len,
1734        anchor_offset,
1735        anchor_jumps,
1736    }
1737}
1738
1739fn build_anchor_jumps(anchor: &[u8; ANCHOR_MAX_LEN], anchor_len: usize) -> [u8; 256] {
1740    let default_jump = anchor_len.max(1) as u8;
1741    let mut jumps = [default_jump; 256];
1742    for (index, byte) in anchor.iter().take(anchor_len.saturating_sub(1)).enumerate() {
1743        jumps[usize::from(*byte)] = (anchor_len - index - 1) as u8;
1744    }
1745    jumps
1746}
1747
1748/// Chooses the best fixed-size literal anchor window from the prefix.
1749///
1750/// The scanner uses this anchor for candidate filtering before running full
1751/// pattern execution. We score each possible window and select the one with the
1752/// highest expected selectivity (ties prefer later windows for slightly better
1753/// locality with following atoms).
1754fn select_anchor(
1755    prefix: &[u8; PREFIX_BUF_LEN],
1756    prefix_len: usize,
1757) -> ([u8; ANCHOR_MAX_LEN], usize, u64) {
1758    let mut anchor = [0u8; ANCHOR_MAX_LEN];
1759    if prefix_len == 0 {
1760        return (anchor, 0, 0);
1761    }
1762
1763    let anchor_len = prefix_len.min(ANCHOR_MAX_LEN);
1764    let mut best_start = 0usize;
1765    let mut best_score = 0u32;
1766    for start in 0..=prefix_len - anchor_len {
1767        let score = anchor_window_score(&prefix[start..start + anchor_len]);
1768        if score > best_score || (score == best_score && start > best_start) {
1769            best_score = score;
1770            best_start = start;
1771        }
1772    }
1773
1774    for (index, byte) in prefix[best_start..best_start + anchor_len]
1775        .iter()
1776        .copied()
1777        .enumerate()
1778    {
1779        anchor[index] = byte;
1780    }
1781    (anchor, anchor_len, best_start as u64)
1782}
1783
1784/// Scores an anchor window by estimated filtering strength.
1785///
1786/// Higher scores prefer windows with more distinct and less common bytes, and a
1787/// stronger terminal byte because quick search probes the window tail first.
1788fn anchor_window_score(window: &[u8]) -> u32 {
1789    let mut seen = [false; 256];
1790    let mut distinct = 0u32;
1791    let mut byte_score = 0u32;
1792    for byte in window.iter().copied() {
1793        let idx = usize::from(byte);
1794        if !seen[idx] {
1795            seen[idx] = true;
1796            distinct += 1;
1797        }
1798        byte_score += anchor_byte_weight(byte);
1799    }
1800
1801    let duplicate_count = window.len() as u32 - distinct;
1802    let last_weight = window.last().copied().map(anchor_byte_weight).unwrap_or(0);
1803    (distinct * 8) + byte_score + (last_weight * 2) - (duplicate_count * 3)
1804}
1805
/// Heuristic byte rarity weight used by [`anchor_window_score`].
///
/// Extremely common x86 bytes score lowest, moderately common prefix and
/// opcode ranges score in the middle, and everything else is treated as
/// rare so mixed or unusual windows are preferred as anchors.
fn anchor_byte_weight(byte: u8) -> u32 {
    match byte {
        // Ubiquitous bytes (padding, common MOV/CALL/JMP forms, NOP, INT3,
        // 0x00/0xFF). Listed first so they win over the range arm below.
        0x00 | 0x48 | 0x8b | 0x89 | 0x90 | 0xcc | 0xe8 | 0xe9 | 0xff => 1,
        // REX prefixes, PUSH/POP, short conditional jumps, and the
        // operand/address-size prefixes.
        0x40..=0x5f | 0x66 | 0x67 | 0x70..=0x7f => 2,
        // Everything else counts as comparatively rare.
        _ => 4,
    }
}
1818
1819fn is_linear_pattern(pat: &[Atom]) -> bool {
1820    !pat.iter().any(|atom| {
1821        matches!(
1822            atom,
1823            Atom::SkipRange(_, _) | Atom::Push(_) | Atom::Pop | Atom::Case(_) | Atom::Break(_)
1824        )
1825    })
1826}
1827
1828fn span_index_for_offset(spans: &[CodeSpan], offset: Offset) -> Option<usize> {
1829    let mut low = 0usize;
1830    let mut high = spans.len();
1831    while low < high {
1832        let mid = low + (high - low) / 2;
1833        let span = &spans[mid];
1834        if span.mapped.end <= offset {
1835            low = mid + 1;
1836        } else {
1837            high = mid;
1838        }
1839    }
1840
1841    spans.get(low).and_then(|span| {
1842        if span.mapped.contains(&offset) {
1843            Some(low)
1844        } else {
1845            None
1846        }
1847    })
1848}
1849
1850fn mapped_to_file_offset(span: &CodeSpan, mapped: Offset) -> Option<usize> {
1851    let delta = mapped.checked_sub(span.mapped.start)?;
1852    if mapped >= span.mapped.end {
1853        return None;
1854    }
1855    let delta_usize = usize::try_from(delta).ok()?;
1856    span.file.start.checked_add(delta_usize)
1857}
1858
1859fn prefix_matches_mapped<B: BinaryView>(view: &B, cursor: Offset, prefix: &[u8]) -> bool {
1860    for (index, expected) in prefix.iter().enumerate() {
1861        let Some(offset) = cursor.checked_add(index as u64) else {
1862            return false;
1863        };
1864        if view.read_u8(offset) != Some(*expected) {
1865            return false;
1866        }
1867    }
1868    true
1869}
1870
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::{
        BinaryView, CodeSpan, Offset, PreparedPattern, Scanner, build_prefix, is_linear_pattern,
        select_anchor, span_index_for_offset,
    };
    use crate::pattern::Atom;

    /// In-memory [`BinaryView`] test double backed by a byte vector.
    #[derive(Debug)]
    struct TestView {
        bytes: Vec<u8>,
        spans: Vec<CodeSpan>,
    }

    impl TestView {
        /// Builds a view whose single code span identity-maps all bytes.
        fn new(bytes: &[u8]) -> Self {
            let end = bytes.len() as Offset;
            Self {
                bytes: bytes.to_vec(),
                spans: vec![CodeSpan {
                    mapped: 0..end,
                    file: 0..bytes.len(),
                }],
            }
        }
    }

    impl BinaryView for TestView {
        fn image(&self) -> &[u8] {
            &self.bytes
        }

        fn code_spans(&self) -> &[CodeSpan] {
            &self.spans
        }

        // Identity translation: mapped offset == file offset while in
        // bounds, `None` past the end of the image.
        fn mapped_to_file_offset(&self, offset: Offset) -> Option<usize> {
            usize::try_from(offset)
                .ok()
                .filter(|index| *index < self.bytes.len())
        }
    }

    // SkipRange should yield the match at the shortest distance first.
    #[test]
    fn skip_range_tries_shorter_distances_first() {
        let view = TestView::new(&[0x00, 0x41, 0x41]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::SkipRange(0, 2),
            Atom::Save(1),
            Atom::Byte(0x41),
        ];
        let mut matches = scanner.matches_code(&pat);
        let mut save = [0u64; 2];

        assert!(matches.next(&mut save));
        assert_eq!(save[1], 1);
    }

    // When shorter distances fail, SkipRange must fall back to longer ones.
    #[test]
    fn skip_range_backtracks_to_later_distances() {
        let view = TestView::new(&[0x00, 0x00, 0x41]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::SkipRange(0, 2),
            Atom::Save(1),
            Atom::Byte(0x41),
        ];
        let mut matches = scanner.matches_code(&pat);
        let mut save = [0u64; 2];

        assert!(matches.next(&mut save));
        assert_eq!(save[1], 2);
    }

    // A Fuzzy mask applies to the immediately following Byte only; the
    // byte after that must match exactly.
    #[test]
    fn fuzzy_masks_only_the_next_byte_match() {
        let view = TestView::new(&[0xab, 0x0f]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Fuzzy(0xf0),
            Atom::Byte(0xa0),
            Atom::Byte(0x0f),
        ];
        let mut save = [0u64; 1];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
    }

    // Nop atoms are inert and must not affect match position or success.
    #[test]
    fn nop_does_not_change_matching_behavior() {
        let view = TestView::new(&[0x41]);
        let scanner = Scanner::new(&view);
        let pat = [Atom::Save(0), Atom::Nop, Atom::Byte(0x41)];
        let mut save = [0u64; 1];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
    }

    // Ptr dereferences a little-endian pointer in the image and resumes
    // matching at the pointed-to (mapped) location.
    #[test]
    fn ptr_follows_mapped_target_and_captures_destination() {
        let mut bytes = vec![0x68];
        bytes.extend_from_slice(&12u64.to_le_bytes());
        bytes.extend_from_slice(&[0, 0, 0]);
        bytes.extend_from_slice(&[0x31, 0xc0, 0xc3]);
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0x68),
            Atom::Ptr,
            Atom::Save(1),
            Atom::Byte(0x31),
            Atom::Byte(0xc0),
            Atom::Byte(0xc3),
        ];
        let mut save = [0u64; 2];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
        assert_eq!(save[1], 12);
    }

    // A pointer outside every mapped span must fail the match, not panic.
    #[test]
    fn ptr_fails_when_target_is_not_mapped() {
        let mut bytes = vec![0x68];
        bytes.extend_from_slice(&1024u64.to_le_bytes());
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [Atom::Save(0), Atom::Byte(0x68), Atom::Ptr, Atom::Byte(0x90)];
        let mut save = [0u64; 1];

        assert!(!scanner.matches_code(&pat).next(&mut save));
    }

    // Push(0) records a resume point advanced by the pointer width
    // (8 bytes here), as observed via the Save after Pop.
    #[test]
    fn push_zero_uses_pointer_width_for_resume_cursor() {
        let view = TestView::new(&[0xaa, 0, 0, 0, 0, 0, 0, 0, 0x55]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Push(0),
            Atom::Byte(0xaa),
            Atom::Pop,
            Atom::Save(1),
            Atom::Byte(0x55),
        ];
        let mut save = [0u64; 2];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[1], 8);
    }

    // Pir(slot) reads a signed disp32 relative to the saved base slot;
    // here 0xfffffffc (-4) points from offset 4 back to offset 0 (0x90).
    #[test]
    fn pir_follows_saved_base_slot_with_signed_disp32() {
        let bytes = [0x90, 0x11, 0x22, 0x33, 0xfc, 0xff, 0xff, 0xff];
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Skip(4),
            Atom::Save(1),
            Atom::Pir(1),
            Atom::Byte(0x90),
        ];
        let mut save = [0u64; 2];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 0);
        assert_eq!(save[1], 4);
    }

    // A Pir whose 4-byte displacement runs past the image must fail.
    #[test]
    fn pir_fails_when_disp32_cannot_be_read() {
        let bytes = [0x90, 0x01, 0x02, 0x03];
        let view = TestView::new(&bytes);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0x90),
            Atom::Pir(0),
            Atom::Byte(0x90),
        ];
        let mut save = [0u64; 1];

        assert!(!scanner.matches_code(&pat).next(&mut save));
    }

    // finds_code must report non-unique when the pattern matches at more
    // than one location with consistent save semantics.
    #[test]
    fn finds_code_uses_consistent_save_semantics_for_uniqueness() {
        let view = TestView::new(&[0x00, 0xaa, 0xaa]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0xaa),
            Atom::Save(1),
            Atom::Check(1),
        ];
        let mut save = [0u64; 2];

        assert!(!scanner.finds_code(&pat, &mut save));
    }

    // required_slots is the highest Save index plus one (slot 2 -> 3).
    #[test]
    fn prepared_pattern_exposes_required_slots() {
        let pat = vec![
            Atom::Save(0),
            Atom::Byte(0xaa),
            Atom::Save(2),
            Atom::Byte(0xbb),
        ];
        let prepared = PreparedPattern::from_atoms(pat);
        assert_eq!(prepared.required_slots(), 3);
    }

    // The prepared-pattern path must produce the same saves as the ad-hoc
    // (runtime-analyzed) path.
    #[test]
    fn matches_prepared_matches_runtime_behavior() {
        let view = TestView::new(&[0x00, 0xaa, 0xbb]);
        let scanner = Scanner::new(&view);
        let pat = [Atom::Save(0), Atom::Byte(0xaa), Atom::Byte(0xbb)];
        let prepared = scanner.prepare_pattern(&pat);

        let mut save_runtime = [0u64; 1];
        let mut save_prepared = [0u64; 1];
        assert!(scanner.matches_code(&pat).next(&mut save_runtime));
        assert!(scanner.matches_prepared(&prepared).next(&mut save_prepared));
        assert_eq!(save_runtime, save_prepared);
    }

    // Textual patterns parse and scan end-to-end.
    #[test]
    fn prepare_pattern_str_parses_runtime_text() {
        let view = TestView::new(&[0x00, 0xaa, 0xbb]);
        let scanner = Scanner::new(&view);
        let prepared = scanner
            .prepare_pattern_str("AA BB")
            .expect("runtime pattern text should parse");

        let mut save = vec![0u64; prepared.required_slots()];
        assert!(scanner.matches_prepared(&prepared).next(&mut save));
    }

    // Malformed pattern text ("A?" is a truncated byte) surfaces as Err.
    #[test]
    fn prepare_pattern_str_reports_parse_errors() {
        let view = TestView::new(&[]);
        let scanner = Scanner::new(&view);
        assert!(scanner.prepare_pattern_str("A?").is_err());
    }

    // The literal-prefix fast path must still find a match that ends on
    // the very last byte of the range.
    #[test]
    fn quick_prefix_strategy_finds_match_near_range_end() {
        let view = TestView::new(&[0x00, 0x11, 0x22, 0x33, 0x44]);
        let scanner = Scanner::new(&view);
        let pat = [
            Atom::Save(0),
            Atom::Byte(0x11),
            Atom::Byte(0x22),
            Atom::Byte(0x33),
            Atom::Byte(0x44),
        ];
        let mut save = [0u64; 1];

        assert!(scanner.matches_code(&pat).next(&mut save));
        assert_eq!(save[0], 1);
    }

    // Zero-width atoms (Save/Aligned/Nop) pass through the prefix builder;
    // Check terminates the literal run.
    #[test]
    fn prefix_builder_keeps_optimizable_leading_atoms() {
        let (prefix, len) = build_prefix(&[
            Atom::Save(0),
            Atom::Aligned(0),
            Atom::Nop,
            Atom::Byte(0xaa),
            Atom::Save(1),
            Atom::Byte(0xbb),
            Atom::Check(1),
            Atom::Byte(0xcc),
        ]);

        assert_eq!(len, 2);
        assert_eq!(&prefix[..len], &[0xaa, 0xbb]);
    }

    // The distinctive DE AD BE EF window should beat the window of common
    // x86 bytes (48 8B 05 48) at the end of the prefix.
    #[test]
    fn anchor_selection_prefers_stronger_window_over_common_suffix() {
        let mut prefix = [0u8; super::PREFIX_BUF_LEN];
        let bytes = [0xde, 0xad, 0xbe, 0xef, 0x48, 0x8b, 0x05, 0x48];
        for (index, byte) in bytes.iter().copied().enumerate() {
            prefix[index] = byte;
        }

        let (anchor, len, offset) = select_anchor(&prefix, bytes.len());
        assert_eq!(len, 4);
        assert_eq!(offset, 0);
        assert_eq!(&anchor[..len], &[0xde, 0xad, 0xbe, 0xef]);
    }

    // code_ranges yields each span's mapped range, preserving span order.
    #[test]
    fn code_ranges_yield_mapped_ranges_in_order() {
        let view = TestView {
            bytes: vec![0u8; 16],
            spans: vec![
                CodeSpan {
                    mapped: 10..13,
                    file: 0..3,
                },
                CodeSpan {
                    mapped: 30..35,
                    file: 8..13,
                },
            ],
        };

        let ranges = view.code_ranges().cloned().collect::<Vec<_>>();
        assert_eq!(ranges, vec![10..13, 30..35]);
    }

    // is_in_code honors half-open span bounds and the gap between spans.
    #[test]
    fn is_in_code_detects_hits_and_gaps() {
        let view = TestView {
            bytes: vec![0u8; 16],
            spans: vec![
                CodeSpan {
                    mapped: 5..8,
                    file: 0..3,
                },
                CodeSpan {
                    mapped: 12..15,
                    file: 8..11,
                },
            ],
        };

        assert!(view.is_in_code(5));
        assert!(view.is_in_code(7));
        assert!(!view.is_in_code(8));
        assert!(!view.is_in_code(11));
        assert!(view.is_in_code(14));
        assert!(!view.is_in_code(15));
    }

    // Only fixed-width atoms keep a pattern on the linear fast path.
    #[test]
    fn linear_exec_selector_rejects_backtracking_atoms() {
        assert!(is_linear_pattern(&[
            Atom::Save(0),
            Atom::Byte(0x48),
            Atom::Skip(3)
        ]));
        assert!(!is_linear_pattern(&[Atom::Save(0), Atom::SkipRange(1, 3)]));
        assert!(!is_linear_pattern(&[Atom::Push(1), Atom::Pop]));
        assert!(!is_linear_pattern(&[Atom::Case(1), Atom::Break(0)]));
    }

    // Binary search finds containing spans and rejects gap/out-of-range
    // offsets (half-open upper bounds).
    #[test]
    fn span_index_binary_search_locates_offsets() {
        let spans = vec![
            CodeSpan {
                mapped: 5..8,
                file: 0..3,
            },
            CodeSpan {
                mapped: 12..15,
                file: 8..11,
            },
            CodeSpan {
                mapped: 30..35,
                file: 20..25,
            },
        ];

        assert_eq!(span_index_for_offset(&spans, 5), Some(0));
        assert_eq!(span_index_for_offset(&spans, 14), Some(1));
        assert_eq!(span_index_for_offset(&spans, 34), Some(2));
        assert_eq!(span_index_for_offset(&spans, 8), None);
        assert_eq!(span_index_for_offset(&spans, 100), None);
    }

    // Property test: scanning a parsed single-byte pattern agrees with a
    // naive linear search for both first-match and uniqueness semantics.
    proptest! {
        #[test]
        fn parsed_single_byte_scan_matches_manual_search(
            haystack in prop::collection::vec(any::<u8>(), 0..128),
            needle in any::<u8>(),
        ) {
            let source = format!("{needle:02X}");
            let atoms = crate::pattern::parse(&source).expect("hex byte pattern should parse");
            let required_slots = crate::pattern::save_len(&atoms);

            let view = TestView::new(&haystack);
            let scanner = Scanner::new(&view);

            let expected_first = haystack
                .iter()
                .position(|byte| *byte == needle)
                .map(|index| index as u64);
            let expected_unique = haystack.iter().filter(|byte| **byte == needle).count() == 1;

            let mut iter_save = vec![0u64; required_slots];
            let mut matches = scanner.matches_code(&atoms);
            let found = matches.next(&mut iter_save);
            prop_assert_eq!(found, expected_first.is_some());
            if let Some(expected_start) = expected_first {
                prop_assert_eq!(iter_save[0], expected_start);
            }

            let mut unique_save = vec![0u64; required_slots];
            let unique = scanner.finds_code(&atoms, &mut unique_save);
            prop_assert_eq!(unique, expected_unique);
            if unique {
                prop_assert_eq!(Some(unique_save[0]), expected_first);
            }
        }
    }
}