Skip to main content

fuzzy_regex/engine/
fuzzy_bridge.rs

1//! Bridge for fuzzy literal matching using Levenshtein automata and Bitap.
2
3#![allow(
4    clippy::needless_range_loop,
5    clippy::match_same_arms,
6    clippy::too_many_lines,
7    clippy::items_after_statements,
8    clippy::float_cmp,
9    clippy::allow_attributes
10)]
11
12use crate::types::{FuzzyLimits, FuzzyPenalties};
13use std::cell::RefCell;
14
15use super::GuardNfa;
16use super::bitap::BitapMatcher;
17use super::damlev::{DamLevNfa, EditLimits, SearchBuffers};
18use super::hash::{FxHashMap, FxHashSet};
19use crate::ir::{EditCharRestriction, LiteralPattern};
20
21/// Cached search results from fuzzy search.
22/// Maps (`pattern_index`, `start_position`) -> `Vec<FuzzyMatchResult>`
23#[derive(Debug, Default)]
24pub struct CachedMatches {
25    /// Matches organized by (`pattern_index`, `start_byte_position`)
26    by_pattern_and_start: FxHashMap<(usize, usize), Vec<FuzzyMatchResult>>,
27}
28
29impl CachedMatches {
30    /// Get the best match for a pattern at a specific start position.
31    #[must_use]
32    pub fn get(&self, pattern_index: usize, start: usize) -> Option<&FuzzyMatchResult> {
33        self.by_pattern_and_start
34            .get(&(pattern_index, start))
35            .and_then(|v| v.first())
36    }
37
38    /// Get all matches for a pattern at a specific start position.
39    pub fn get_all(&self, pattern_index: usize, start: usize) -> Option<&[FuzzyMatchResult]> {
40        self.by_pattern_and_start
41            .get(&(pattern_index, start))
42            .map(Vec::as_slice)
43    }
44
45    /// Insert a match result for a pattern at a specific start position.
46    pub fn insert(&mut self, pattern_index: usize, start: usize, result: FuzzyMatchResult) {
47        self.by_pattern_and_start
48            .entry((pattern_index, start))
49            .or_default()
50            .push(result);
51    }
52
53    /// Iterate over all matches.
54    pub fn iter(&self) -> impl Iterator<Item = ((usize, usize), &[FuzzyMatchResult])> + '_ {
55        self.by_pattern_and_start
56            .iter()
57            .map(|((pattern_idx, start), results)| ((*pattern_idx, *start), results.as_slice()))
58    }
59
60    /// Check if the cache is empty.
61    #[must_use]
62    pub fn is_empty(&self) -> bool {
63        self.by_pattern_and_start.is_empty()
64    }
65}
66
67/// Bridge to Levenshtein automata for efficient fuzzy literal matching.
68#[derive(Debug)]
69pub struct FuzzyBridge {
70    /// One Levenshtein NFA per pattern.
71    automata: Vec<DamLevNfa>,
72    /// Guard-based NFA for fast first-match (single pattern only).
73    #[allow(dead_code)]
74    guard_nfa: Option<GuardNfa>,
75    /// Bitap matchers for short patterns (≤64 chars). None if pattern is too long.
76    bitap_matchers: Vec<Option<BitapMatcher>>,
77    /// Pattern texts for reference.
78    patterns: Vec<String>,
79    /// Edit limits per pattern (for calculating effective thresholds).
80    limits: Vec<Option<FuzzyLimits>>,
81    /// Character class restrictions for edits (parallel to patterns).
82    edit_char_restrictions: Vec<Option<EditCharRestriction>>,
83    /// Whether any pattern has character restrictions (for fast path).
84    has_char_restrictions: bool,
85    /// Case insensitive mode.
86    case_insensitive: bool,
87    /// Reusable search buffers to avoid per-call allocations.
88    search_buffers: RefCell<SearchBuffers>,
89    /// Word list patterns (from \L<name>) added at runtime.
90    word_list_patterns: Vec<String>,
91    /// Word list edit limits.
92    word_list_limits: Vec<Option<FuzzyLimits>>,
93}
94
95impl FuzzyBridge {
96    /// Create a new fuzzy bridge from literal patterns.
97    #[must_use]
98    pub fn new(
99        literals: &[LiteralPattern],
100        _default_limits: Option<FuzzyLimits>,
101        _penalties: Option<FuzzyPenalties>,
102        case_insensitive: bool,
103    ) -> Option<Self> {
104        if literals.is_empty() {
105            return None;
106        }
107
108        let patterns: Vec<String> = literals.iter().map(|l| l.text.clone()).collect();
109        let limits: Vec<Option<FuzzyLimits>> = literals.iter().map(|l| l.limits.clone()).collect();
110        let edit_char_restrictions: Vec<Option<EditCharRestriction>> =
111            literals.iter().map(|l| l.edit_chars.clone()).collect();
112
113        // Build Levenshtein NFA and Bitap matcher for each pattern
114        let mut automata: Vec<DamLevNfa> = Vec::with_capacity(literals.len());
115        let mut bitap_matchers: Vec<Option<BitapMatcher>> = Vec::with_capacity(literals.len());
116
117        for lit in literals {
118            let edit_limits = if let Some(ref lim) = lit.limits {
119                // When e<= not specified, max_edits is the sum of individual limits
120                let max_edits = lim.get_edits().unwrap_or_else(|| {
121                    let i = lim.get_insertions().unwrap_or(0);
122                    let d = lim.get_deletions().unwrap_or(0);
123                    let s = lim.get_substitutions().unwrap_or(0);
124                    let t = lim.get_swaps().unwrap_or(0);
125                    i.saturating_add(d).saturating_add(s).saturating_add(t)
126                });
127                EditLimits::with_limits(
128                    max_edits,
129                    lim.get_insertions(),
130                    lim.get_deletions(),
131                    lim.get_substitutions(),
132                    lim.get_swaps(),
133                )
134            } else {
135                EditLimits::new(0) // Exact match
136            };
137
138            // Build Levenshtein NFA (always works)
139            automata.push(DamLevNfa::new(
140                &lit.text,
141                edit_limits.clone(),
142                case_insensitive,
143            ));
144
145            // Build Bitap matcher (only for short patterns)
146            bitap_matchers.push(BitapMatcher::new(&lit.text, edit_limits, case_insensitive));
147        }
148
149        // Build Guard NFA for single pattern (fast path for find_first)
150        let guard_nfa = if literals.len() == 1 {
151            let lit = &literals[0];
152            let edit_limits = if let Some(ref lim) = lit.limits {
153                let max_edits = lim.get_edits().unwrap_or(0);
154                EditLimits::new(max_edits)
155            } else {
156                EditLimits::new(0)
157            };
158            Some(GuardNfa::new(&lit.text, edit_limits, case_insensitive))
159        } else {
160            None
161        };
162
163        let has_char_restrictions = edit_char_restrictions
164            .iter()
165            .any(std::option::Option::is_some);
166
167        Some(FuzzyBridge {
168            automata,
169            guard_nfa,
170            bitap_matchers,
171            patterns,
172            limits,
173            edit_char_restrictions,
174            has_char_restrictions,
175            case_insensitive,
176            search_buffers: RefCell::new(SearchBuffers::new()),
177            word_list_patterns: Vec::new(),
178            word_list_limits: Vec::new(),
179        })
180    }
181
182    /// Get the number of patterns.
183    #[must_use]
184    pub fn pattern_count(&self) -> usize {
185        self.patterns.len()
186    }
187
188    /// Returns whether case-insensitive matching is enabled.
189    #[must_use]
190    pub fn is_case_insensitive(&self) -> bool {
191        self.case_insensitive
192    }
193
194    /// Add word list patterns (from \L<name>) for matching.
195    /// The words will be matched against text with the given fuzzy limits.
196    pub fn add_word_list(&mut self, words: &[String], limits: Option<&FuzzyLimits>) {
197        for word in words {
198            self.word_list_patterns.push(word.clone());
199            self.word_list_limits.push(limits.cloned());
200        }
201    }
202
203    /// Get total pattern count including word lists.
204    #[must_use]
205    pub fn total_pattern_count(&self) -> usize {
206        self.patterns.len() + self.word_list_patterns.len()
207    }
208
209    /// Get the limits for patterns.
210    #[must_use]
211    pub fn limits(&self) -> &[Option<FuzzyLimits>] {
212        &self.limits
213    }
214
215    /// Get a Bitap matcher for a pattern index (first pattern).
216    #[must_use]
217    pub fn get_bitap_matcher(&self) -> Option<&BitapMatcher> {
218        self.bitap_matchers.first().and_then(|m| m.as_ref())
219    }
220
221    /// Get the character length of a pattern.
222    pub fn pattern_char_len(&self, index: usize) -> Option<usize> {
223        self.patterns.get(index).map(|s| s.chars().count())
224    }
225
226    /// Check if all patterns have 0 max edits (exact matching only).
227    /// Returns true if all patterns either have no limits (exact match)
228    /// or have explicit limits of 0 edits.
229    pub fn is_all_exact(&self) -> bool {
230        self.limits.iter().all(|lim| {
231            match lim {
232                None => true, // No limits = exact match
233                Some(lim) => {
234                    lim.get_edits().unwrap_or_else(|| {
235                        let ins = lim.get_insertions().unwrap_or(0);
236                        let del = lim.get_deletions().unwrap_or(0);
237                        let sub = lim.get_substitutions().unwrap_or(0);
238                        let swap = lim.get_swaps().unwrap_or(0);
239                        ins.saturating_add(del)
240                            .saturating_add(sub)
241                            .saturating_add(swap)
242                    }) == 0
243                }
244            }
245        })
246    }
247
248    /// Get the maximum edit distance for a pattern.
249    pub fn pattern_max_edits(&self, index: usize) -> Option<u8> {
250        self.limits.get(index).and_then(|lim| {
251            lim.as_ref().map(|l| {
252                l.get_edits().unwrap_or_else(|| {
253                    let i = l.get_insertions().unwrap_or(0);
254                    let d = l.get_deletions().unwrap_or(0);
255                    let s = l.get_substitutions().unwrap_or(0);
256                    let t = l.get_swaps().unwrap_or(0);
257                    i.saturating_add(d).saturating_add(s).saturating_add(t)
258                })
259            })
260        })
261    }
262
263    /// Get the maximum pattern length across all patterns.
264    pub fn max_pattern_len(&self) -> usize {
265        self.patterns
266            .iter()
267            .map(|p| p.chars().count())
268            .max()
269            .unwrap_or(0)
270    }
271
272    /// Get the maximum edit distance across all patterns.
273    pub fn max_edits(&self) -> Option<u8> {
274        self.limits
275            .iter()
276            .filter_map(|lim| {
277                lim.as_ref().map(|l| {
278                    l.get_edits().unwrap_or_else(|| {
279                        let i = l.get_insertions().unwrap_or(0);
280                        let d = l.get_deletions().unwrap_or(0);
281                        let s = l.get_substitutions().unwrap_or(0);
282                        let t = l.get_swaps().unwrap_or(0);
283                        i.saturating_add(d).saturating_add(s).saturating_add(t)
284                    })
285                })
286            })
287            .max()
288    }
289
290    /// Check if all patterns are compatible with Bitap streaming (<=64 chars).
291    pub fn all_patterns_bitap_compatible(&self) -> bool {
292        self.bitap_matchers.iter().all(Option::is_some)
293    }
294
295    /// Search the entire text once and cache all matches.
296    /// Uses Bitap when available (faster), falls back to Levenshtein NFA.
297    pub fn search_all(&self, text: &str, threshold: f32) -> CachedMatches {
298        let mut cached = CachedMatches::default();
299
300        for (pattern_idx, nfa) in self.automata.iter().enumerate() {
301            let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
302
303            // Use Bitap when available (faster for short patterns), fall back to NFA
304            let matches = if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
305                bitap.find_all(text, pattern_threshold)
306            } else {
307                nfa.find_all(text, pattern_threshold)
308            };
309
310            for m in matches {
311                // Validate character class restrictions if present
312                if let Some(restriction) = self
313                    .edit_char_restrictions
314                    .get(pattern_idx)
315                    .and_then(|r| r.as_ref())
316                {
317                    let matched_text = &text[m.start..m.end];
318                    if !self.validate_edit_chars(
319                        &self.patterns[pattern_idx],
320                        matched_text,
321                        restriction,
322                    ) {
323                        continue;
324                    }
325                }
326
327                let result = FuzzyMatchResult {
328                    end: m.end,
329                    similarity: m.similarity,
330                    insertions: m.insertions,
331                    deletions: m.deletions,
332                    substitutions: m.substitutions,
333                    swaps: m.swaps,
334                };
335
336                cached
337                    .by_pattern_and_start
338                    .entry((pattern_idx, m.start))
339                    .or_default()
340                    .push(result);
341            }
342        }
343
344        // Sort each entry by similarity (highest first), then by length (longest first)
345        // Longer matches are preferred when similarities are equal, as they allow
346        // subsequent pattern parts to match correctly
347        for matches in cached.by_pattern_and_start.values_mut() {
348            matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
349                Some(std::cmp::Ordering::Equal) | None => b.end.cmp(&a.end),
350                Some(ord) => ord,
351            });
352        }
353
354        cached
355    }
356
357    /// Search for fuzzy matches at a specific position and return as `CachedMatches`.
358    ///
359    /// This is optimized for anchored patterns where we only need to match
360    /// at one position (e.g., position 0 for `^` anchored patterns).
361    pub fn search_cached_at_position(
362        &self,
363        text: &str,
364        pos: usize,
365        threshold: f32,
366    ) -> CachedMatches {
367        let mut cached = CachedMatches::default();
368
369        if pos >= text.len() {
370            return cached;
371        }
372
373        let substring = &text[pos..];
374
375        for (pattern_idx, nfa) in self.automata.iter().enumerate() {
376            let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
377
378            // Use Bitap when available (faster for short patterns), fall back to NFA
379            let matches = if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
380                bitap.find_all(substring, pattern_threshold)
381            } else {
382                nfa.find_all(substring, pattern_threshold)
383            };
384
385            // Only keep matches starting at position 0 of the substring (which is `pos` in original text)
386            for m in matches {
387                if m.start != 0 {
388                    continue;
389                }
390
391                // Validate character class restrictions if present
392                if let Some(restriction) = self
393                    .edit_char_restrictions
394                    .get(pattern_idx)
395                    .and_then(|r| r.as_ref())
396                {
397                    let matched_text = &substring[m.start..m.end];
398                    if !self.validate_edit_chars(
399                        &self.patterns[pattern_idx],
400                        matched_text,
401                        restriction,
402                    ) {
403                        continue;
404                    }
405                }
406
407                let result = FuzzyMatchResult {
408                    end: pos + m.end,
409                    similarity: m.similarity,
410                    insertions: m.insertions,
411                    deletions: m.deletions,
412                    substitutions: m.substitutions,
413                    swaps: m.swaps,
414                };
415
416                cached
417                    .by_pattern_and_start
418                    .entry((pattern_idx, pos))
419                    .or_default()
420                    .push(result);
421            }
422        }
423
424        // Sort each entry by similarity (highest first), then by length (longest first)
425        for matches in cached.by_pattern_and_start.values_mut() {
426            matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
427                Some(std::cmp::Ordering::Equal) | None => b.end.cmp(&a.end),
428                Some(ord) => ord,
429            });
430        }
431
432        cached
433    }
434
435    /// Fast non-overlapping search optimized for iteration (greedy leftmost).
436    ///
437    /// Returns matches directly as a Vec, avoiding the `HashMap` overhead.
438    /// Uses optimized Bitap path when available.
439    ///
440    /// When `require_first_char` is true, matches must start with the same first
441    /// character as the pattern. This filters out spurious matches like "bore"
442    /// when searching for "Lorem".
443    pub fn search_non_overlapping(
444        &self,
445        text: &str,
446        threshold: f32,
447        pattern_idx: usize,
448        require_first_char: bool,
449    ) -> Vec<super::damlev::DamLevMatch> {
450        if pattern_idx >= self.automata.len() {
451            return Vec::new();
452        }
453
454        let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
455
456        // Use optimized Bitap non-overlapping search when available
457        if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
458            let matches =
459                bitap.find_all_non_overlapping(text, pattern_threshold, require_first_char);
460
461            // Validate character class restrictions if present
462            if let Some(restriction) = self
463                .edit_char_restrictions
464                .get(pattern_idx)
465                .and_then(|r| r.as_ref())
466            {
467                return matches
468                    .into_iter()
469                    .filter(|m| {
470                        let matched_text = &text[m.start..m.end];
471                        self.validate_edit_chars(
472                            &self.patterns[pattern_idx],
473                            matched_text,
474                            restriction,
475                        )
476                    })
477                    .collect();
478            }
479
480            return matches;
481        }
482
483        // Fallback to NFA
484        self.automata[pattern_idx].find_all(text, pattern_threshold)
485    }
486
487    /// Find up to `n` non-overlapping matches for a single pattern.
488    ///
489    /// This is more efficient than `search_non_overlapping` when only a limited
490    /// number of matches is needed, as it stops searching after finding `n` matches.
491    pub fn search_non_overlapping_n(
492        &self,
493        text: &str,
494        threshold: f32,
495        pattern_idx: usize,
496        require_first_char: bool,
497        n: usize,
498    ) -> Vec<super::damlev::DamLevMatch> {
499        if n == 0 || pattern_idx >= self.automata.len() {
500            return Vec::new();
501        }
502
503        let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
504
505        // Use optimized Bitap non-overlapping search with limit when available
506        if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
507            let matches =
508                bitap.find_n_non_overlapping(text, pattern_threshold, require_first_char, n);
509
510            // Validate character class restrictions if present
511            if let Some(restriction) = self
512                .edit_char_restrictions
513                .get(pattern_idx)
514                .and_then(|r| r.as_ref())
515            {
516                return matches
517                    .into_iter()
518                    .filter(|m| {
519                        let matched_text = &text[m.start..m.end];
520                        self.validate_edit_chars(
521                            &self.patterns[pattern_idx],
522                            matched_text,
523                            restriction,
524                        )
525                    })
526                    .collect();
527            }
528
529            return matches;
530        }
531
532        // Fallback to NFA with limit
533        self.automata[pattern_idx].find_n(text, pattern_threshold, n)
534    }
535
536    /// Find the first match in text (fast path for single-match queries).
537    ///
538    /// This is optimized for `find()` calls where only the first match is needed.
539    /// Uses early-exit to avoid scanning the entire text after finding a match.
540    /// Returns None if no match is found.
541    pub fn search_first(
542        &self,
543        text: &str,
544        threshold: f32,
545        pattern_idx: usize,
546    ) -> Option<super::damlev::DamLevMatch> {
547        if pattern_idx >= self.automata.len() {
548            return None;
549        }
550
551        // Fast path: try exact substring match first
552        // This is much faster than Bitap when the pattern exists verbatim in text
553        let pattern = &self.patterns[pattern_idx];
554        if let Some(pos) = text.find(pattern) {
555            // Verify exact match meets threshold (always does for similarity=1.0)
556            let sim = 1.0f32;
557            if sim >= threshold {
558                // Check if pattern text fits within text bounds
559                let end = pos + pattern.len();
560                if end <= text.len() {
561                    return Some(super::damlev::DamLevMatch {
562                        start: pos,
563                        end,
564                        insertions: 0,
565                        deletions: 0,
566                        substitutions: 0,
567                        swaps: 0,
568                        similarity: sim,
569                    });
570                }
571            }
572        }
573
574        let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
575
576        // Use Bitap for first match - needed for fuzzy matching
577        if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
578            let result = bitap.find_first_non_overlapping(text, pattern_threshold);
579
580            // Validate character class restrictions if present
581            if self.has_char_restrictions
582                && let Some(restriction) = self
583                    .edit_char_restrictions
584                    .get(pattern_idx)
585                    .and_then(|r| r.as_ref())
586            {
587                return result.filter(|m| {
588                    let matched_text = &text[m.start..m.end];
589                    self.validate_edit_chars(&self.patterns[pattern_idx], matched_text, restriction)
590                });
591            }
592
593            return result;
594        }
595
596        // Fallback to NFA
597        self.automata[pattern_idx].find_first(text, pattern_threshold)
598    }
599
600    /// Find best non-overlapping matches, preferring highest similarity.
601    ///
602    /// This method finds all candidates then selects the best non-overlapping set,
603    /// ensuring higher similarity matches are preferred over lower ones.
604    ///
605    /// When `require_first_char` is true (default), matches must start with the same
606    /// first character as the pattern. This filters out spurious matches like "bore"
607    /// when searching for "Lorem".
608    pub fn search_best_non_overlapping(
609        &self,
610        text: &str,
611        threshold: f32,
612        pattern_idx: usize,
613        require_first_char: bool,
614    ) -> Vec<super::damlev::DamLevMatch> {
615        if pattern_idx >= self.automata.len() {
616            return Vec::new();
617        }
618
619        let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
620
621        // Use optimized Bitap best-match selection when available
622        if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
623            let matches =
624                bitap.find_best_non_overlapping(text, pattern_threshold, require_first_char);
625
626            // Validate character class restrictions if present
627            if let Some(restriction) = self
628                .edit_char_restrictions
629                .get(pattern_idx)
630                .and_then(|r| r.as_ref())
631            {
632                return matches
633                    .into_iter()
634                    .filter(|m| {
635                        let matched_text = &text[m.start..m.end];
636                        self.validate_edit_chars(
637                            &self.patterns[pattern_idx],
638                            matched_text,
639                            restriction,
640                        )
641                    })
642                    .collect();
643            }
644
645            return matches;
646        }
647
648        // Fallback: get all matches from NFA and select best non-overlapping
649        let mut all_matches = self.automata[pattern_idx].find_all(text, pattern_threshold);
650
651        // Filter: require first character to match pattern's first char
652        // Respects case_insensitive setting
653        if require_first_char {
654            let pattern = &self.patterns[pattern_idx];
655            if let Some(pattern_first) = pattern.chars().next() {
656                let case_insensitive = self.case_insensitive;
657                all_matches.retain(|m| {
658                    if let Some(match_first) = text[m.start..m.end].chars().next() {
659                        if case_insensitive {
660                            match_first.eq_ignore_ascii_case(&pattern_first)
661                        } else {
662                            match_first == pattern_first
663                        }
664                    } else {
665                        false
666                    }
667                });
668            }
669        }
670
671        // Sort by similarity descending
672        all_matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
673            Some(std::cmp::Ordering::Equal) | None => a.start.cmp(&b.start),
674            Some(ord) => ord,
675        });
676
677        // Greedily select non-overlapping
678        let mut result = Vec::new();
679        let mut occupied = vec![false; text.len() + 1];
680
681        for m in all_matches {
682            let overlaps = (m.start..m.end).any(|i| occupied[i]);
683            if !overlaps {
684                for i in m.start..m.end {
685                    occupied[i] = true;
686                }
687                result.push(m);
688            }
689        }
690
691        result.sort_by_key(|m| m.start);
692        result
693    }
694
695    /// Search using prefilter candidates for faster matching.
696    ///
697    /// Uses prefilter to identify candidate positions, then searches with NFA.
698    pub fn search_all_with_prefilter(
699        &self,
700        text: &str,
701        threshold: f32,
702        prefilter: &super::prefilter::Prefilter,
703    ) -> CachedMatches {
704        let mut cached = CachedMatches::default();
705
706        // Collect candidate positions from prefilter
707        let max_offset = prefilter.max_offset();
708        let candidates: FxHashSet<usize> = prefilter
709            .find_candidates(text.as_bytes())
710            .flat_map(|pos| pos..=pos.saturating_add(max_offset))
711            .collect();
712
713        if candidates.is_empty() {
714            return cached;
715        }
716
717        let mut buffers = self.search_buffers.borrow_mut();
718
719        for (pattern_idx, nfa) in self.automata.iter().enumerate() {
720            let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
721            let matches = nfa.find_all_with_candidates_buffered(
722                text,
723                pattern_threshold,
724                &candidates,
725                &mut buffers,
726            );
727
728            for m in matches {
729                if let Some(restriction) = self
730                    .edit_char_restrictions
731                    .get(pattern_idx)
732                    .and_then(|r| r.as_ref())
733                {
734                    let matched_text = &text[m.start..m.end];
735                    if !self.validate_edit_chars(
736                        &self.patterns[pattern_idx],
737                        matched_text,
738                        restriction,
739                    ) {
740                        continue;
741                    }
742                }
743
744                let result = FuzzyMatchResult {
745                    end: m.end,
746                    similarity: m.similarity,
747                    insertions: m.insertions,
748                    deletions: m.deletions,
749                    substitutions: m.substitutions,
750                    swaps: m.swaps,
751                };
752
753                cached
754                    .by_pattern_and_start
755                    .entry((pattern_idx, m.start))
756                    .or_default()
757                    .push(result);
758            }
759        }
760
761        // Sort each entry by similarity (highest first), then by length (longest first)
762        for matches in cached.by_pattern_and_start.values_mut() {
763            matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
764                Some(std::cmp::Ordering::Equal) | None => b.end.cmp(&a.end),
765                Some(ord) => ord,
766            });
767        }
768
769        cached
770    }
771
772    /// Find the first match using Guard NFA (fast path for single pattern).
773    ///
774    /// Returns immediately on first match - optimal for `find_first` operations.
775    #[inline]
776    pub fn find_first_guard_nfa(
777        &self,
778        text: &str,
779        threshold: f32,
780    ) -> Option<(usize, FuzzyMatchResult)> {
781        let guard_nfa = self.guard_nfa.as_ref()?;
782
783        guard_nfa.find_first(text, threshold).map(|m| {
784            (
785                m.start,
786                FuzzyMatchResult {
787                    end: m.end,
788                    similarity: m.similarity,
789                    insertions: m.insertions,
790                    deletions: m.deletions,
791                    substitutions: m.substitutions,
792                    swaps: m.swaps,
793                },
794            )
795        })
796    }
797
798    /// Find the first match for a single-pattern search using prefilter.
799    ///
800    /// This is optimized for first-match mode: returns as soon as a match is found
801    /// without scanning the entire text. Only works for single-pattern searches.
802    pub fn find_first_with_prefilter(
803        &self,
804        text: &str,
805        threshold: f32,
806        prefilter: &super::prefilter::Prefilter,
807    ) -> Option<(usize, FuzzyMatchResult)> {
808        if self.automata.len() != 1 {
809            return None; // Only works for single pattern
810        }
811
812        // Collect candidate positions from prefilter
813        let max_offset = prefilter.max_offset();
814        let candidates: FxHashSet<usize> = prefilter
815            .find_candidates(text.as_bytes())
816            .flat_map(|pos| pos..=pos.saturating_add(max_offset))
817            .collect();
818
819        if candidates.is_empty() {
820            return None;
821        }
822
823        let pattern_threshold = self.calculate_effective_threshold(0, threshold);
824
825        // Try Bitap first (much faster for short patterns)
826        if let Some(ref bitap) = self.bitap_matchers[0]
827            && let Some(m) = bitap.find_first_with_candidates(text, pattern_threshold, &candidates)
828        {
829            // Validate character class restrictions if present
830            let restriction = self.edit_char_restrictions.first().and_then(|r| r.as_ref());
831            let validation_passed = restriction.is_none_or(|r| {
832                let matched_text = &text[m.start..m.end];
833                self.validate_edit_chars(&self.patterns[0], matched_text, r)
834            });
835
836            if validation_passed {
837                return Some((
838                    m.start,
839                    FuzzyMatchResult {
840                        end: m.end,
841                        similarity: m.similarity,
842                        insertions: m.insertions,
843                        deletions: m.deletions,
844                        substitutions: m.substitutions,
845                        swaps: m.swaps,
846                    },
847                ));
848            }
849            // Fall through to NFA if validation failed
850        }
851
852        // Fall back to Levenshtein NFA
853        let nfa = &self.automata[0];
854        if let Some(m) = nfa.find_first_with_candidates(text, pattern_threshold, &candidates) {
855            // Validate character class restrictions if present
856            if let Some(restriction) = self.edit_char_restrictions.first().and_then(|r| r.as_ref())
857            {
858                let matched_text = &text[m.start..m.end];
859                if !self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
860                    return None;
861                }
862            }
863
864            return Some((
865                m.start,
866                FuzzyMatchResult {
867                    end: m.end,
868                    similarity: m.similarity,
869                    insertions: m.insertions,
870                    deletions: m.deletions,
871                    substitutions: m.substitutions,
872                    swaps: m.swaps,
873                },
874            ));
875        }
876
877        None
878    }
879
880    /// Search for matches starting from a single position.
881    ///
882    /// Returns the best match starting at the given position, or None if no match.
883    /// This is used for greedy first-match mode.
884    pub fn search_at_position(
885        &self,
886        text: &str,
887        start_pos: usize,
888        threshold: f32,
889    ) -> Option<(usize, FuzzyMatchResult)> {
890        if self.automata.len() != 1 {
891            return None; // Only works for single pattern
892        }
893
894        let pattern_threshold = self.calculate_effective_threshold(0, threshold);
895
896        // Create a single-position candidate set
897        let candidates: FxHashSet<usize> = std::iter::once(start_pos).collect();
898
899        // Try Bitap first (much faster for short patterns)
900        if let Some(ref bitap) = self.bitap_matchers[0]
901            && let Some(m) = bitap.find_first_with_candidates(text, pattern_threshold, &candidates)
902            && m.start == start_pos
903        {
904            // Validate character class restrictions if present
905            if let Some(restriction) = self.edit_char_restrictions.first().and_then(|r| r.as_ref())
906            {
907                let matched_text = &text[m.start..m.end];
908                if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
909                    return Some((
910                        m.start,
911                        FuzzyMatchResult {
912                            end: m.end,
913                            similarity: m.similarity,
914                            insertions: m.insertions,
915                            deletions: m.deletions,
916                            substitutions: m.substitutions,
917                            swaps: m.swaps,
918                        },
919                    ));
920                }
921            } else {
922                return Some((
923                    m.start,
924                    FuzzyMatchResult {
925                        end: m.end,
926                        similarity: m.similarity,
927                        insertions: m.insertions,
928                        deletions: m.deletions,
929                        substitutions: m.substitutions,
930                        swaps: m.swaps,
931                    },
932                ));
933            }
934        }
935
936        // Fall back to Levenshtein NFA
937        let nfa = &self.automata[0];
938
939        // Find all matches starting at this position and pick the best
940        let mut buffers = self.search_buffers.borrow_mut();
941        let matches = nfa.find_all_with_candidates_buffered(
942            text,
943            pattern_threshold,
944            &candidates,
945            &mut buffers,
946        );
947
948        // Find the best match (highest similarity)
949        let best = matches
950            .into_iter()
951            .filter(|m| m.start == start_pos)
952            .max_by(|a, b| {
953                a.similarity
954                    .partial_cmp(&b.similarity)
955                    .unwrap_or(std::cmp::Ordering::Equal)
956            })?;
957
958        // Validate character class restrictions if present
959        if let Some(restriction) = self.edit_char_restrictions.first().and_then(|r| r.as_ref()) {
960            let matched_text = &text[best.start..best.end];
961            if !self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
962                return None;
963            }
964        }
965
966        Some((
967            best.start,
968            FuzzyMatchResult {
969                end: best.end,
970                similarity: best.similarity,
971                insertions: best.insertions,
972                deletions: best.deletions,
973                substitutions: best.substitutions,
974                swaps: best.swaps,
975            },
976        ))
977    }
978
979    /// Ultra-fast search at a single position using optimized Bitap.
980    ///
981    /// This method avoids all allocations for the common case and is
982    /// designed for the greedy-first hot path.
983    #[inline]
984    pub fn search_at_position_fast(
985        &self,
986        text: &[u8],
987        start_pos: usize,
988        threshold: f32,
989    ) -> Option<(usize, FuzzyMatchResult)> {
990        if self.automata.len() != 1 {
991            return None;
992        }
993
994        let pattern_threshold = self.calculate_effective_threshold(0, threshold);
995
996        // Use optimized Bitap if available
997        if let Some(ref bitap) = self.bitap_matchers[0]
998            && let Some(m) = bitap.find_at_byte_position(text, start_pos, pattern_threshold)
999        {
1000            // Skip char restriction validation for speed in common case
1001            // (restrictions are rare)
1002            if self
1003                .edit_char_restrictions
1004                .first()
1005                .and_then(|r| r.as_ref())
1006                .is_none()
1007            {
1008                return Some((
1009                    m.start,
1010                    FuzzyMatchResult {
1011                        end: m.end,
1012                        similarity: m.similarity,
1013                        insertions: m.insertions,
1014                        deletions: m.deletions,
1015                        substitutions: m.substitutions,
1016                        swaps: m.swaps,
1017                    },
1018                ));
1019            }
1020
1021            // Validate character class restrictions
1022            if let Ok(text_str) = std::str::from_utf8(text)
1023                && let Some(restriction) = &self.edit_char_restrictions[0]
1024            {
1025                let matched_text = &text_str[m.start..m.end];
1026                if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1027                    return Some((
1028                        m.start,
1029                        FuzzyMatchResult {
1030                            end: m.end,
1031                            similarity: m.similarity,
1032                            insertions: m.insertions,
1033                            deletions: m.deletions,
1034                            substitutions: m.substitutions,
1035                            swaps: m.swaps,
1036                        },
1037                    ));
1038                }
1039            }
1040        }
1041
1042        None
1043    }
1044
1045    /// Find first match using streaming Bitap (single-pass, O(n*k)).
1046    /// Falls back to Boyer-Moore for very short texts where setup overhead matters.
1047    #[inline]
1048    pub fn find_first_boyer_moore(
1049        &self,
1050        text: &[u8],
1051        threshold: f32,
1052        _max_offset: usize,
1053    ) -> Option<(usize, FuzzyMatchResult)> {
1054        if self.automata.len() != 1 {
1055            return None;
1056        }
1057
1058        let bitap = self.bitap_matchers[0].as_ref()?;
1059        let pattern_threshold = self.calculate_effective_threshold(0, threshold);
1060        let has_restrictions = self
1061            .edit_char_restrictions
1062            .first()
1063            .and_then(|r| r.as_ref())
1064            .is_some();
1065
1066        // Use streaming search for better performance on longer texts
1067        // Streaming is O(n*k) single pass vs Boyer-Moore which is O(n*k*w) with many candidates
1068        if let Some(m) = bitap.find_first_streaming(text, pattern_threshold) {
1069            // Quick path: no restrictions
1070            if !has_restrictions {
1071                return Some((
1072                    m.start,
1073                    FuzzyMatchResult {
1074                        end: m.end,
1075                        similarity: m.similarity,
1076                        insertions: m.insertions,
1077                        deletions: m.deletions,
1078                        substitutions: m.substitutions,
1079                        swaps: m.swaps,
1080                    },
1081                ));
1082            }
1083
1084            // Validate restrictions
1085            if let Ok(text_str) = std::str::from_utf8(text)
1086                && let Some(restriction) = &self.edit_char_restrictions[0]
1087            {
1088                let matched_text = &text_str[m.start..m.end];
1089                if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1090                    return Some((
1091                        m.start,
1092                        FuzzyMatchResult {
1093                            end: m.end,
1094                            similarity: m.similarity,
1095                            insertions: m.insertions,
1096                            deletions: m.deletions,
1097                            substitutions: m.substitutions,
1098                            swaps: m.swaps,
1099                        },
1100                    ));
1101                }
1102            }
1103        }
1104
1105        None
1106    }
1107
1108    /// Find first match using lazy streaming search.
1109    ///
1110    /// Processes prefilter candidates one at a time, returning immediately
1111    /// when a match is found. This is optimal when matches occur early in the text.
1112    #[inline]
1113    pub fn find_first_lazy(
1114        &self,
1115        text: &[u8],
1116        threshold: f32,
1117        prefilter: &super::prefilter::Prefilter,
1118    ) -> Option<(usize, FuzzyMatchResult)> {
1119        if self.automata.len() != 1 {
1120            return None;
1121        }
1122
1123        let bitap = self.bitap_matchers.first()?.as_ref()?;
1124        let pattern_threshold = self.calculate_effective_threshold(0, threshold);
1125        let max_offset = prefilter.max_offset();
1126        let has_restrictions = self
1127            .edit_char_restrictions
1128            .first()
1129            .and_then(|r| r.as_ref())
1130            .is_some();
1131
1132        // Check if prefilter would be ineffective (e.g., DNA with small alphabet)
1133        // If searching for 3+ bytes with max_offset > 0, prefilter may generate many false positives
1134        // In that case, streaming Bitap is more efficient
1135        let use_streaming = match prefilter {
1136            super::prefilter::Prefilter::ThreeBytes {
1137                max_offset: off, ..
1138            } if *off > 0 => true,
1139            super::prefilter::Prefilter::MultiBytes {
1140                max_offset: off, ..
1141            } if *off > 0 => true,
1142            _ => false,
1143        };
1144
1145        if use_streaming {
1146            // Fall back to streaming Bitap for better performance
1147            return self.find_first_boyer_moore(text, threshold, max_offset);
1148        }
1149
1150        // Track positions we've already tried to avoid duplicates
1151        let mut last_tried: Option<usize> = None;
1152
1153        // Process candidates lazily - return on first match
1154        for candidate in prefilter.find_candidates(text) {
1155            // Try positions from (candidate - max_offset) to candidate
1156            // Search backwards first (most likely match position)
1157            for back in 0..=max_offset {
1158                let pos = candidate.saturating_sub(back);
1159
1160                // Skip UTF-8 continuation bytes
1161                if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1162                    continue;
1163                }
1164
1165                // Skip if we already tried this position
1166                if last_tried == Some(pos) {
1167                    continue;
1168                }
1169                last_tried = Some(pos);
1170
1171                // Try Bitap at this position
1172                if let Some(m) = bitap.find_at_byte_position(text, pos, pattern_threshold) {
1173                    // Quick path: no restrictions
1174                    if !has_restrictions {
1175                        return Some((
1176                            m.start,
1177                            FuzzyMatchResult {
1178                                end: m.end,
1179                                similarity: m.similarity,
1180                                insertions: m.insertions,
1181                                deletions: m.deletions,
1182                                substitutions: m.substitutions,
1183                                swaps: m.swaps,
1184                            },
1185                        ));
1186                    }
1187
1188                    // Validate restrictions
1189                    if let Ok(text_str) = std::str::from_utf8(text)
1190                        && let Some(restriction) = &self.edit_char_restrictions[0]
1191                    {
1192                        let matched_text = &text_str[m.start..m.end];
1193                        if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1194                            return Some((
1195                                m.start,
1196                                FuzzyMatchResult {
1197                                    end: m.end,
1198                                    similarity: m.similarity,
1199                                    insertions: m.insertions,
1200                                    deletions: m.deletions,
1201                                    substitutions: m.substitutions,
1202                                    swaps: m.swaps,
1203                                },
1204                            ));
1205                        }
1206                    }
1207                }
1208            }
1209
1210            // Also try forward positions (for deletions from pattern start)
1211            for fwd in 1..=max_offset {
1212                let pos = candidate + fwd;
1213                if pos >= text.len() {
1214                    continue;
1215                }
1216
1217                // Skip UTF-8 continuation bytes
1218                if (text[pos] & 0b1100_0000) == 0b1000_0000 {
1219                    continue;
1220                }
1221
1222                if last_tried == Some(pos) {
1223                    continue;
1224                }
1225                last_tried = Some(pos);
1226
1227                if let Some(m) = bitap.find_at_byte_position(text, pos, pattern_threshold) {
1228                    if !has_restrictions {
1229                        return Some((
1230                            m.start,
1231                            FuzzyMatchResult {
1232                                end: m.end,
1233                                similarity: m.similarity,
1234                                insertions: m.insertions,
1235                                deletions: m.deletions,
1236                                substitutions: m.substitutions,
1237                                swaps: m.swaps,
1238                            },
1239                        ));
1240                    }
1241
1242                    if let Ok(text_str) = std::str::from_utf8(text)
1243                        && let Some(restriction) = &self.edit_char_restrictions[0]
1244                    {
1245                        let matched_text = &text_str[m.start..m.end];
1246                        if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1247                            return Some((
1248                                m.start,
1249                                FuzzyMatchResult {
1250                                    end: m.end,
1251                                    similarity: m.similarity,
1252                                    insertions: m.insertions,
1253                                    deletions: m.deletions,
1254                                    substitutions: m.substitutions,
1255                                    swaps: m.swaps,
1256                                },
1257                            ));
1258                        }
1259                    }
1260                }
1261            }
1262        }
1263
1264        None
1265    }
1266
1267    /// Find first match using batch parallel position search.
1268    ///
1269    /// Collects candidate positions from prefilter and processes them in batches
1270    /// using SIMD multi-position search for improved throughput.
1271    ///
1272    /// This is most effective for:
1273    /// - k=0 (exact match) where SIMD can process 2-4 positions per iteration
1274    /// - ASCII patterns
1275    /// - Many candidate positions
1276    #[inline]
1277    pub fn find_first_batch_parallel(
1278        &self,
1279        text: &[u8],
1280        threshold: f32,
1281        prefilter: &super::prefilter::Prefilter,
1282    ) -> Option<(usize, FuzzyMatchResult)> {
1283        if self.automata.len() != 1 {
1284            return None;
1285        }
1286
1287        let bitap = self.bitap_matchers.first()?.as_ref()?;
1288        let pattern_threshold = self.calculate_effective_threshold(0, threshold);
1289        let max_offset = prefilter.max_offset();
1290
1291        // Collect candidate positions
1292        // Must search BOTH directions from candidate:
1293        // - Forward: for deletions from pattern start
1294        // - Backward: for insertions before pattern start
1295        let mut positions: Vec<usize> = Vec::with_capacity(64);
1296        let mut seen: FxHashSet<usize> = FxHashSet::default();
1297
1298        for candidate in prefilter.find_candidates(text) {
1299            // Search backwards (for insertions at match start)
1300            for back in 0..=max_offset {
1301                let pos = candidate.saturating_sub(back);
1302                if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1303                    continue;
1304                }
1305                if seen.insert(pos) {
1306                    positions.push(pos);
1307                }
1308            }
1309            // Search forwards (for deletions from pattern start)
1310            for fwd in 1..=max_offset {
1311                let pos = candidate + fwd;
1312                if pos >= text.len() {
1313                    continue;
1314                }
1315                if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1316                    continue;
1317                }
1318                if seen.insert(pos) {
1319                    positions.push(pos);
1320                }
1321            }
1322        }
1323
1324        if positions.is_empty() {
1325            return None;
1326        }
1327
1328        // Use batch parallel search
1329        if let Some((_idx, m)) =
1330            bitap.find_at_positions_parallel(text, &positions, pattern_threshold)
1331        {
1332            // Skip restriction validation in fast path (rare case)
1333            if self
1334                .edit_char_restrictions
1335                .first()
1336                .and_then(|r| r.as_ref())
1337                .is_none()
1338            {
1339                return Some((
1340                    m.start,
1341                    FuzzyMatchResult {
1342                        end: m.end,
1343                        similarity: m.similarity,
1344                        insertions: m.insertions,
1345                        deletions: m.deletions,
1346                        substitutions: m.substitutions,
1347                        swaps: m.swaps,
1348                    },
1349                ));
1350            }
1351
1352            // Validate edit char restrictions
1353            if let Ok(text_str) = std::str::from_utf8(text)
1354                && let Some(restriction) =
1355                    self.edit_char_restrictions.first().and_then(|r| r.as_ref())
1356            {
1357                let matched_text = &text_str[m.start..m.end];
1358                if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1359                    return Some((
1360                        m.start,
1361                        FuzzyMatchResult {
1362                            end: m.end,
1363                            similarity: m.similarity,
1364                            insertions: m.insertions,
1365                            deletions: m.deletions,
1366                            substitutions: m.substitutions,
1367                            swaps: m.swaps,
1368                        },
1369                    ));
1370                }
1371            }
1372        }
1373
1374        None
1375    }
1376
1377    /// Find first match across multiple patterns using parallel Bitap search.
1378    ///
1379    /// This is optimized for simple alternations where we can skip NFA simulation
1380    /// and just run Bitap for each pattern at candidate positions.
1381    ///
1382    /// Returns (`pattern_index`, start, `FuzzyMatchResult`) for the first match found.
1383    pub fn find_first_multi_pattern(
1384        &self,
1385        text: &[u8],
1386        threshold: f32,
1387        pattern_indices: &[usize],
1388        prefilter: &super::prefilter::Prefilter,
1389    ) -> Option<(usize, usize, FuzzyMatchResult)> {
1390        if pattern_indices.is_empty() {
1391            return None;
1392        }
1393
1394        let text_str = std::str::from_utf8(text).ok()?;
1395        let max_offset = prefilter.max_offset();
1396
1397        // Iterate through candidate positions from prefilter
1398        let mut last_tried: Option<usize> = None;
1399
1400        for candidate in prefilter.find_candidates(text) {
1401            // Try positions from candidate to candidate + max_offset
1402            for offset in 0..=max_offset {
1403                let pos = candidate + offset;
1404                if pos >= text.len() {
1405                    continue;
1406                }
1407
1408                // Skip if not on a char boundary or already tried
1409                if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1410                    continue;
1411                }
1412                if last_tried == Some(pos) {
1413                    continue;
1414                }
1415                last_tried = Some(pos);
1416
1417                // Try each pattern at this position
1418                for &pattern_idx in pattern_indices {
1419                    if pattern_idx >= self.bitap_matchers.len() {
1420                        continue;
1421                    }
1422
1423                    let pattern_threshold =
1424                        self.calculate_effective_threshold(pattern_idx, threshold);
1425
1426                    // Try Bitap first (fast path)
1427                    if let Some(ref bitap) = self.bitap_matchers[pattern_idx]
1428                        && let Some(m) = bitap.find_at_byte_position(text, pos, pattern_threshold)
1429                    {
1430                        // Check edit char restrictions
1431                        if let Some(restriction) = self
1432                            .edit_char_restrictions
1433                            .get(pattern_idx)
1434                            .and_then(|r| r.as_ref())
1435                        {
1436                            let matched_text = &text_str[m.start..m.end];
1437                            if !self.validate_edit_chars(
1438                                &self.patterns[pattern_idx],
1439                                matched_text,
1440                                restriction,
1441                            ) {
1442                                continue;
1443                            }
1444                        }
1445
1446                        return Some((
1447                            pattern_idx,
1448                            m.start,
1449                            FuzzyMatchResult {
1450                                end: m.end,
1451                                similarity: m.similarity,
1452                                insertions: m.insertions,
1453                                deletions: m.deletions,
1454                                substitutions: m.substitutions,
1455                                swaps: m.swaps,
1456                            },
1457                        ));
1458                    }
1459
1460                    // Fallback to Levenshtein NFA for patterns > 64 chars
1461                    if self
1462                        .bitap_matchers
1463                        .get(pattern_idx)
1464                        .is_none_or(Option::is_none)
1465                    {
1466                        let nfa = &self.automata[pattern_idx];
1467                        let candidates: FxHashSet<usize> = std::iter::once(pos).collect();
1468                        if let Some(m) =
1469                            nfa.find_first_with_candidates(text_str, pattern_threshold, &candidates)
1470                            && m.start == pos
1471                        {
1472                            // Check edit char restrictions
1473                            if let Some(restriction) = self
1474                                .edit_char_restrictions
1475                                .get(pattern_idx)
1476                                .and_then(|r| r.as_ref())
1477                            {
1478                                let matched_text = &text_str[m.start..m.end];
1479                                if !self.validate_edit_chars(
1480                                    &self.patterns[pattern_idx],
1481                                    matched_text,
1482                                    restriction,
1483                                ) {
1484                                    continue;
1485                                }
1486                            }
1487
1488                            return Some((
1489                                pattern_idx,
1490                                m.start,
1491                                FuzzyMatchResult {
1492                                    end: m.end,
1493                                    similarity: m.similarity,
1494                                    insertions: m.insertions,
1495                                    deletions: m.deletions,
1496                                    substitutions: m.substitutions,
1497                                    swaps: m.swaps,
1498                                },
1499                            ));
1500                        }
1501                    }
1502                }
1503            }
1504        }
1505
1506        None
1507    }
1508
1509    /// Find the first match across multiple patterns by running each pattern's
1510    /// streaming search individually. This is used when the multi-pattern prefilter
1511    /// would be ineffective (too many common bytes).
1512    ///
1513    /// Returns (`pattern_index`, start, `FuzzyMatchResult`) for the earliest match found.
1514    pub fn find_first_multi_pattern_individual(
1515        &self,
1516        text: &[u8],
1517        threshold: f32,
1518        pattern_indices: &[usize],
1519    ) -> Option<(usize, usize, FuzzyMatchResult)> {
1520        if pattern_indices.is_empty() {
1521            return None;
1522        }
1523
1524        let mut best: Option<(usize, usize, FuzzyMatchResult)> = None;
1525
1526        // Run each pattern's streaming search individually
1527        for &pattern_idx in pattern_indices {
1528            if pattern_idx >= self.bitap_matchers.len() {
1529                continue;
1530            }
1531
1532            let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
1533
1534            // Use Bitap streaming for each pattern
1535            let Some(bitap) = self.bitap_matchers[pattern_idx].as_ref() else {
1536                continue;
1537            };
1538
1539            // Try Bitap streaming first
1540            if let Some(m) = bitap.find_first_streaming(text, pattern_threshold) {
1541                // Check edit char restrictions
1542                let restriction = self
1543                    .edit_char_restrictions
1544                    .get(pattern_idx)
1545                    .and_then(|r| r.as_ref());
1546                let validation_passed = match (restriction, std::str::from_utf8(text)) {
1547                    (Some(r), Ok(text_str)) => {
1548                        let matched_text = &text_str[m.start..m.end];
1549                        self.validate_edit_chars(&self.patterns[pattern_idx], matched_text, r)
1550                    }
1551                    _ => true, // No restriction or invalid UTF-8, pass validation
1552                };
1553
1554                if validation_passed {
1555                    let result = FuzzyMatchResult {
1556                        end: m.end,
1557                        similarity: m.similarity,
1558                        insertions: m.insertions,
1559                        deletions: m.deletions,
1560                        substitutions: m.substitutions,
1561                        swaps: m.swaps,
1562                    };
1563                    // Only prefer earlier start position; for same position, first pattern wins
1564                    // (matches mrab-regex behavior where pattern order matters)
1565                    if best
1566                        .as_ref()
1567                        .is_none_or(|(_, best_start, _)| m.start < *best_start)
1568                    {
1569                        best = Some((pattern_idx, m.start, result));
1570                    }
1571                    continue;
1572                }
1573                // Fall through to NFA if validation failed
1574            }
1575
1576            // Fallback to Levenshtein NFA if Bitap didn't find a match
1577            if pattern_idx < self.automata.len() {
1578                let nfa = &self.automata[pattern_idx];
1579                if let Ok(text_str) = std::str::from_utf8(text) {
1580                    // Find all matches and take the earliest one
1581                    let matches = nfa.find_all(text_str, pattern_threshold);
1582                    if let Some(m) = matches.into_iter().min_by_key(|m| m.start) {
1583                        // Check edit char restrictions
1584                        if let Some(restriction) = self
1585                            .edit_char_restrictions
1586                            .get(pattern_idx)
1587                            .and_then(|r| r.as_ref())
1588                        {
1589                            let matched_text = &text_str[m.start..m.end];
1590                            if !self.validate_edit_chars(
1591                                &self.patterns[pattern_idx],
1592                                matched_text,
1593                                restriction,
1594                            ) {
1595                                continue;
1596                            }
1597                        }
1598
1599                        let result = FuzzyMatchResult {
1600                            end: m.end,
1601                            similarity: m.similarity,
1602                            insertions: m.insertions,
1603                            deletions: m.deletions,
1604                            substitutions: m.substitutions,
1605                            swaps: m.swaps,
1606                        };
1607                        // Only prefer earlier start position; for same position, first pattern wins
1608                        if best
1609                            .as_ref()
1610                            .is_none_or(|(_, best_start, _)| m.start < *best_start)
1611                        {
1612                            best = Some((pattern_idx, m.start, result));
1613                        }
1614                    }
1615                }
1616            }
1617        }
1618
1619        best
1620    }
1621
1622    /// Calculate the minimum effective threshold across all patterns.
1623    ///
1624    /// This returns the lowest threshold that could match any pattern,
1625    /// useful for early-exit optimizations.
1626    #[must_use]
1627    pub fn calculate_min_effective_threshold(&self, user_threshold: f32) -> f32 {
1628        let mut min_threshold = user_threshold;
1629
1630        for idx in 0..self.patterns.len() {
1631            let pattern_threshold = self.calculate_effective_threshold(idx, user_threshold);
1632            if pattern_threshold < min_threshold {
1633                min_threshold = pattern_threshold;
1634            }
1635        }
1636
1637        min_threshold
1638    }
1639
1640    /// Search for all matches across multiple patterns efficiently.
1641    ///
1642    /// This is optimized for the multi-pattern case by processing all patterns
1643    /// in parallel, avoiding redundant text scans.
1644    ///
1645    /// Returns a map of (`pattern_index`, start) -> `Vec<FuzzyMatchResult>`.
1646    pub fn search_all_multi_pattern(
1647        &self,
1648        text: &str,
1649        threshold: f32,
1650        pattern_indices: &[usize],
1651    ) -> CachedMatches {
1652        let mut cached = CachedMatches::default();
1653
1654        if pattern_indices.is_empty() {
1655            return cached;
1656        }
1657
1658        // For small number of patterns, parallel individual search is efficient
1659        // For larger pattern sets, a true Aho-Corasick automaton would be better
1660        for &pattern_idx in pattern_indices {
1661            if pattern_idx >= self.automata.len() {
1662                continue;
1663            }
1664
1665            let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
1666
1667            // Use Bitap when available (faster for short patterns)
1668            let matches = if let Some(bitap) = self
1669                .bitap_matchers
1670                .get(pattern_idx)
1671                .and_then(|b| b.as_ref())
1672            {
1673                bitap.find_all(text, pattern_threshold)
1674            } else {
1675                self.automata[pattern_idx].find_all(text, pattern_threshold)
1676            };
1677
1678            for m in matches {
1679                // Validate character class restrictions if present
1680                if let Some(restriction) = self
1681                    .edit_char_restrictions
1682                    .get(pattern_idx)
1683                    .and_then(|r| r.as_ref())
1684                {
1685                    let matched_text = &text[m.start..m.end];
1686                    if !self.validate_edit_chars(
1687                        &self.patterns[pattern_idx],
1688                        matched_text,
1689                        restriction,
1690                    ) {
1691                        continue;
1692                    }
1693                }
1694
1695                let result = FuzzyMatchResult {
1696                    end: m.end,
1697                    similarity: m.similarity,
1698                    insertions: m.insertions,
1699                    deletions: m.deletions,
1700                    substitutions: m.substitutions,
1701                    swaps: m.swaps,
1702                };
1703
1704                cached
1705                    .by_pattern_and_start
1706                    .entry((pattern_idx, m.start))
1707                    .or_default()
1708                    .push(result);
1709            }
1710        }
1711
1712        // Sort each entry by similarity (highest first)
1713        for matches in cached.by_pattern_and_start.values_mut() {
1714            matches.sort_by(|a, b| {
1715                b.similarity
1716                    .partial_cmp(&a.similarity)
1717                    .unwrap_or(std::cmp::Ordering::Equal)
1718            });
1719        }
1720
1721        cached
1722    }
1723
1724    /// Find a fuzzy match using cached results.
1725    pub fn find_at_cached(
1726        &self,
1727        cached: &CachedMatches,
1728        pattern_index: usize,
1729        from: usize,
1730    ) -> Option<FuzzyMatchResult> {
1731        cached.get(pattern_index, from).cloned()
1732    }
1733
1734    /// Find a fuzzy match for a specific pattern at a position.
1735    pub fn find_at(
1736        &self,
1737        text: &str,
1738        pattern_index: usize,
1739        from: usize,
1740        threshold: f32,
1741    ) -> Option<FuzzyMatchResult> {
1742        let pattern = self.patterns.get(pattern_index)?;
1743
1744        // Fast path for exact matching (no fuzzy edits allowed)
1745        // Just check if the pattern is a prefix of the substring
1746        let max_edits = self.limits.get(pattern_index).and_then(|lim| {
1747            lim.as_ref().map(|l| {
1748                l.get_edits().unwrap_or_else(|| {
1749                    let i = l.get_insertions().unwrap_or(0);
1750                    let d = l.get_deletions().unwrap_or(0);
1751                    let s = l.get_substitutions().unwrap_or(0);
1752                    let t = l.get_swaps().unwrap_or(0);
1753                    i.saturating_add(d).saturating_add(s).saturating_add(t)
1754                })
1755            })
1756        });
1757
1758        // Handle empty/exhausted text: can still match via pure deletions
1759        if from >= text.len() {
1760            // Check if we can delete the entire pattern
1761            let pattern_char_len = pattern.chars().count();
1762            if let Some(max) = max_edits
1763                && pattern_char_len <= max as usize
1764            {
1765                // Calculate similarity for deleting entire pattern
1766                let deletions = pattern_char_len as u8;
1767                let max_len = pattern_char_len.max(1) as f32;
1768                let sim = (1.0 - f32::from(deletions) / max_len).max(0.0);
1769                if sim >= threshold {
1770                    return Some(FuzzyMatchResult {
1771                        end: from,
1772                        similarity: sim,
1773                        insertions: 0,
1774                        deletions,
1775                        substitutions: 0,
1776                        swaps: 0,
1777                    });
1778                }
1779            }
1780            return None;
1781        }
1782
1783        let substring = &text[from..];
1784
1785        if max_edits.is_none() || max_edits == Some(0) {
1786            // Exact matching - just check if pattern is prefix
1787            if self.case_insensitive {
1788                if substring
1789                    .chars()
1790                    .take(pattern.chars().count())
1791                    .zip(pattern.chars())
1792                    .all(|(t, p)| t.to_lowercase().eq(p.to_lowercase()))
1793                {
1794                    let end_byte = pattern.len().min(substring.len());
1795                    // Make sure we end on a char boundary
1796                    let actual_end = if end_byte <= substring.len() {
1797                        let mut e = end_byte;
1798                        while e < substring.len() && !substring.is_char_boundary(e) {
1799                            e += 1;
1800                        }
1801                        e.min(substring.len())
1802                    } else {
1803                        end_byte
1804                    };
1805                    return Some(FuzzyMatchResult {
1806                        end: from + actual_end,
1807                        similarity: 1.0,
1808                        insertions: 0,
1809                        deletions: 0,
1810                        substitutions: 0,
1811                        swaps: 0,
1812                    });
1813                }
1814            } else if substring.starts_with(pattern) {
1815                return Some(FuzzyMatchResult {
1816                    end: from + pattern.len(),
1817                    similarity: 1.0,
1818                    insertions: 0,
1819                    deletions: 0,
1820                    substitutions: 0,
1821                    swaps: 0,
1822                });
1823            }
1824            return None;
1825        }
1826
1827        // Fuzzy matching path - need to search
1828        let nfa = self.automata.get(pattern_index)?;
1829        let effective_threshold = self.calculate_effective_threshold(pattern_index, threshold);
1830        let matches = nfa.find_all(substring, effective_threshold);
1831
1832        // Find match that starts at position 0 of the substring
1833        for m in matches {
1834            if m.start == 0 {
1835                // Validate character class restrictions
1836                if let Some(restriction) = self
1837                    .edit_char_restrictions
1838                    .get(pattern_index)
1839                    .and_then(|r| r.as_ref())
1840                {
1841                    let matched_text = &substring[m.start..m.end];
1842                    if !self.validate_edit_chars(
1843                        &self.patterns[pattern_index],
1844                        matched_text,
1845                        restriction,
1846                    ) {
1847                        continue;
1848                    }
1849                }
1850
1851                return Some(FuzzyMatchResult {
1852                    end: from + m.end,
1853                    similarity: m.similarity,
1854                    insertions: m.insertions,
1855                    deletions: m.deletions,
1856                    substitutions: m.substitutions,
1857                    swaps: m.swaps,
1858                });
1859            }
1860        }
1861
1862        None
1863    }
1864
1865    /// Find a fuzzy match that allows boundary insertions (for anchored patterns).
1866    /// Uses cached results to avoid O(N) per-call overhead.
1867    pub fn find_with_boundary_insertions(
1868        &self,
1869        text: &str,
1870        pattern_index: usize,
1871        from: usize,
1872        to: Option<usize>,
1873        threshold: f32,
1874        cached: Option<&CachedMatches>,
1875    ) -> Option<FuzzyMatchResult> {
1876        // If no cache, return None (don't do expensive search)
1877        let cached = cached?;
1878
1879        let limits = self.limits.get(pattern_index).and_then(|l| l.as_ref())?;
1880        let max_edits_val = limits.get_edits().unwrap_or_else(|| {
1881            let i = limits.get_insertions().unwrap_or(0);
1882            let d = limits.get_deletions().unwrap_or(0);
1883            let s = limits.get_substitutions().unwrap_or(0);
1884            let t = limits.get_swaps().unwrap_or(0);
1885            i.saturating_add(d).saturating_add(s).saturating_add(t)
1886        });
1887        let max_insertions = limits.get_insertions().unwrap_or(max_edits_val);
1888
1889        let effective_threshold = self.calculate_effective_threshold(pattern_index, threshold);
1890
1891        // Look for matches within the boundary window
1892        // A match starting before `from` could still be extended to include `from`
1893        let max_window = max_insertions as usize;
1894        let search_start = from.saturating_sub(max_window);
1895
1896        // Collect potential matches from cache
1897        let matches: Vec<_> = (search_start..=from)
1898            .filter_map(|start| {
1899                cached
1900                    .get_all(pattern_index, start)
1901                    .map(|results| results.iter().map(move |r| (start, r)))
1902            })
1903            .flatten()
1904            .collect();
1905
1906        let mut best: Option<FuzzyMatchResult> = None;
1907
1908        for (match_start, m) in matches {
1909            // Calculate boundary insertions
1910            // start_insertions: characters between match_start and from
1911            let start_insertions = (from.saturating_sub(match_start)) as u8;
1912            // end_insertions: characters between match end and expected end
1913            let end_insertions = if let Some(expected_end) = to {
1914                if m.end < expected_end {
1915                    (expected_end - m.end) as u8
1916                } else {
1917                    0
1918                }
1919            } else {
1920                0
1921            };
1922
1923            let total_boundary_insertions = start_insertions + end_insertions;
1924            let total_insertions = m.insertions + total_boundary_insertions;
1925            let total_edits =
1926                m.insertions + m.deletions + m.substitutions + total_boundary_insertions;
1927
1928            if total_edits > max_edits_val || total_insertions > max_insertions {
1929                continue;
1930            }
1931
1932            // Validate character class restrictions
1933            if let Some(restriction) = self
1934                .edit_char_restrictions
1935                .get(pattern_index)
1936                .and_then(|r| r.as_ref())
1937            {
1938                // Validate boundary chars at start
1939                let mut boundary_valid = true;
1940                if start_insertions > 0 && match_start < from {
1941                    for ch in text[match_start..from].chars() {
1942                        if !restriction.allows(ch) {
1943                            boundary_valid = false;
1944                            break;
1945                        }
1946                    }
1947                }
1948                // Validate boundary chars at end
1949                if boundary_valid
1950                    && end_insertions > 0
1951                    && let Some(expected_end) = to
1952                    && m.end < expected_end
1953                    && m.end < text.len()
1954                {
1955                    let end_slice_end = expected_end.min(text.len());
1956                    for ch in text[m.end..end_slice_end].chars() {
1957                        if !restriction.allows(ch) {
1958                            boundary_valid = false;
1959                            break;
1960                        }
1961                    }
1962                }
1963                if !boundary_valid {
1964                    continue;
1965                }
1966            }
1967
1968            // Calculate adjusted similarity
1969            let pattern_len = self.patterns[pattern_index].chars().count() as f32;
1970            let insertion_penalty = 0.5;
1971            let boundary_penalty = f32::from(total_boundary_insertions) * insertion_penalty;
1972            let adjusted_similarity = if pattern_len > 0.0 {
1973                ((pattern_len - boundary_penalty) / pattern_len).max(0.0) * m.similarity
1974            } else {
1975                m.similarity
1976            };
1977
1978            if adjusted_similarity < effective_threshold {
1979                continue;
1980            }
1981
1982            let result = FuzzyMatchResult {
1983                end: to.unwrap_or(m.end),
1984                similarity: adjusted_similarity,
1985                insertions: total_insertions,
1986                deletions: m.deletions,
1987                substitutions: m.substitutions,
1988                swaps: m.swaps,
1989            };
1990
1991            if best.as_ref().is_none_or(|b| {
1992                result.similarity > b.similarity
1993                    || (result.similarity == b.similarity && result.total_edits() < b.total_edits())
1994            }) {
1995                best = Some(result);
1996            }
1997        }
1998
1999        best
2000    }
2001
2002    /// Calculate an effective threshold.
2003    ///
2004    /// The user's threshold is always respected - both constraints must be satisfied:
2005    /// - similarity >= `user_threshold`
2006    /// - edits <= `max_edits` (from pattern syntax)
2007    #[allow(clippy::unused_self)]
2008    fn calculate_effective_threshold(&self, _pattern_index: usize, user_threshold: f32) -> f32 {
2009        // Previously this function tried to lower the threshold based on max_edits,
2010        // but this was incorrect - it allowed low-quality matches that the user
2011        // didn't want. The user's threshold should always be respected.
2012        user_threshold
2013    }
2014
2015    /// Get the pattern text for a given index.
2016    pub fn pattern_text(&self, index: usize) -> Option<&str> {
2017        self.patterns.get(index).map(String::as_str)
2018    }
2019
2020    /// Validate that all edit characters conform to the restriction.
2021    /// Uses Damerau-Levenshtein to properly detect transpositions.
2022    /// Optimized with ASCII fast path and stack allocation for small strings.
2023    fn validate_edit_chars(
2024        &self,
2025        pattern: &str,
2026        matched_text: &str,
2027        restriction: &EditCharRestriction,
2028    ) -> bool {
2029        // Fast path: exact match needs no validation
2030        if pattern == matched_text {
2031            return true;
2032        }
2033
2034        // ASCII fast path (common case) - avoids Vec<char> allocation
2035        if pattern.is_ascii() && matched_text.is_ascii() {
2036            return self.validate_edit_chars_ascii(
2037                pattern.as_bytes(),
2038                matched_text.as_bytes(),
2039                restriction,
2040            );
2041        }
2042
2043        // Unicode path
2044        let pattern_chars: Vec<char> = pattern.chars().collect();
2045        let text_chars: Vec<char> = matched_text.chars().collect();
2046        self.validate_edit_chars_slice(&pattern_chars, &text_chars, restriction)
2047    }
2048
2049    /// ASCII-optimized validation (no char conversion needed).
2050    #[inline]
2051    #[allow(clippy::unused_self)]
2052    fn validate_edit_chars_ascii(
2053        &self,
2054        pattern: &[u8],
2055        text: &[u8],
2056        restriction: &EditCharRestriction,
2057    ) -> bool {
2058        let m = pattern.len();
2059        let n = text.len();
2060
2061        #[derive(Clone, Copy)]
2062        enum Op {
2063            None,
2064            Insert,
2065            Delete,
2066            Substitute,
2067            Transpose,
2068        }
2069
2070        // Stack allocation for small strings (covers most cases)
2071        const STACK_LIMIT: usize = 32;
2072        if m < STACK_LIMIT && n < STACK_LIMIT {
2073            let mut dp = [[(0usize, Op::None); STACK_LIMIT]; STACK_LIMIT];
2074
2075            for i in 1..=m {
2076                dp[i][0] = (i, Op::Delete);
2077            }
2078            for j in 1..=n {
2079                dp[0][j] = (j, Op::Insert);
2080            }
2081
2082            for i in 1..=m {
2083                for j in 1..=n {
2084                    if pattern[i - 1] == text[j - 1] {
2085                        dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2086                    } else {
2087                        let sub = dp[i - 1][j - 1].0 + 1;
2088                        let del = dp[i - 1][j].0 + 1;
2089                        let ins = dp[i][j - 1].0 + 1;
2090
2091                        let trans = if i > 1
2092                            && j > 1
2093                            && pattern[i - 1] == text[j - 2]
2094                            && pattern[i - 2] == text[j - 1]
2095                        {
2096                            dp[i - 2][j - 2].0 + 1
2097                        } else {
2098                            usize::MAX
2099                        };
2100
2101                        let mut best = (sub, Op::Substitute);
2102                        if del < best.0 {
2103                            best = (del, Op::Delete);
2104                        }
2105                        if ins < best.0 {
2106                            best = (ins, Op::Insert);
2107                        }
2108                        if trans < best.0 {
2109                            best = (trans, Op::Transpose);
2110                        }
2111                        dp[i][j] = best;
2112                    }
2113                }
2114            }
2115
2116            // Backtrack and validate
2117            let (mut i, mut j) = (m, n);
2118            while i > 0 || j > 0 {
2119                match dp[i][j].1 {
2120                    Op::None => {
2121                        i -= 1;
2122                        j -= 1;
2123                    }
2124                    Op::Substitute => {
2125                        if !restriction.allows(text[j - 1] as char) {
2126                            return false;
2127                        }
2128                        i -= 1;
2129                        j -= 1;
2130                    }
2131                    Op::Delete => {
2132                        i -= 1;
2133                    }
2134                    Op::Insert => {
2135                        if !restriction.allows(text[j - 1] as char) {
2136                            return false;
2137                        }
2138                        j -= 1;
2139                    }
2140                    Op::Transpose => {
2141                        i -= 2;
2142                        j -= 2;
2143                    }
2144                }
2145            }
2146            return true;
2147        }
2148
2149        // Heap allocation for larger strings
2150        let mut dp = vec![vec![(0usize, Op::None); n + 1]; m + 1];
2151
2152        for i in 1..=m {
2153            dp[i][0] = (i, Op::Delete);
2154        }
2155        for j in 1..=n {
2156            dp[0][j] = (j, Op::Insert);
2157        }
2158
2159        for i in 1..=m {
2160            for j in 1..=n {
2161                if pattern[i - 1] == text[j - 1] {
2162                    dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2163                } else {
2164                    let sub = dp[i - 1][j - 1].0 + 1;
2165                    let del = dp[i - 1][j].0 + 1;
2166                    let ins = dp[i][j - 1].0 + 1;
2167
2168                    let trans = if i > 1
2169                        && j > 1
2170                        && pattern[i - 1] == text[j - 2]
2171                        && pattern[i - 2] == text[j - 1]
2172                    {
2173                        dp[i - 2][j - 2].0 + 1
2174                    } else {
2175                        usize::MAX
2176                    };
2177
2178                    let mut best = (sub, Op::Substitute);
2179                    if del < best.0 {
2180                        best = (del, Op::Delete);
2181                    }
2182                    if ins < best.0 {
2183                        best = (ins, Op::Insert);
2184                    }
2185                    if trans < best.0 {
2186                        best = (trans, Op::Transpose);
2187                    }
2188                    dp[i][j] = best;
2189                }
2190            }
2191        }
2192
2193        let (mut i, mut j) = (m, n);
2194        while i > 0 || j > 0 {
2195            match dp[i][j].1 {
2196                Op::None => {
2197                    i -= 1;
2198                    j -= 1;
2199                }
2200                Op::Substitute => {
2201                    if !restriction.allows(text[j - 1] as char) {
2202                        return false;
2203                    }
2204                    i -= 1;
2205                    j -= 1;
2206                }
2207                Op::Delete => {
2208                    i -= 1;
2209                }
2210                Op::Insert => {
2211                    if !restriction.allows(text[j - 1] as char) {
2212                        return false;
2213                    }
2214                    j -= 1;
2215                }
2216                Op::Transpose => {
2217                    i -= 2;
2218                    j -= 2;
2219                }
2220            }
2221        }
2222        true
2223    }
2224
2225    /// Unicode validation using char slices.
2226    #[inline]
2227    #[allow(clippy::unused_self)]
2228    fn validate_edit_chars_slice(
2229        &self,
2230        pattern: &[char],
2231        text: &[char],
2232        restriction: &EditCharRestriction,
2233    ) -> bool {
2234        let m = pattern.len();
2235        let n = text.len();
2236
2237        #[derive(Clone, Copy)]
2238        enum Op {
2239            None,
2240            Insert,
2241            Delete,
2242            Substitute,
2243            Transpose,
2244        }
2245
2246        // Stack allocation for small strings
2247        const STACK_LIMIT: usize = 32;
2248        if m < STACK_LIMIT && n < STACK_LIMIT {
2249            let mut dp = [[(0usize, Op::None); STACK_LIMIT]; STACK_LIMIT];
2250
2251            for i in 1..=m {
2252                dp[i][0] = (i, Op::Delete);
2253            }
2254            for j in 1..=n {
2255                dp[0][j] = (j, Op::Insert);
2256            }
2257
2258            for i in 1..=m {
2259                for j in 1..=n {
2260                    if pattern[i - 1] == text[j - 1] {
2261                        dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2262                    } else {
2263                        let sub = dp[i - 1][j - 1].0 + 1;
2264                        let del = dp[i - 1][j].0 + 1;
2265                        let ins = dp[i][j - 1].0 + 1;
2266
2267                        let trans = if i > 1
2268                            && j > 1
2269                            && pattern[i - 1] == text[j - 2]
2270                            && pattern[i - 2] == text[j - 1]
2271                        {
2272                            dp[i - 2][j - 2].0 + 1
2273                        } else {
2274                            usize::MAX
2275                        };
2276
2277                        let mut best = (sub, Op::Substitute);
2278                        if del < best.0 {
2279                            best = (del, Op::Delete);
2280                        }
2281                        if ins < best.0 {
2282                            best = (ins, Op::Insert);
2283                        }
2284                        if trans < best.0 {
2285                            best = (trans, Op::Transpose);
2286                        }
2287                        dp[i][j] = best;
2288                    }
2289                }
2290            }
2291
2292            let (mut i, mut j) = (m, n);
2293            while i > 0 || j > 0 {
2294                match dp[i][j].1 {
2295                    Op::None => {
2296                        i -= 1;
2297                        j -= 1;
2298                    }
2299                    Op::Substitute => {
2300                        if !restriction.allows(text[j - 1]) {
2301                            return false;
2302                        }
2303                        i -= 1;
2304                        j -= 1;
2305                    }
2306                    Op::Delete => {
2307                        i -= 1;
2308                    }
2309                    Op::Insert => {
2310                        if !restriction.allows(text[j - 1]) {
2311                            return false;
2312                        }
2313                        j -= 1;
2314                    }
2315                    Op::Transpose => {
2316                        i -= 2;
2317                        j -= 2;
2318                    }
2319                }
2320            }
2321            return true;
2322        }
2323
2324        // Heap allocation for larger strings
2325        let mut dp = vec![vec![(0usize, Op::None); n + 1]; m + 1];
2326
2327        for i in 1..=m {
2328            dp[i][0] = (i, Op::Delete);
2329        }
2330        for j in 1..=n {
2331            dp[0][j] = (j, Op::Insert);
2332        }
2333
2334        for i in 1..=m {
2335            for j in 1..=n {
2336                if pattern[i - 1] == text[j - 1] {
2337                    dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2338                } else {
2339                    let sub = dp[i - 1][j - 1].0 + 1;
2340                    let del = dp[i - 1][j].0 + 1;
2341                    let ins = dp[i][j - 1].0 + 1;
2342
2343                    let trans = if i > 1
2344                        && j > 1
2345                        && pattern[i - 1] == text[j - 2]
2346                        && pattern[i - 2] == text[j - 1]
2347                    {
2348                        dp[i - 2][j - 2].0 + 1
2349                    } else {
2350                        usize::MAX
2351                    };
2352
2353                    let mut best = (sub, Op::Substitute);
2354                    if del < best.0 {
2355                        best = (del, Op::Delete);
2356                    }
2357                    if ins < best.0 {
2358                        best = (ins, Op::Insert);
2359                    }
2360                    if trans < best.0 {
2361                        best = (trans, Op::Transpose);
2362                    }
2363                    dp[i][j] = best;
2364                }
2365            }
2366        }
2367
2368        let (mut i, mut j) = (m, n);
2369        while i > 0 || j > 0 {
2370            match dp[i][j].1 {
2371                Op::None => {
2372                    i -= 1;
2373                    j -= 1;
2374                }
2375                Op::Substitute => {
2376                    if !restriction.allows(text[j - 1]) {
2377                        return false;
2378                    }
2379                    i -= 1;
2380                    j -= 1;
2381                }
2382                Op::Delete => {
2383                    i -= 1;
2384                }
2385                Op::Insert => {
2386                    if !restriction.allows(text[j - 1]) {
2387                        return false;
2388                    }
2389                    j -= 1;
2390                }
2391                Op::Transpose => {
2392                    i -= 2;
2393                    j -= 2;
2394                }
2395            }
2396        }
2397        true
2398    }
2399}
2400
2401/// Result of a fuzzy match.
2402#[derive(Debug, Clone)]
2403pub struct FuzzyMatchResult {
2404    /// End position of the match (byte offset, exclusive).
2405    pub end: usize,
2406    /// Similarity score (0.0 to 1.0).
2407    pub similarity: f32,
2408    /// Number of insertion edits.
2409    pub insertions: u8,
2410    /// Number of deletion edits.
2411    pub deletions: u8,
2412    /// Number of substitution edits.
2413    pub substitutions: u8,
2414    /// Number of transposition (swap) edits.
2415    pub swaps: u8,
2416}
2417
2418impl FuzzyMatchResult {
2419    /// Returns the total number of edit operations.
2420    #[must_use]
2421    pub fn total_edits(&self) -> u8 {
2422        self.insertions
2423            .saturating_add(self.deletions)
2424            .saturating_add(self.substitutions)
2425            .saturating_add(self.swaps)
2426    }
2427}
2428
2429#[test]
2430fn test_search_all_the_quick() {
2431    use crate::ir::LiteralPattern;
2432    use crate::types::FuzzyLimits;
2433
2434    // Create a literal pattern for "quik" with 1 edit allowed
2435    let limits = FuzzyLimits::new().edits(1);
2436    let lit = LiteralPattern::new("quik".to_string(), Some(limits), None);
2437
2438    let bridge = FuzzyBridge::new(&[lit], None, None, false).unwrap();
2439
2440    let text = "The quick brown fox";
2441    let cached = bridge.search_all(text, 0.5);
2442
2443    println!("search_all results for '{text}' with pattern 'quik~1':");
2444    println!("  by_pattern_and_start: {:?}", cached.by_pattern_and_start);
2445
2446    assert!(
2447        !cached.by_pattern_and_start.is_empty(),
2448        "Should find at least one match"
2449    );
2450}