1#![allow(
4 clippy::needless_range_loop,
5 clippy::match_same_arms,
6 clippy::too_many_lines,
7 clippy::items_after_statements,
8 clippy::float_cmp,
9 clippy::allow_attributes
10)]
11
12use crate::types::{FuzzyLimits, FuzzyPenalties};
13use std::cell::RefCell;
14
15use super::GuardNfa;
16use super::bitap::BitapMatcher;
17use super::damlev::{DamLevNfa, EditLimits, SearchBuffers};
18use super::hash::{FxHashMap, FxHashSet};
19use crate::ir::{EditCharRestriction, LiteralPattern};
20
21#[derive(Debug, Default)]
24pub struct CachedMatches {
25 by_pattern_and_start: FxHashMap<(usize, usize), Vec<FuzzyMatchResult>>,
27}
28
29impl CachedMatches {
30 #[must_use]
32 pub fn get(&self, pattern_index: usize, start: usize) -> Option<&FuzzyMatchResult> {
33 self.by_pattern_and_start
34 .get(&(pattern_index, start))
35 .and_then(|v| v.first())
36 }
37
38 pub fn get_all(&self, pattern_index: usize, start: usize) -> Option<&[FuzzyMatchResult]> {
40 self.by_pattern_and_start
41 .get(&(pattern_index, start))
42 .map(Vec::as_slice)
43 }
44
45 pub fn insert(&mut self, pattern_index: usize, start: usize, result: FuzzyMatchResult) {
47 self.by_pattern_and_start
48 .entry((pattern_index, start))
49 .or_default()
50 .push(result);
51 }
52
53 pub fn iter(&self) -> impl Iterator<Item = ((usize, usize), &[FuzzyMatchResult])> + '_ {
55 self.by_pattern_and_start
56 .iter()
57 .map(|((pattern_idx, start), results)| ((*pattern_idx, *start), results.as_slice()))
58 }
59
60 #[must_use]
62 pub fn is_empty(&self) -> bool {
63 self.by_pattern_and_start.is_empty()
64 }
65}
66
67#[derive(Debug)]
69pub struct FuzzyBridge {
70 automata: Vec<DamLevNfa>,
72 #[allow(dead_code)]
74 guard_nfa: Option<GuardNfa>,
75 bitap_matchers: Vec<Option<BitapMatcher>>,
77 patterns: Vec<String>,
79 limits: Vec<Option<FuzzyLimits>>,
81 edit_char_restrictions: Vec<Option<EditCharRestriction>>,
83 has_char_restrictions: bool,
85 case_insensitive: bool,
87 search_buffers: RefCell<SearchBuffers>,
89 word_list_patterns: Vec<String>,
91 word_list_limits: Vec<Option<FuzzyLimits>>,
93}
94
95impl FuzzyBridge {
96 #[must_use]
98 pub fn new(
99 literals: &[LiteralPattern],
100 _default_limits: Option<FuzzyLimits>,
101 _penalties: Option<FuzzyPenalties>,
102 case_insensitive: bool,
103 ) -> Option<Self> {
104 if literals.is_empty() {
105 return None;
106 }
107
108 let patterns: Vec<String> = literals.iter().map(|l| l.text.clone()).collect();
109 let limits: Vec<Option<FuzzyLimits>> = literals.iter().map(|l| l.limits.clone()).collect();
110 let edit_char_restrictions: Vec<Option<EditCharRestriction>> =
111 literals.iter().map(|l| l.edit_chars.clone()).collect();
112
113 let mut automata: Vec<DamLevNfa> = Vec::with_capacity(literals.len());
115 let mut bitap_matchers: Vec<Option<BitapMatcher>> = Vec::with_capacity(literals.len());
116
117 for lit in literals {
118 let edit_limits = if let Some(ref lim) = lit.limits {
119 let max_edits = lim.get_edits().unwrap_or_else(|| {
121 let i = lim.get_insertions().unwrap_or(0);
122 let d = lim.get_deletions().unwrap_or(0);
123 let s = lim.get_substitutions().unwrap_or(0);
124 let t = lim.get_swaps().unwrap_or(0);
125 i.saturating_add(d).saturating_add(s).saturating_add(t)
126 });
127 EditLimits::with_limits(
128 max_edits,
129 lim.get_insertions(),
130 lim.get_deletions(),
131 lim.get_substitutions(),
132 lim.get_swaps(),
133 )
134 } else {
135 EditLimits::new(0) };
137
138 automata.push(DamLevNfa::new(
140 &lit.text,
141 edit_limits.clone(),
142 case_insensitive,
143 ));
144
145 bitap_matchers.push(BitapMatcher::new(&lit.text, edit_limits, case_insensitive));
147 }
148
149 let guard_nfa = if literals.len() == 1 {
151 let lit = &literals[0];
152 let edit_limits = if let Some(ref lim) = lit.limits {
153 let max_edits = lim.get_edits().unwrap_or(0);
154 EditLimits::new(max_edits)
155 } else {
156 EditLimits::new(0)
157 };
158 Some(GuardNfa::new(&lit.text, edit_limits, case_insensitive))
159 } else {
160 None
161 };
162
163 let has_char_restrictions = edit_char_restrictions
164 .iter()
165 .any(std::option::Option::is_some);
166
167 Some(FuzzyBridge {
168 automata,
169 guard_nfa,
170 bitap_matchers,
171 patterns,
172 limits,
173 edit_char_restrictions,
174 has_char_restrictions,
175 case_insensitive,
176 search_buffers: RefCell::new(SearchBuffers::new()),
177 word_list_patterns: Vec::new(),
178 word_list_limits: Vec::new(),
179 })
180 }
181
182 #[must_use]
184 pub fn pattern_count(&self) -> usize {
185 self.patterns.len()
186 }
187
188 #[must_use]
190 pub fn is_case_insensitive(&self) -> bool {
191 self.case_insensitive
192 }
193
194 pub fn add_word_list(&mut self, words: &[String], limits: Option<&FuzzyLimits>) {
197 for word in words {
198 self.word_list_patterns.push(word.clone());
199 self.word_list_limits.push(limits.cloned());
200 }
201 }
202
203 #[must_use]
205 pub fn total_pattern_count(&self) -> usize {
206 self.patterns.len() + self.word_list_patterns.len()
207 }
208
209 #[must_use]
211 pub fn limits(&self) -> &[Option<FuzzyLimits>] {
212 &self.limits
213 }
214
215 #[must_use]
217 pub fn get_bitap_matcher(&self) -> Option<&BitapMatcher> {
218 self.bitap_matchers.first().and_then(|m| m.as_ref())
219 }
220
221 pub fn pattern_char_len(&self, index: usize) -> Option<usize> {
223 self.patterns.get(index).map(|s| s.chars().count())
224 }
225
226 pub fn is_all_exact(&self) -> bool {
230 self.limits.iter().all(|lim| {
231 match lim {
232 None => true, Some(lim) => {
234 lim.get_edits().unwrap_or_else(|| {
235 let ins = lim.get_insertions().unwrap_or(0);
236 let del = lim.get_deletions().unwrap_or(0);
237 let sub = lim.get_substitutions().unwrap_or(0);
238 let swap = lim.get_swaps().unwrap_or(0);
239 ins.saturating_add(del)
240 .saturating_add(sub)
241 .saturating_add(swap)
242 }) == 0
243 }
244 }
245 })
246 }
247
248 pub fn pattern_max_edits(&self, index: usize) -> Option<u8> {
250 self.limits.get(index).and_then(|lim| {
251 lim.as_ref().map(|l| {
252 l.get_edits().unwrap_or_else(|| {
253 let i = l.get_insertions().unwrap_or(0);
254 let d = l.get_deletions().unwrap_or(0);
255 let s = l.get_substitutions().unwrap_or(0);
256 let t = l.get_swaps().unwrap_or(0);
257 i.saturating_add(d).saturating_add(s).saturating_add(t)
258 })
259 })
260 })
261 }
262
263 pub fn max_pattern_len(&self) -> usize {
265 self.patterns
266 .iter()
267 .map(|p| p.chars().count())
268 .max()
269 .unwrap_or(0)
270 }
271
272 pub fn max_edits(&self) -> Option<u8> {
274 self.limits
275 .iter()
276 .filter_map(|lim| {
277 lim.as_ref().map(|l| {
278 l.get_edits().unwrap_or_else(|| {
279 let i = l.get_insertions().unwrap_or(0);
280 let d = l.get_deletions().unwrap_or(0);
281 let s = l.get_substitutions().unwrap_or(0);
282 let t = l.get_swaps().unwrap_or(0);
283 i.saturating_add(d).saturating_add(s).saturating_add(t)
284 })
285 })
286 })
287 .max()
288 }
289
290 pub fn all_patterns_bitap_compatible(&self) -> bool {
292 self.bitap_matchers.iter().all(Option::is_some)
293 }
294
295 pub fn search_all(&self, text: &str, threshold: f32) -> CachedMatches {
298 let mut cached = CachedMatches::default();
299
300 for (pattern_idx, nfa) in self.automata.iter().enumerate() {
301 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
302
303 let matches = if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
305 bitap.find_all(text, pattern_threshold)
306 } else {
307 nfa.find_all(text, pattern_threshold)
308 };
309
310 for m in matches {
311 if let Some(restriction) = self
313 .edit_char_restrictions
314 .get(pattern_idx)
315 .and_then(|r| r.as_ref())
316 {
317 let matched_text = &text[m.start..m.end];
318 if !self.validate_edit_chars(
319 &self.patterns[pattern_idx],
320 matched_text,
321 restriction,
322 ) {
323 continue;
324 }
325 }
326
327 let result = FuzzyMatchResult {
328 end: m.end,
329 similarity: m.similarity,
330 insertions: m.insertions,
331 deletions: m.deletions,
332 substitutions: m.substitutions,
333 swaps: m.swaps,
334 };
335
336 cached
337 .by_pattern_and_start
338 .entry((pattern_idx, m.start))
339 .or_default()
340 .push(result);
341 }
342 }
343
344 for matches in cached.by_pattern_and_start.values_mut() {
348 matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
349 Some(std::cmp::Ordering::Equal) | None => b.end.cmp(&a.end),
350 Some(ord) => ord,
351 });
352 }
353
354 cached
355 }
356
357 pub fn search_cached_at_position(
362 &self,
363 text: &str,
364 pos: usize,
365 threshold: f32,
366 ) -> CachedMatches {
367 let mut cached = CachedMatches::default();
368
369 if pos >= text.len() {
370 return cached;
371 }
372
373 let substring = &text[pos..];
374
375 for (pattern_idx, nfa) in self.automata.iter().enumerate() {
376 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
377
378 let matches = if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
380 bitap.find_all(substring, pattern_threshold)
381 } else {
382 nfa.find_all(substring, pattern_threshold)
383 };
384
385 for m in matches {
387 if m.start != 0 {
388 continue;
389 }
390
391 if let Some(restriction) = self
393 .edit_char_restrictions
394 .get(pattern_idx)
395 .and_then(|r| r.as_ref())
396 {
397 let matched_text = &substring[m.start..m.end];
398 if !self.validate_edit_chars(
399 &self.patterns[pattern_idx],
400 matched_text,
401 restriction,
402 ) {
403 continue;
404 }
405 }
406
407 let result = FuzzyMatchResult {
408 end: pos + m.end,
409 similarity: m.similarity,
410 insertions: m.insertions,
411 deletions: m.deletions,
412 substitutions: m.substitutions,
413 swaps: m.swaps,
414 };
415
416 cached
417 .by_pattern_and_start
418 .entry((pattern_idx, pos))
419 .or_default()
420 .push(result);
421 }
422 }
423
424 for matches in cached.by_pattern_and_start.values_mut() {
426 matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
427 Some(std::cmp::Ordering::Equal) | None => b.end.cmp(&a.end),
428 Some(ord) => ord,
429 });
430 }
431
432 cached
433 }
434
435 pub fn search_non_overlapping(
444 &self,
445 text: &str,
446 threshold: f32,
447 pattern_idx: usize,
448 require_first_char: bool,
449 ) -> Vec<super::damlev::DamLevMatch> {
450 if pattern_idx >= self.automata.len() {
451 return Vec::new();
452 }
453
454 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
455
456 if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
458 let matches =
459 bitap.find_all_non_overlapping(text, pattern_threshold, require_first_char);
460
461 if let Some(restriction) = self
463 .edit_char_restrictions
464 .get(pattern_idx)
465 .and_then(|r| r.as_ref())
466 {
467 return matches
468 .into_iter()
469 .filter(|m| {
470 let matched_text = &text[m.start..m.end];
471 self.validate_edit_chars(
472 &self.patterns[pattern_idx],
473 matched_text,
474 restriction,
475 )
476 })
477 .collect();
478 }
479
480 return matches;
481 }
482
483 self.automata[pattern_idx].find_all(text, pattern_threshold)
485 }
486
487 pub fn search_non_overlapping_n(
492 &self,
493 text: &str,
494 threshold: f32,
495 pattern_idx: usize,
496 require_first_char: bool,
497 n: usize,
498 ) -> Vec<super::damlev::DamLevMatch> {
499 if n == 0 || pattern_idx >= self.automata.len() {
500 return Vec::new();
501 }
502
503 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
504
505 if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
507 let matches =
508 bitap.find_n_non_overlapping(text, pattern_threshold, require_first_char, n);
509
510 if let Some(restriction) = self
512 .edit_char_restrictions
513 .get(pattern_idx)
514 .and_then(|r| r.as_ref())
515 {
516 return matches
517 .into_iter()
518 .filter(|m| {
519 let matched_text = &text[m.start..m.end];
520 self.validate_edit_chars(
521 &self.patterns[pattern_idx],
522 matched_text,
523 restriction,
524 )
525 })
526 .collect();
527 }
528
529 return matches;
530 }
531
532 self.automata[pattern_idx].find_n(text, pattern_threshold, n)
534 }
535
536 pub fn search_first(
542 &self,
543 text: &str,
544 threshold: f32,
545 pattern_idx: usize,
546 ) -> Option<super::damlev::DamLevMatch> {
547 if pattern_idx >= self.automata.len() {
548 return None;
549 }
550
551 let pattern = &self.patterns[pattern_idx];
554 if let Some(pos) = text.find(pattern) {
555 let sim = 1.0f32;
557 if sim >= threshold {
558 let end = pos + pattern.len();
560 if end <= text.len() {
561 return Some(super::damlev::DamLevMatch {
562 start: pos,
563 end,
564 insertions: 0,
565 deletions: 0,
566 substitutions: 0,
567 swaps: 0,
568 similarity: sim,
569 });
570 }
571 }
572 }
573
574 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
575
576 if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
578 let result = bitap.find_first_non_overlapping(text, pattern_threshold);
579
580 if self.has_char_restrictions
582 && let Some(restriction) = self
583 .edit_char_restrictions
584 .get(pattern_idx)
585 .and_then(|r| r.as_ref())
586 {
587 return result.filter(|m| {
588 let matched_text = &text[m.start..m.end];
589 self.validate_edit_chars(&self.patterns[pattern_idx], matched_text, restriction)
590 });
591 }
592
593 return result;
594 }
595
596 self.automata[pattern_idx].find_first(text, pattern_threshold)
598 }
599
600 pub fn search_best_non_overlapping(
609 &self,
610 text: &str,
611 threshold: f32,
612 pattern_idx: usize,
613 require_first_char: bool,
614 ) -> Vec<super::damlev::DamLevMatch> {
615 if pattern_idx >= self.automata.len() {
616 return Vec::new();
617 }
618
619 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
620
621 if let Some(ref bitap) = self.bitap_matchers[pattern_idx] {
623 let matches =
624 bitap.find_best_non_overlapping(text, pattern_threshold, require_first_char);
625
626 if let Some(restriction) = self
628 .edit_char_restrictions
629 .get(pattern_idx)
630 .and_then(|r| r.as_ref())
631 {
632 return matches
633 .into_iter()
634 .filter(|m| {
635 let matched_text = &text[m.start..m.end];
636 self.validate_edit_chars(
637 &self.patterns[pattern_idx],
638 matched_text,
639 restriction,
640 )
641 })
642 .collect();
643 }
644
645 return matches;
646 }
647
648 let mut all_matches = self.automata[pattern_idx].find_all(text, pattern_threshold);
650
651 if require_first_char {
654 let pattern = &self.patterns[pattern_idx];
655 if let Some(pattern_first) = pattern.chars().next() {
656 let case_insensitive = self.case_insensitive;
657 all_matches.retain(|m| {
658 if let Some(match_first) = text[m.start..m.end].chars().next() {
659 if case_insensitive {
660 match_first.eq_ignore_ascii_case(&pattern_first)
661 } else {
662 match_first == pattern_first
663 }
664 } else {
665 false
666 }
667 });
668 }
669 }
670
671 all_matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
673 Some(std::cmp::Ordering::Equal) | None => a.start.cmp(&b.start),
674 Some(ord) => ord,
675 });
676
677 let mut result = Vec::new();
679 let mut occupied = vec![false; text.len() + 1];
680
681 for m in all_matches {
682 let overlaps = (m.start..m.end).any(|i| occupied[i]);
683 if !overlaps {
684 for i in m.start..m.end {
685 occupied[i] = true;
686 }
687 result.push(m);
688 }
689 }
690
691 result.sort_by_key(|m| m.start);
692 result
693 }
694
695 pub fn search_all_with_prefilter(
699 &self,
700 text: &str,
701 threshold: f32,
702 prefilter: &super::prefilter::Prefilter,
703 ) -> CachedMatches {
704 let mut cached = CachedMatches::default();
705
706 let max_offset = prefilter.max_offset();
708 let candidates: FxHashSet<usize> = prefilter
709 .find_candidates(text.as_bytes())
710 .flat_map(|pos| pos..=pos.saturating_add(max_offset))
711 .collect();
712
713 if candidates.is_empty() {
714 return cached;
715 }
716
717 let mut buffers = self.search_buffers.borrow_mut();
718
719 for (pattern_idx, nfa) in self.automata.iter().enumerate() {
720 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
721 let matches = nfa.find_all_with_candidates_buffered(
722 text,
723 pattern_threshold,
724 &candidates,
725 &mut buffers,
726 );
727
728 for m in matches {
729 if let Some(restriction) = self
730 .edit_char_restrictions
731 .get(pattern_idx)
732 .and_then(|r| r.as_ref())
733 {
734 let matched_text = &text[m.start..m.end];
735 if !self.validate_edit_chars(
736 &self.patterns[pattern_idx],
737 matched_text,
738 restriction,
739 ) {
740 continue;
741 }
742 }
743
744 let result = FuzzyMatchResult {
745 end: m.end,
746 similarity: m.similarity,
747 insertions: m.insertions,
748 deletions: m.deletions,
749 substitutions: m.substitutions,
750 swaps: m.swaps,
751 };
752
753 cached
754 .by_pattern_and_start
755 .entry((pattern_idx, m.start))
756 .or_default()
757 .push(result);
758 }
759 }
760
761 for matches in cached.by_pattern_and_start.values_mut() {
763 matches.sort_by(|a, b| match b.similarity.partial_cmp(&a.similarity) {
764 Some(std::cmp::Ordering::Equal) | None => b.end.cmp(&a.end),
765 Some(ord) => ord,
766 });
767 }
768
769 cached
770 }
771
772 #[inline]
776 pub fn find_first_guard_nfa(
777 &self,
778 text: &str,
779 threshold: f32,
780 ) -> Option<(usize, FuzzyMatchResult)> {
781 let guard_nfa = self.guard_nfa.as_ref()?;
782
783 guard_nfa.find_first(text, threshold).map(|m| {
784 (
785 m.start,
786 FuzzyMatchResult {
787 end: m.end,
788 similarity: m.similarity,
789 insertions: m.insertions,
790 deletions: m.deletions,
791 substitutions: m.substitutions,
792 swaps: m.swaps,
793 },
794 )
795 })
796 }
797
798 pub fn find_first_with_prefilter(
803 &self,
804 text: &str,
805 threshold: f32,
806 prefilter: &super::prefilter::Prefilter,
807 ) -> Option<(usize, FuzzyMatchResult)> {
808 if self.automata.len() != 1 {
809 return None; }
811
812 let max_offset = prefilter.max_offset();
814 let candidates: FxHashSet<usize> = prefilter
815 .find_candidates(text.as_bytes())
816 .flat_map(|pos| pos..=pos.saturating_add(max_offset))
817 .collect();
818
819 if candidates.is_empty() {
820 return None;
821 }
822
823 let pattern_threshold = self.calculate_effective_threshold(0, threshold);
824
825 if let Some(ref bitap) = self.bitap_matchers[0]
827 && let Some(m) = bitap.find_first_with_candidates(text, pattern_threshold, &candidates)
828 {
829 let restriction = self.edit_char_restrictions.first().and_then(|r| r.as_ref());
831 let validation_passed = restriction.is_none_or(|r| {
832 let matched_text = &text[m.start..m.end];
833 self.validate_edit_chars(&self.patterns[0], matched_text, r)
834 });
835
836 if validation_passed {
837 return Some((
838 m.start,
839 FuzzyMatchResult {
840 end: m.end,
841 similarity: m.similarity,
842 insertions: m.insertions,
843 deletions: m.deletions,
844 substitutions: m.substitutions,
845 swaps: m.swaps,
846 },
847 ));
848 }
849 }
851
852 let nfa = &self.automata[0];
854 if let Some(m) = nfa.find_first_with_candidates(text, pattern_threshold, &candidates) {
855 if let Some(restriction) = self.edit_char_restrictions.first().and_then(|r| r.as_ref())
857 {
858 let matched_text = &text[m.start..m.end];
859 if !self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
860 return None;
861 }
862 }
863
864 return Some((
865 m.start,
866 FuzzyMatchResult {
867 end: m.end,
868 similarity: m.similarity,
869 insertions: m.insertions,
870 deletions: m.deletions,
871 substitutions: m.substitutions,
872 swaps: m.swaps,
873 },
874 ));
875 }
876
877 None
878 }
879
880 pub fn search_at_position(
885 &self,
886 text: &str,
887 start_pos: usize,
888 threshold: f32,
889 ) -> Option<(usize, FuzzyMatchResult)> {
890 if self.automata.len() != 1 {
891 return None; }
893
894 let pattern_threshold = self.calculate_effective_threshold(0, threshold);
895
896 let candidates: FxHashSet<usize> = std::iter::once(start_pos).collect();
898
899 if let Some(ref bitap) = self.bitap_matchers[0]
901 && let Some(m) = bitap.find_first_with_candidates(text, pattern_threshold, &candidates)
902 && m.start == start_pos
903 {
904 if let Some(restriction) = self.edit_char_restrictions.first().and_then(|r| r.as_ref())
906 {
907 let matched_text = &text[m.start..m.end];
908 if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
909 return Some((
910 m.start,
911 FuzzyMatchResult {
912 end: m.end,
913 similarity: m.similarity,
914 insertions: m.insertions,
915 deletions: m.deletions,
916 substitutions: m.substitutions,
917 swaps: m.swaps,
918 },
919 ));
920 }
921 } else {
922 return Some((
923 m.start,
924 FuzzyMatchResult {
925 end: m.end,
926 similarity: m.similarity,
927 insertions: m.insertions,
928 deletions: m.deletions,
929 substitutions: m.substitutions,
930 swaps: m.swaps,
931 },
932 ));
933 }
934 }
935
936 let nfa = &self.automata[0];
938
939 let mut buffers = self.search_buffers.borrow_mut();
941 let matches = nfa.find_all_with_candidates_buffered(
942 text,
943 pattern_threshold,
944 &candidates,
945 &mut buffers,
946 );
947
948 let best = matches
950 .into_iter()
951 .filter(|m| m.start == start_pos)
952 .max_by(|a, b| {
953 a.similarity
954 .partial_cmp(&b.similarity)
955 .unwrap_or(std::cmp::Ordering::Equal)
956 })?;
957
958 if let Some(restriction) = self.edit_char_restrictions.first().and_then(|r| r.as_ref()) {
960 let matched_text = &text[best.start..best.end];
961 if !self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
962 return None;
963 }
964 }
965
966 Some((
967 best.start,
968 FuzzyMatchResult {
969 end: best.end,
970 similarity: best.similarity,
971 insertions: best.insertions,
972 deletions: best.deletions,
973 substitutions: best.substitutions,
974 swaps: best.swaps,
975 },
976 ))
977 }
978
979 #[inline]
984 pub fn search_at_position_fast(
985 &self,
986 text: &[u8],
987 start_pos: usize,
988 threshold: f32,
989 ) -> Option<(usize, FuzzyMatchResult)> {
990 if self.automata.len() != 1 {
991 return None;
992 }
993
994 let pattern_threshold = self.calculate_effective_threshold(0, threshold);
995
996 if let Some(ref bitap) = self.bitap_matchers[0]
998 && let Some(m) = bitap.find_at_byte_position(text, start_pos, pattern_threshold)
999 {
1000 if self
1003 .edit_char_restrictions
1004 .first()
1005 .and_then(|r| r.as_ref())
1006 .is_none()
1007 {
1008 return Some((
1009 m.start,
1010 FuzzyMatchResult {
1011 end: m.end,
1012 similarity: m.similarity,
1013 insertions: m.insertions,
1014 deletions: m.deletions,
1015 substitutions: m.substitutions,
1016 swaps: m.swaps,
1017 },
1018 ));
1019 }
1020
1021 if let Ok(text_str) = std::str::from_utf8(text)
1023 && let Some(restriction) = &self.edit_char_restrictions[0]
1024 {
1025 let matched_text = &text_str[m.start..m.end];
1026 if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1027 return Some((
1028 m.start,
1029 FuzzyMatchResult {
1030 end: m.end,
1031 similarity: m.similarity,
1032 insertions: m.insertions,
1033 deletions: m.deletions,
1034 substitutions: m.substitutions,
1035 swaps: m.swaps,
1036 },
1037 ));
1038 }
1039 }
1040 }
1041
1042 None
1043 }
1044
1045 #[inline]
1048 pub fn find_first_boyer_moore(
1049 &self,
1050 text: &[u8],
1051 threshold: f32,
1052 _max_offset: usize,
1053 ) -> Option<(usize, FuzzyMatchResult)> {
1054 if self.automata.len() != 1 {
1055 return None;
1056 }
1057
1058 let bitap = self.bitap_matchers[0].as_ref()?;
1059 let pattern_threshold = self.calculate_effective_threshold(0, threshold);
1060 let has_restrictions = self
1061 .edit_char_restrictions
1062 .first()
1063 .and_then(|r| r.as_ref())
1064 .is_some();
1065
1066 if let Some(m) = bitap.find_first_streaming(text, pattern_threshold) {
1069 if !has_restrictions {
1071 return Some((
1072 m.start,
1073 FuzzyMatchResult {
1074 end: m.end,
1075 similarity: m.similarity,
1076 insertions: m.insertions,
1077 deletions: m.deletions,
1078 substitutions: m.substitutions,
1079 swaps: m.swaps,
1080 },
1081 ));
1082 }
1083
1084 if let Ok(text_str) = std::str::from_utf8(text)
1086 && let Some(restriction) = &self.edit_char_restrictions[0]
1087 {
1088 let matched_text = &text_str[m.start..m.end];
1089 if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1090 return Some((
1091 m.start,
1092 FuzzyMatchResult {
1093 end: m.end,
1094 similarity: m.similarity,
1095 insertions: m.insertions,
1096 deletions: m.deletions,
1097 substitutions: m.substitutions,
1098 swaps: m.swaps,
1099 },
1100 ));
1101 }
1102 }
1103 }
1104
1105 None
1106 }
1107
1108 #[inline]
1113 pub fn find_first_lazy(
1114 &self,
1115 text: &[u8],
1116 threshold: f32,
1117 prefilter: &super::prefilter::Prefilter,
1118 ) -> Option<(usize, FuzzyMatchResult)> {
1119 if self.automata.len() != 1 {
1120 return None;
1121 }
1122
1123 let bitap = self.bitap_matchers.first()?.as_ref()?;
1124 let pattern_threshold = self.calculate_effective_threshold(0, threshold);
1125 let max_offset = prefilter.max_offset();
1126 let has_restrictions = self
1127 .edit_char_restrictions
1128 .first()
1129 .and_then(|r| r.as_ref())
1130 .is_some();
1131
1132 let use_streaming = match prefilter {
1136 super::prefilter::Prefilter::ThreeBytes {
1137 max_offset: off, ..
1138 } if *off > 0 => true,
1139 super::prefilter::Prefilter::MultiBytes {
1140 max_offset: off, ..
1141 } if *off > 0 => true,
1142 _ => false,
1143 };
1144
1145 if use_streaming {
1146 return self.find_first_boyer_moore(text, threshold, max_offset);
1148 }
1149
1150 let mut last_tried: Option<usize> = None;
1152
1153 for candidate in prefilter.find_candidates(text) {
1155 for back in 0..=max_offset {
1158 let pos = candidate.saturating_sub(back);
1159
1160 if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1162 continue;
1163 }
1164
1165 if last_tried == Some(pos) {
1167 continue;
1168 }
1169 last_tried = Some(pos);
1170
1171 if let Some(m) = bitap.find_at_byte_position(text, pos, pattern_threshold) {
1173 if !has_restrictions {
1175 return Some((
1176 m.start,
1177 FuzzyMatchResult {
1178 end: m.end,
1179 similarity: m.similarity,
1180 insertions: m.insertions,
1181 deletions: m.deletions,
1182 substitutions: m.substitutions,
1183 swaps: m.swaps,
1184 },
1185 ));
1186 }
1187
1188 if let Ok(text_str) = std::str::from_utf8(text)
1190 && let Some(restriction) = &self.edit_char_restrictions[0]
1191 {
1192 let matched_text = &text_str[m.start..m.end];
1193 if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1194 return Some((
1195 m.start,
1196 FuzzyMatchResult {
1197 end: m.end,
1198 similarity: m.similarity,
1199 insertions: m.insertions,
1200 deletions: m.deletions,
1201 substitutions: m.substitutions,
1202 swaps: m.swaps,
1203 },
1204 ));
1205 }
1206 }
1207 }
1208 }
1209
1210 for fwd in 1..=max_offset {
1212 let pos = candidate + fwd;
1213 if pos >= text.len() {
1214 continue;
1215 }
1216
1217 if (text[pos] & 0b1100_0000) == 0b1000_0000 {
1219 continue;
1220 }
1221
1222 if last_tried == Some(pos) {
1223 continue;
1224 }
1225 last_tried = Some(pos);
1226
1227 if let Some(m) = bitap.find_at_byte_position(text, pos, pattern_threshold) {
1228 if !has_restrictions {
1229 return Some((
1230 m.start,
1231 FuzzyMatchResult {
1232 end: m.end,
1233 similarity: m.similarity,
1234 insertions: m.insertions,
1235 deletions: m.deletions,
1236 substitutions: m.substitutions,
1237 swaps: m.swaps,
1238 },
1239 ));
1240 }
1241
1242 if let Ok(text_str) = std::str::from_utf8(text)
1243 && let Some(restriction) = &self.edit_char_restrictions[0]
1244 {
1245 let matched_text = &text_str[m.start..m.end];
1246 if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1247 return Some((
1248 m.start,
1249 FuzzyMatchResult {
1250 end: m.end,
1251 similarity: m.similarity,
1252 insertions: m.insertions,
1253 deletions: m.deletions,
1254 substitutions: m.substitutions,
1255 swaps: m.swaps,
1256 },
1257 ));
1258 }
1259 }
1260 }
1261 }
1262 }
1263
1264 None
1265 }
1266
1267 #[inline]
1277 pub fn find_first_batch_parallel(
1278 &self,
1279 text: &[u8],
1280 threshold: f32,
1281 prefilter: &super::prefilter::Prefilter,
1282 ) -> Option<(usize, FuzzyMatchResult)> {
1283 if self.automata.len() != 1 {
1284 return None;
1285 }
1286
1287 let bitap = self.bitap_matchers.first()?.as_ref()?;
1288 let pattern_threshold = self.calculate_effective_threshold(0, threshold);
1289 let max_offset = prefilter.max_offset();
1290
1291 let mut positions: Vec<usize> = Vec::with_capacity(64);
1296 let mut seen: FxHashSet<usize> = FxHashSet::default();
1297
1298 for candidate in prefilter.find_candidates(text) {
1299 for back in 0..=max_offset {
1301 let pos = candidate.saturating_sub(back);
1302 if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1303 continue;
1304 }
1305 if seen.insert(pos) {
1306 positions.push(pos);
1307 }
1308 }
1309 for fwd in 1..=max_offset {
1311 let pos = candidate + fwd;
1312 if pos >= text.len() {
1313 continue;
1314 }
1315 if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1316 continue;
1317 }
1318 if seen.insert(pos) {
1319 positions.push(pos);
1320 }
1321 }
1322 }
1323
1324 if positions.is_empty() {
1325 return None;
1326 }
1327
1328 if let Some((_idx, m)) =
1330 bitap.find_at_positions_parallel(text, &positions, pattern_threshold)
1331 {
1332 if self
1334 .edit_char_restrictions
1335 .first()
1336 .and_then(|r| r.as_ref())
1337 .is_none()
1338 {
1339 return Some((
1340 m.start,
1341 FuzzyMatchResult {
1342 end: m.end,
1343 similarity: m.similarity,
1344 insertions: m.insertions,
1345 deletions: m.deletions,
1346 substitutions: m.substitutions,
1347 swaps: m.swaps,
1348 },
1349 ));
1350 }
1351
1352 if let Ok(text_str) = std::str::from_utf8(text)
1354 && let Some(restriction) =
1355 self.edit_char_restrictions.first().and_then(|r| r.as_ref())
1356 {
1357 let matched_text = &text_str[m.start..m.end];
1358 if self.validate_edit_chars(&self.patterns[0], matched_text, restriction) {
1359 return Some((
1360 m.start,
1361 FuzzyMatchResult {
1362 end: m.end,
1363 similarity: m.similarity,
1364 insertions: m.insertions,
1365 deletions: m.deletions,
1366 substitutions: m.substitutions,
1367 swaps: m.swaps,
1368 },
1369 ));
1370 }
1371 }
1372 }
1373
1374 None
1375 }
1376
1377 pub fn find_first_multi_pattern(
1384 &self,
1385 text: &[u8],
1386 threshold: f32,
1387 pattern_indices: &[usize],
1388 prefilter: &super::prefilter::Prefilter,
1389 ) -> Option<(usize, usize, FuzzyMatchResult)> {
1390 if pattern_indices.is_empty() {
1391 return None;
1392 }
1393
1394 let text_str = std::str::from_utf8(text).ok()?;
1395 let max_offset = prefilter.max_offset();
1396
1397 let mut last_tried: Option<usize> = None;
1399
1400 for candidate in prefilter.find_candidates(text) {
1401 for offset in 0..=max_offset {
1403 let pos = candidate + offset;
1404 if pos >= text.len() {
1405 continue;
1406 }
1407
1408 if pos > 0 && (text[pos] & 0b1100_0000) == 0b1000_0000 {
1410 continue;
1411 }
1412 if last_tried == Some(pos) {
1413 continue;
1414 }
1415 last_tried = Some(pos);
1416
1417 for &pattern_idx in pattern_indices {
1419 if pattern_idx >= self.bitap_matchers.len() {
1420 continue;
1421 }
1422
1423 let pattern_threshold =
1424 self.calculate_effective_threshold(pattern_idx, threshold);
1425
1426 if let Some(ref bitap) = self.bitap_matchers[pattern_idx]
1428 && let Some(m) = bitap.find_at_byte_position(text, pos, pattern_threshold)
1429 {
1430 if let Some(restriction) = self
1432 .edit_char_restrictions
1433 .get(pattern_idx)
1434 .and_then(|r| r.as_ref())
1435 {
1436 let matched_text = &text_str[m.start..m.end];
1437 if !self.validate_edit_chars(
1438 &self.patterns[pattern_idx],
1439 matched_text,
1440 restriction,
1441 ) {
1442 continue;
1443 }
1444 }
1445
1446 return Some((
1447 pattern_idx,
1448 m.start,
1449 FuzzyMatchResult {
1450 end: m.end,
1451 similarity: m.similarity,
1452 insertions: m.insertions,
1453 deletions: m.deletions,
1454 substitutions: m.substitutions,
1455 swaps: m.swaps,
1456 },
1457 ));
1458 }
1459
1460 if self
1462 .bitap_matchers
1463 .get(pattern_idx)
1464 .is_none_or(Option::is_none)
1465 {
1466 let nfa = &self.automata[pattern_idx];
1467 let candidates: FxHashSet<usize> = std::iter::once(pos).collect();
1468 if let Some(m) =
1469 nfa.find_first_with_candidates(text_str, pattern_threshold, &candidates)
1470 && m.start == pos
1471 {
1472 if let Some(restriction) = self
1474 .edit_char_restrictions
1475 .get(pattern_idx)
1476 .and_then(|r| r.as_ref())
1477 {
1478 let matched_text = &text_str[m.start..m.end];
1479 if !self.validate_edit_chars(
1480 &self.patterns[pattern_idx],
1481 matched_text,
1482 restriction,
1483 ) {
1484 continue;
1485 }
1486 }
1487
1488 return Some((
1489 pattern_idx,
1490 m.start,
1491 FuzzyMatchResult {
1492 end: m.end,
1493 similarity: m.similarity,
1494 insertions: m.insertions,
1495 deletions: m.deletions,
1496 substitutions: m.substitutions,
1497 swaps: m.swaps,
1498 },
1499 ));
1500 }
1501 }
1502 }
1503 }
1504 }
1505
1506 None
1507 }
1508
1509 pub fn find_first_multi_pattern_individual(
1515 &self,
1516 text: &[u8],
1517 threshold: f32,
1518 pattern_indices: &[usize],
1519 ) -> Option<(usize, usize, FuzzyMatchResult)> {
1520 if pattern_indices.is_empty() {
1521 return None;
1522 }
1523
1524 let mut best: Option<(usize, usize, FuzzyMatchResult)> = None;
1525
1526 for &pattern_idx in pattern_indices {
1528 if pattern_idx >= self.bitap_matchers.len() {
1529 continue;
1530 }
1531
1532 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
1533
1534 let Some(bitap) = self.bitap_matchers[pattern_idx].as_ref() else {
1536 continue;
1537 };
1538
1539 if let Some(m) = bitap.find_first_streaming(text, pattern_threshold) {
1541 let restriction = self
1543 .edit_char_restrictions
1544 .get(pattern_idx)
1545 .and_then(|r| r.as_ref());
1546 let validation_passed = match (restriction, std::str::from_utf8(text)) {
1547 (Some(r), Ok(text_str)) => {
1548 let matched_text = &text_str[m.start..m.end];
1549 self.validate_edit_chars(&self.patterns[pattern_idx], matched_text, r)
1550 }
1551 _ => true, };
1553
1554 if validation_passed {
1555 let result = FuzzyMatchResult {
1556 end: m.end,
1557 similarity: m.similarity,
1558 insertions: m.insertions,
1559 deletions: m.deletions,
1560 substitutions: m.substitutions,
1561 swaps: m.swaps,
1562 };
1563 if best
1566 .as_ref()
1567 .is_none_or(|(_, best_start, _)| m.start < *best_start)
1568 {
1569 best = Some((pattern_idx, m.start, result));
1570 }
1571 continue;
1572 }
1573 }
1575
1576 if pattern_idx < self.automata.len() {
1578 let nfa = &self.automata[pattern_idx];
1579 if let Ok(text_str) = std::str::from_utf8(text) {
1580 let matches = nfa.find_all(text_str, pattern_threshold);
1582 if let Some(m) = matches.into_iter().min_by_key(|m| m.start) {
1583 if let Some(restriction) = self
1585 .edit_char_restrictions
1586 .get(pattern_idx)
1587 .and_then(|r| r.as_ref())
1588 {
1589 let matched_text = &text_str[m.start..m.end];
1590 if !self.validate_edit_chars(
1591 &self.patterns[pattern_idx],
1592 matched_text,
1593 restriction,
1594 ) {
1595 continue;
1596 }
1597 }
1598
1599 let result = FuzzyMatchResult {
1600 end: m.end,
1601 similarity: m.similarity,
1602 insertions: m.insertions,
1603 deletions: m.deletions,
1604 substitutions: m.substitutions,
1605 swaps: m.swaps,
1606 };
1607 if best
1609 .as_ref()
1610 .is_none_or(|(_, best_start, _)| m.start < *best_start)
1611 {
1612 best = Some((pattern_idx, m.start, result));
1613 }
1614 }
1615 }
1616 }
1617 }
1618
1619 best
1620 }
1621
1622 #[must_use]
1627 pub fn calculate_min_effective_threshold(&self, user_threshold: f32) -> f32 {
1628 let mut min_threshold = user_threshold;
1629
1630 for idx in 0..self.patterns.len() {
1631 let pattern_threshold = self.calculate_effective_threshold(idx, user_threshold);
1632 if pattern_threshold < min_threshold {
1633 min_threshold = pattern_threshold;
1634 }
1635 }
1636
1637 min_threshold
1638 }
1639
1640 pub fn search_all_multi_pattern(
1647 &self,
1648 text: &str,
1649 threshold: f32,
1650 pattern_indices: &[usize],
1651 ) -> CachedMatches {
1652 let mut cached = CachedMatches::default();
1653
1654 if pattern_indices.is_empty() {
1655 return cached;
1656 }
1657
1658 for &pattern_idx in pattern_indices {
1661 if pattern_idx >= self.automata.len() {
1662 continue;
1663 }
1664
1665 let pattern_threshold = self.calculate_effective_threshold(pattern_idx, threshold);
1666
1667 let matches = if let Some(bitap) = self
1669 .bitap_matchers
1670 .get(pattern_idx)
1671 .and_then(|b| b.as_ref())
1672 {
1673 bitap.find_all(text, pattern_threshold)
1674 } else {
1675 self.automata[pattern_idx].find_all(text, pattern_threshold)
1676 };
1677
1678 for m in matches {
1679 if let Some(restriction) = self
1681 .edit_char_restrictions
1682 .get(pattern_idx)
1683 .and_then(|r| r.as_ref())
1684 {
1685 let matched_text = &text[m.start..m.end];
1686 if !self.validate_edit_chars(
1687 &self.patterns[pattern_idx],
1688 matched_text,
1689 restriction,
1690 ) {
1691 continue;
1692 }
1693 }
1694
1695 let result = FuzzyMatchResult {
1696 end: m.end,
1697 similarity: m.similarity,
1698 insertions: m.insertions,
1699 deletions: m.deletions,
1700 substitutions: m.substitutions,
1701 swaps: m.swaps,
1702 };
1703
1704 cached
1705 .by_pattern_and_start
1706 .entry((pattern_idx, m.start))
1707 .or_default()
1708 .push(result);
1709 }
1710 }
1711
1712 for matches in cached.by_pattern_and_start.values_mut() {
1714 matches.sort_by(|a, b| {
1715 b.similarity
1716 .partial_cmp(&a.similarity)
1717 .unwrap_or(std::cmp::Ordering::Equal)
1718 });
1719 }
1720
1721 cached
1722 }
1723
1724 pub fn find_at_cached(
1726 &self,
1727 cached: &CachedMatches,
1728 pattern_index: usize,
1729 from: usize,
1730 ) -> Option<FuzzyMatchResult> {
1731 cached.get(pattern_index, from).cloned()
1732 }
1733
1734 pub fn find_at(
1736 &self,
1737 text: &str,
1738 pattern_index: usize,
1739 from: usize,
1740 threshold: f32,
1741 ) -> Option<FuzzyMatchResult> {
1742 let pattern = self.patterns.get(pattern_index)?;
1743
1744 let max_edits = self.limits.get(pattern_index).and_then(|lim| {
1747 lim.as_ref().map(|l| {
1748 l.get_edits().unwrap_or_else(|| {
1749 let i = l.get_insertions().unwrap_or(0);
1750 let d = l.get_deletions().unwrap_or(0);
1751 let s = l.get_substitutions().unwrap_or(0);
1752 let t = l.get_swaps().unwrap_or(0);
1753 i.saturating_add(d).saturating_add(s).saturating_add(t)
1754 })
1755 })
1756 });
1757
1758 if from >= text.len() {
1760 let pattern_char_len = pattern.chars().count();
1762 if let Some(max) = max_edits
1763 && pattern_char_len <= max as usize
1764 {
1765 let deletions = pattern_char_len as u8;
1767 let max_len = pattern_char_len.max(1) as f32;
1768 let sim = (1.0 - f32::from(deletions) / max_len).max(0.0);
1769 if sim >= threshold {
1770 return Some(FuzzyMatchResult {
1771 end: from,
1772 similarity: sim,
1773 insertions: 0,
1774 deletions,
1775 substitutions: 0,
1776 swaps: 0,
1777 });
1778 }
1779 }
1780 return None;
1781 }
1782
1783 let substring = &text[from..];
1784
1785 if max_edits.is_none() || max_edits == Some(0) {
1786 if self.case_insensitive {
1788 if substring
1789 .chars()
1790 .take(pattern.chars().count())
1791 .zip(pattern.chars())
1792 .all(|(t, p)| t.to_lowercase().eq(p.to_lowercase()))
1793 {
1794 let end_byte = pattern.len().min(substring.len());
1795 let actual_end = if end_byte <= substring.len() {
1797 let mut e = end_byte;
1798 while e < substring.len() && !substring.is_char_boundary(e) {
1799 e += 1;
1800 }
1801 e.min(substring.len())
1802 } else {
1803 end_byte
1804 };
1805 return Some(FuzzyMatchResult {
1806 end: from + actual_end,
1807 similarity: 1.0,
1808 insertions: 0,
1809 deletions: 0,
1810 substitutions: 0,
1811 swaps: 0,
1812 });
1813 }
1814 } else if substring.starts_with(pattern) {
1815 return Some(FuzzyMatchResult {
1816 end: from + pattern.len(),
1817 similarity: 1.0,
1818 insertions: 0,
1819 deletions: 0,
1820 substitutions: 0,
1821 swaps: 0,
1822 });
1823 }
1824 return None;
1825 }
1826
1827 let nfa = self.automata.get(pattern_index)?;
1829 let effective_threshold = self.calculate_effective_threshold(pattern_index, threshold);
1830 let matches = nfa.find_all(substring, effective_threshold);
1831
1832 for m in matches {
1834 if m.start == 0 {
1835 if let Some(restriction) = self
1837 .edit_char_restrictions
1838 .get(pattern_index)
1839 .and_then(|r| r.as_ref())
1840 {
1841 let matched_text = &substring[m.start..m.end];
1842 if !self.validate_edit_chars(
1843 &self.patterns[pattern_index],
1844 matched_text,
1845 restriction,
1846 ) {
1847 continue;
1848 }
1849 }
1850
1851 return Some(FuzzyMatchResult {
1852 end: from + m.end,
1853 similarity: m.similarity,
1854 insertions: m.insertions,
1855 deletions: m.deletions,
1856 substitutions: m.substitutions,
1857 swaps: m.swaps,
1858 });
1859 }
1860 }
1861
1862 None
1863 }
1864
1865 pub fn find_with_boundary_insertions(
1868 &self,
1869 text: &str,
1870 pattern_index: usize,
1871 from: usize,
1872 to: Option<usize>,
1873 threshold: f32,
1874 cached: Option<&CachedMatches>,
1875 ) -> Option<FuzzyMatchResult> {
1876 let cached = cached?;
1878
1879 let limits = self.limits.get(pattern_index).and_then(|l| l.as_ref())?;
1880 let max_edits_val = limits.get_edits().unwrap_or_else(|| {
1881 let i = limits.get_insertions().unwrap_or(0);
1882 let d = limits.get_deletions().unwrap_or(0);
1883 let s = limits.get_substitutions().unwrap_or(0);
1884 let t = limits.get_swaps().unwrap_or(0);
1885 i.saturating_add(d).saturating_add(s).saturating_add(t)
1886 });
1887 let max_insertions = limits.get_insertions().unwrap_or(max_edits_val);
1888
1889 let effective_threshold = self.calculate_effective_threshold(pattern_index, threshold);
1890
1891 let max_window = max_insertions as usize;
1894 let search_start = from.saturating_sub(max_window);
1895
1896 let matches: Vec<_> = (search_start..=from)
1898 .filter_map(|start| {
1899 cached
1900 .get_all(pattern_index, start)
1901 .map(|results| results.iter().map(move |r| (start, r)))
1902 })
1903 .flatten()
1904 .collect();
1905
1906 let mut best: Option<FuzzyMatchResult> = None;
1907
1908 for (match_start, m) in matches {
1909 let start_insertions = (from.saturating_sub(match_start)) as u8;
1912 let end_insertions = if let Some(expected_end) = to {
1914 if m.end < expected_end {
1915 (expected_end - m.end) as u8
1916 } else {
1917 0
1918 }
1919 } else {
1920 0
1921 };
1922
1923 let total_boundary_insertions = start_insertions + end_insertions;
1924 let total_insertions = m.insertions + total_boundary_insertions;
1925 let total_edits =
1926 m.insertions + m.deletions + m.substitutions + total_boundary_insertions;
1927
1928 if total_edits > max_edits_val || total_insertions > max_insertions {
1929 continue;
1930 }
1931
1932 if let Some(restriction) = self
1934 .edit_char_restrictions
1935 .get(pattern_index)
1936 .and_then(|r| r.as_ref())
1937 {
1938 let mut boundary_valid = true;
1940 if start_insertions > 0 && match_start < from {
1941 for ch in text[match_start..from].chars() {
1942 if !restriction.allows(ch) {
1943 boundary_valid = false;
1944 break;
1945 }
1946 }
1947 }
1948 if boundary_valid
1950 && end_insertions > 0
1951 && let Some(expected_end) = to
1952 && m.end < expected_end
1953 && m.end < text.len()
1954 {
1955 let end_slice_end = expected_end.min(text.len());
1956 for ch in text[m.end..end_slice_end].chars() {
1957 if !restriction.allows(ch) {
1958 boundary_valid = false;
1959 break;
1960 }
1961 }
1962 }
1963 if !boundary_valid {
1964 continue;
1965 }
1966 }
1967
1968 let pattern_len = self.patterns[pattern_index].chars().count() as f32;
1970 let insertion_penalty = 0.5;
1971 let boundary_penalty = f32::from(total_boundary_insertions) * insertion_penalty;
1972 let adjusted_similarity = if pattern_len > 0.0 {
1973 ((pattern_len - boundary_penalty) / pattern_len).max(0.0) * m.similarity
1974 } else {
1975 m.similarity
1976 };
1977
1978 if adjusted_similarity < effective_threshold {
1979 continue;
1980 }
1981
1982 let result = FuzzyMatchResult {
1983 end: to.unwrap_or(m.end),
1984 similarity: adjusted_similarity,
1985 insertions: total_insertions,
1986 deletions: m.deletions,
1987 substitutions: m.substitutions,
1988 swaps: m.swaps,
1989 };
1990
1991 if best.as_ref().is_none_or(|b| {
1992 result.similarity > b.similarity
1993 || (result.similarity == b.similarity && result.total_edits() < b.total_edits())
1994 }) {
1995 best = Some(result);
1996 }
1997 }
1998
1999 best
2000 }
2001
2002 #[allow(clippy::unused_self)]
2008 fn calculate_effective_threshold(&self, _pattern_index: usize, user_threshold: f32) -> f32 {
2009 user_threshold
2013 }
2014
2015 pub fn pattern_text(&self, index: usize) -> Option<&str> {
2017 self.patterns.get(index).map(String::as_str)
2018 }
2019
2020 fn validate_edit_chars(
2024 &self,
2025 pattern: &str,
2026 matched_text: &str,
2027 restriction: &EditCharRestriction,
2028 ) -> bool {
2029 if pattern == matched_text {
2031 return true;
2032 }
2033
2034 if pattern.is_ascii() && matched_text.is_ascii() {
2036 return self.validate_edit_chars_ascii(
2037 pattern.as_bytes(),
2038 matched_text.as_bytes(),
2039 restriction,
2040 );
2041 }
2042
2043 let pattern_chars: Vec<char> = pattern.chars().collect();
2045 let text_chars: Vec<char> = matched_text.chars().collect();
2046 self.validate_edit_chars_slice(&pattern_chars, &text_chars, restriction)
2047 }
2048
2049 #[inline]
2051 #[allow(clippy::unused_self)]
2052 fn validate_edit_chars_ascii(
2053 &self,
2054 pattern: &[u8],
2055 text: &[u8],
2056 restriction: &EditCharRestriction,
2057 ) -> bool {
2058 let m = pattern.len();
2059 let n = text.len();
2060
2061 #[derive(Clone, Copy)]
2062 enum Op {
2063 None,
2064 Insert,
2065 Delete,
2066 Substitute,
2067 Transpose,
2068 }
2069
2070 const STACK_LIMIT: usize = 32;
2072 if m < STACK_LIMIT && n < STACK_LIMIT {
2073 let mut dp = [[(0usize, Op::None); STACK_LIMIT]; STACK_LIMIT];
2074
2075 for i in 1..=m {
2076 dp[i][0] = (i, Op::Delete);
2077 }
2078 for j in 1..=n {
2079 dp[0][j] = (j, Op::Insert);
2080 }
2081
2082 for i in 1..=m {
2083 for j in 1..=n {
2084 if pattern[i - 1] == text[j - 1] {
2085 dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2086 } else {
2087 let sub = dp[i - 1][j - 1].0 + 1;
2088 let del = dp[i - 1][j].0 + 1;
2089 let ins = dp[i][j - 1].0 + 1;
2090
2091 let trans = if i > 1
2092 && j > 1
2093 && pattern[i - 1] == text[j - 2]
2094 && pattern[i - 2] == text[j - 1]
2095 {
2096 dp[i - 2][j - 2].0 + 1
2097 } else {
2098 usize::MAX
2099 };
2100
2101 let mut best = (sub, Op::Substitute);
2102 if del < best.0 {
2103 best = (del, Op::Delete);
2104 }
2105 if ins < best.0 {
2106 best = (ins, Op::Insert);
2107 }
2108 if trans < best.0 {
2109 best = (trans, Op::Transpose);
2110 }
2111 dp[i][j] = best;
2112 }
2113 }
2114 }
2115
2116 let (mut i, mut j) = (m, n);
2118 while i > 0 || j > 0 {
2119 match dp[i][j].1 {
2120 Op::None => {
2121 i -= 1;
2122 j -= 1;
2123 }
2124 Op::Substitute => {
2125 if !restriction.allows(text[j - 1] as char) {
2126 return false;
2127 }
2128 i -= 1;
2129 j -= 1;
2130 }
2131 Op::Delete => {
2132 i -= 1;
2133 }
2134 Op::Insert => {
2135 if !restriction.allows(text[j - 1] as char) {
2136 return false;
2137 }
2138 j -= 1;
2139 }
2140 Op::Transpose => {
2141 i -= 2;
2142 j -= 2;
2143 }
2144 }
2145 }
2146 return true;
2147 }
2148
2149 let mut dp = vec![vec![(0usize, Op::None); n + 1]; m + 1];
2151
2152 for i in 1..=m {
2153 dp[i][0] = (i, Op::Delete);
2154 }
2155 for j in 1..=n {
2156 dp[0][j] = (j, Op::Insert);
2157 }
2158
2159 for i in 1..=m {
2160 for j in 1..=n {
2161 if pattern[i - 1] == text[j - 1] {
2162 dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2163 } else {
2164 let sub = dp[i - 1][j - 1].0 + 1;
2165 let del = dp[i - 1][j].0 + 1;
2166 let ins = dp[i][j - 1].0 + 1;
2167
2168 let trans = if i > 1
2169 && j > 1
2170 && pattern[i - 1] == text[j - 2]
2171 && pattern[i - 2] == text[j - 1]
2172 {
2173 dp[i - 2][j - 2].0 + 1
2174 } else {
2175 usize::MAX
2176 };
2177
2178 let mut best = (sub, Op::Substitute);
2179 if del < best.0 {
2180 best = (del, Op::Delete);
2181 }
2182 if ins < best.0 {
2183 best = (ins, Op::Insert);
2184 }
2185 if trans < best.0 {
2186 best = (trans, Op::Transpose);
2187 }
2188 dp[i][j] = best;
2189 }
2190 }
2191 }
2192
2193 let (mut i, mut j) = (m, n);
2194 while i > 0 || j > 0 {
2195 match dp[i][j].1 {
2196 Op::None => {
2197 i -= 1;
2198 j -= 1;
2199 }
2200 Op::Substitute => {
2201 if !restriction.allows(text[j - 1] as char) {
2202 return false;
2203 }
2204 i -= 1;
2205 j -= 1;
2206 }
2207 Op::Delete => {
2208 i -= 1;
2209 }
2210 Op::Insert => {
2211 if !restriction.allows(text[j - 1] as char) {
2212 return false;
2213 }
2214 j -= 1;
2215 }
2216 Op::Transpose => {
2217 i -= 2;
2218 j -= 2;
2219 }
2220 }
2221 }
2222 true
2223 }
2224
2225 #[inline]
2227 #[allow(clippy::unused_self)]
2228 fn validate_edit_chars_slice(
2229 &self,
2230 pattern: &[char],
2231 text: &[char],
2232 restriction: &EditCharRestriction,
2233 ) -> bool {
2234 let m = pattern.len();
2235 let n = text.len();
2236
2237 #[derive(Clone, Copy)]
2238 enum Op {
2239 None,
2240 Insert,
2241 Delete,
2242 Substitute,
2243 Transpose,
2244 }
2245
2246 const STACK_LIMIT: usize = 32;
2248 if m < STACK_LIMIT && n < STACK_LIMIT {
2249 let mut dp = [[(0usize, Op::None); STACK_LIMIT]; STACK_LIMIT];
2250
2251 for i in 1..=m {
2252 dp[i][0] = (i, Op::Delete);
2253 }
2254 for j in 1..=n {
2255 dp[0][j] = (j, Op::Insert);
2256 }
2257
2258 for i in 1..=m {
2259 for j in 1..=n {
2260 if pattern[i - 1] == text[j - 1] {
2261 dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2262 } else {
2263 let sub = dp[i - 1][j - 1].0 + 1;
2264 let del = dp[i - 1][j].0 + 1;
2265 let ins = dp[i][j - 1].0 + 1;
2266
2267 let trans = if i > 1
2268 && j > 1
2269 && pattern[i - 1] == text[j - 2]
2270 && pattern[i - 2] == text[j - 1]
2271 {
2272 dp[i - 2][j - 2].0 + 1
2273 } else {
2274 usize::MAX
2275 };
2276
2277 let mut best = (sub, Op::Substitute);
2278 if del < best.0 {
2279 best = (del, Op::Delete);
2280 }
2281 if ins < best.0 {
2282 best = (ins, Op::Insert);
2283 }
2284 if trans < best.0 {
2285 best = (trans, Op::Transpose);
2286 }
2287 dp[i][j] = best;
2288 }
2289 }
2290 }
2291
2292 let (mut i, mut j) = (m, n);
2293 while i > 0 || j > 0 {
2294 match dp[i][j].1 {
2295 Op::None => {
2296 i -= 1;
2297 j -= 1;
2298 }
2299 Op::Substitute => {
2300 if !restriction.allows(text[j - 1]) {
2301 return false;
2302 }
2303 i -= 1;
2304 j -= 1;
2305 }
2306 Op::Delete => {
2307 i -= 1;
2308 }
2309 Op::Insert => {
2310 if !restriction.allows(text[j - 1]) {
2311 return false;
2312 }
2313 j -= 1;
2314 }
2315 Op::Transpose => {
2316 i -= 2;
2317 j -= 2;
2318 }
2319 }
2320 }
2321 return true;
2322 }
2323
2324 let mut dp = vec![vec![(0usize, Op::None); n + 1]; m + 1];
2326
2327 for i in 1..=m {
2328 dp[i][0] = (i, Op::Delete);
2329 }
2330 for j in 1..=n {
2331 dp[0][j] = (j, Op::Insert);
2332 }
2333
2334 for i in 1..=m {
2335 for j in 1..=n {
2336 if pattern[i - 1] == text[j - 1] {
2337 dp[i][j] = (dp[i - 1][j - 1].0, Op::None);
2338 } else {
2339 let sub = dp[i - 1][j - 1].0 + 1;
2340 let del = dp[i - 1][j].0 + 1;
2341 let ins = dp[i][j - 1].0 + 1;
2342
2343 let trans = if i > 1
2344 && j > 1
2345 && pattern[i - 1] == text[j - 2]
2346 && pattern[i - 2] == text[j - 1]
2347 {
2348 dp[i - 2][j - 2].0 + 1
2349 } else {
2350 usize::MAX
2351 };
2352
2353 let mut best = (sub, Op::Substitute);
2354 if del < best.0 {
2355 best = (del, Op::Delete);
2356 }
2357 if ins < best.0 {
2358 best = (ins, Op::Insert);
2359 }
2360 if trans < best.0 {
2361 best = (trans, Op::Transpose);
2362 }
2363 dp[i][j] = best;
2364 }
2365 }
2366 }
2367
2368 let (mut i, mut j) = (m, n);
2369 while i > 0 || j > 0 {
2370 match dp[i][j].1 {
2371 Op::None => {
2372 i -= 1;
2373 j -= 1;
2374 }
2375 Op::Substitute => {
2376 if !restriction.allows(text[j - 1]) {
2377 return false;
2378 }
2379 i -= 1;
2380 j -= 1;
2381 }
2382 Op::Delete => {
2383 i -= 1;
2384 }
2385 Op::Insert => {
2386 if !restriction.allows(text[j - 1]) {
2387 return false;
2388 }
2389 j -= 1;
2390 }
2391 Op::Transpose => {
2392 i -= 2;
2393 j -= 2;
2394 }
2395 }
2396 }
2397 true
2398 }
2399}
2400
2401#[derive(Debug, Clone)]
2403pub struct FuzzyMatchResult {
2404 pub end: usize,
2406 pub similarity: f32,
2408 pub insertions: u8,
2410 pub deletions: u8,
2412 pub substitutions: u8,
2414 pub swaps: u8,
2416}
2417
2418impl FuzzyMatchResult {
2419 #[must_use]
2421 pub fn total_edits(&self) -> u8 {
2422 self.insertions
2423 .saturating_add(self.deletions)
2424 .saturating_add(self.substitutions)
2425 .saturating_add(self.swaps)
2426 }
2427}
2428
2429#[test]
2430fn test_search_all_the_quick() {
2431 use crate::ir::LiteralPattern;
2432 use crate::types::FuzzyLimits;
2433
2434 let limits = FuzzyLimits::new().edits(1);
2436 let lit = LiteralPattern::new("quik".to_string(), Some(limits), None);
2437
2438 let bridge = FuzzyBridge::new(&[lit], None, None, false).unwrap();
2439
2440 let text = "The quick brown fox";
2441 let cached = bridge.search_all(text, 0.5);
2442
2443 println!("search_all results for '{text}' with pattern 'quik~1':");
2444 println!(" by_pattern_and_start: {:?}", cached.by_pattern_and_start);
2445
2446 assert!(
2447 !cached.by_pattern_and_start.is_empty(),
2448 "Should find at least one match"
2449 );
2450}