1#![allow(
4 clippy::needless_range_loop,
5 clippy::similar_names,
6 clippy::missing_errors_doc,
7 clippy::match_same_arms,
8 clippy::too_many_lines,
9 clippy::let_underscore_untyped,
10 clippy::float_cmp,
11 clippy::allow_attributes,
12 let_underscore_drop
13)]
14use std::collections::HashMap;
17use std::sync::Arc;
18
19use smartcow::SmartCow;
20
21use super::builder::{FuzzyRegexBuilder, RegexConfig};
22use super::match_result::{CaptureMatches, Captures, Match, Matches, Split};
23use crate::compiler::build_nfa;
24use crate::engine::{Dfa, FuzzyBridge, MatchResult, Matcher, MatcherConfig, Prefilter};
25use crate::error::Result;
26use crate::ir::{Hir, LiteralPattern, Nfa, lower_with_unicode};
27use crate::parser::{Anchor, Ast, parse_with_flags};
28use std::cell::RefCell;
29
/// A compiled fuzzy regular expression.
///
/// Instances are produced by `FuzzyRegex::compile` (normally reached through
/// `FuzzyRegex::new` or `FuzzyRegexBuilder`). Besides the compiled automaton, the
/// struct caches several facts about the pattern that the search methods use to pick
/// a matching strategy.
#[allow(clippy::struct_excessive_bools)]
pub struct FuzzyRegex {
    /// Pattern source text exactly as handed to the compiler.
    pattern: String,
    /// NFA built from the lowered pattern by `build_nfa`.
    nfa: Nfa,
    /// Fuzzy matcher over `literals`, as returned by `FuzzyBridge::new`; may be absent.
    fuzzy_bridge: Option<FuzzyBridge>,
    /// Literal patterns returned by `build_nfa` alongside the NFA.
    literals: Vec<LiteralPattern>,
    /// Capture-group count reported by `collect_captures`.
    capture_count: usize,
    /// Capture-group name to index mapping reported by `collect_captures`.
    named_groups: HashMap<String, usize>,
    /// Builder configuration with the pattern's inline flags merged in.
    config: RegexConfig,
    /// Prefilter derived from the HIR; shared with every `Matcher` via `Arc`.
    prefilter: Arc<Prefilter>,
    /// Result of `is_anchored_at_start` on the HIR.
    anchored: bool,
    /// Result of `Nfa::has_lazy_quantifiers`.
    has_lazy: bool,
    /// Result of `Nfa::ends_with_end_anchor`.
    ends_with_end_anchor: bool,
    /// Upper bound from `Nfa::length_range`; only computed when
    /// `ends_with_end_anchor` is set, otherwise `None`.
    max_match_length: Option<usize>,
    /// Result of `Nfa::is_word_bounded_literal`.
    is_word_bounded_literal: bool,
    /// Optional DFA fast path. It is only built when the pattern has no captures, lazy
    /// quantifiers, match-start resets, simple alternation, lookahead or word
    /// boundaries. Wrapped in a `RefCell` because searches borrow it mutably through
    /// `&self`.
    dfa: Option<RefCell<Dfa>>,
    /// Named word lists registered through `set_word_list`; empty after compilation.
    word_lists: HashMap<SmartCow<'static>, Vec<SmartCow<'static>>>,
}
76
77impl std::fmt::Debug for FuzzyRegex {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 f.debug_struct("FuzzyRegex")
80 .field("pattern", &self.pattern)
81 .field("capture_count", &self.capture_count)
82 .field("anchored", &self.anchored)
83 .field("has_dfa", &self.dfa.is_some())
84 .finish_non_exhaustive()
85 }
86}
87
88impl FuzzyRegex {
89 pub fn new(pattern: &str) -> Result<Self> {
97 FuzzyRegexBuilder::new(pattern).build()
98 }
99
    /// Returns a [`FuzzyRegexBuilder`] for `pattern`, for callers that want to adjust
    /// options before compiling.
    #[must_use]
    pub fn builder(pattern: &str) -> FuzzyRegexBuilder {
        FuzzyRegexBuilder::new(pattern)
    }
105
106 pub(crate) fn compile(pattern: String, mut config: RegexConfig) -> Result<Self> {
108 let result = parse_with_flags(&pattern, config.verbose, config.dot_all, config.ungreedy)?;
110 let ast = result.ast;
111
112 if result.flags.best_match {
114 config.match_flags.best_match = true;
115 }
116 if result.flags.enhance_match {
117 config.match_flags.enhance_match = true;
118 }
119 if result.flags.posix {
120 config.match_flags.posix = true;
121 }
122 if result.flags.verbose {
123 config.verbose = true;
124 }
125 if result.flags.dot_all {
126 config.dot_all = true;
127 }
128 if result.flags.multi_line {
129 config.multi_line = true;
130 }
131 if result.flags.ungreedy {
132 config.ungreedy = true;
133 }
134 if result.flags.case_insensitive {
135 config.case_insensitive = true;
136 }
137 if result.flags.global {
138 config.match_flags.global = true;
139 }
140 if result.flags.unicode {
141 config.match_flags.unicode = true;
142 }
143
144 let (capture_count, named_groups) = collect_captures(&ast);
146
147 let hir = lower_with_unicode(&ast, config.default_edits, config.match_flags.unicode);
149
150 let (nfa, literals) = build_nfa(&hir);
152
153 let fuzzy_bridge = FuzzyBridge::new(
155 &literals,
156 config.default_limits.clone(),
157 config.penalties.clone(),
158 config.case_insensitive,
159 );
160
161 let prefilter = Arc::new(create_prefilter_from_hir(&hir, config.case_insensitive));
163
164 let anchored = is_anchored_at_start(&hir);
166
167 let has_lazy = nfa.has_lazy_quantifiers();
169
170 let ends_with_end_anchor = nfa.ends_with_end_anchor();
172
173 let max_match_length = if ends_with_end_anchor {
175 let (_, max_len) = nfa.length_range(|pattern_idx| {
176 fuzzy_bridge.as_ref().and_then(|b| {
177 let char_len = b.pattern_char_len(pattern_idx)?;
178 let max_edits = b.pattern_max_edits(pattern_idx).unwrap_or(0);
179 Some((char_len, max_edits))
180 })
181 });
182 max_len
183 } else {
184 None
185 };
186
187 let is_word_bounded_literal = nfa.is_word_bounded_literal();
189
190 let has_reset_match_start = nfa.has_reset_match_start();
198 let has_alternation = nfa.is_simple_alternation();
199 let has_lookahead = nfa.has_lookahead();
200 let has_word_boundary = nfa.has_word_boundary();
201 let dfa = if capture_count == 0
202 && !has_lazy
203 && !has_reset_match_start
204 && !has_alternation
205 && !has_lookahead
206 && !has_word_boundary
207 {
208 Dfa::from_nfa(
209 &nfa,
210 fuzzy_bridge.as_ref(),
211 config.case_insensitive,
212 config.multi_line,
213 )
214 .map(RefCell::new)
215 } else {
216 None
217 };
218
219 Ok(FuzzyRegex {
220 pattern,
221 nfa,
222 fuzzy_bridge,
223 literals,
224 capture_count,
225 named_groups,
226 config,
227 prefilter,
228 anchored,
229 has_lazy,
230 ends_with_end_anchor,
231 max_match_length,
232 is_word_bounded_literal,
233 dfa,
234 word_lists: HashMap::new(),
235 })
236 }
237
238 #[must_use]
240 pub fn as_str(&self) -> &str {
241 &self.pattern
242 }
243
    /// Returns the capture-group count recorded at compile time.
    // NOTE(review): whether this count includes the implicit whole-match group depends
    // on `collect_captures`, which is not visible here — confirm before relying on it.
    #[must_use]
    pub fn captures_len(&self) -> usize {
        self.capture_count
    }
249
250 fn make_match<'a>(
252 &self,
253 text: &'a str,
254 start: usize,
255 end: usize,
256 similarity: f32,
257 edits: crate::engine::EditCounts,
258 ) -> Match<'a> {
259 let is_partial = self.config.partial && end == text.len() && start < end;
260 Match::new_full(text, start, end, similarity, edits, None, is_partial)
261 }
262
263 fn check_timeout(&self, start: &std::time::Instant) -> Option<crate::error::Error> {
266 if let Some(timeout) = self.config.timeout
267 && start.elapsed() > timeout
268 {
269 return Some(crate::error::Error::Timeout { duration: timeout });
270 }
271 None
272 }
273
    /// Returns the similarity threshold stored in this regex's configuration.
    #[must_use]
    pub fn similarity_threshold(&self) -> f32 {
        self.config.similarity_threshold
    }
279
280 #[must_use]
284 pub fn literals(&self) -> &[LiteralPattern] {
285 &self.literals
286 }
287
288 #[must_use]
291 pub fn is_simple_fuzzy(&self) -> bool {
292 self.nfa.is_simple_fuzzy_only()
293 && self
294 .fuzzy_bridge
295 .as_ref()
296 .is_some_and(|b| b.pattern_count() == 1)
297 }
298
299 pub fn set_word_list(
311 &mut self,
312 name: impl Into<SmartCow<'static>>,
313 words: Vec<impl Into<SmartCow<'static>>>,
314 ) {
315 self.word_lists
316 .insert(name.into(), words.into_iter().map(Into::into).collect());
317 }
318
319 #[must_use]
321 pub fn get_word_list(&self, name: &str) -> Option<&[SmartCow<'static>]> {
322 self.word_lists.get(name).map(Vec::as_slice)
323 }
324
    /// Returns every registered word list, keyed by list name.
    #[must_use]
    pub fn named_lists(&self) -> &HashMap<SmartCow<'static>, Vec<SmartCow<'static>>> {
        &self.word_lists
    }
333
    /// Reports whether at least one word list has been registered.
    #[must_use]
    pub fn has_word_lists(&self) -> bool {
        !self.word_lists.is_empty()
    }
339
340 fn is_unanchored(&self) -> bool {
344 !self.anchored || self.config.multi_line
345 }
346
    /// Reports whether `find` locates a match anywhere in `text`.
    pub fn is_match(&self, text: &str) -> bool {
        self.find(text).is_some()
    }
351
    /// Reports whether `find_at` locates a match when searching `text` from byte
    /// offset `start`.
    pub fn is_match_at(&self, text: &str, start: usize) -> bool {
        self.find_at(text, start).is_some()
    }
356
    /// Reports whether `fullmatch` succeeds, i.e. the match found covers all of `text`.
    pub fn is_full_match(&self, text: &str) -> bool {
        self.fullmatch(text).is_some()
    }
363
364 pub fn fullmatch<'t>(&self, text: &'t str) -> Option<Match<'t>> {
369 let m = self.find(text)?;
370 if m.start() == 0 && m.end() == text.len() {
371 Some(m)
372 } else {
373 None
374 }
375 }
376
377 pub fn fullmatch_at<'t>(&self, text: &'t str, start: usize) -> Option<Match<'t>> {
381 if start > text.len() {
382 return None;
383 }
384 let m = self.find_at(text, start)?;
385 if m.start() == start && m.end() == text.len() {
386 Some(m)
387 } else {
388 None
389 }
390 }
391
392 #[inline]
396 pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> {
397 if self.config.match_flags.best_match
399 || self.config.match_flags.enhance_match
400 || self.config.match_flags.posix
401 {
402 let matcher = self.create_matcher(self.is_unanchored());
403 return matcher.find(text).map(|m| self.convert_match(text, m));
404 }
405
406 if let Some(ref dfa_cell) = self.dfa
409 && self.word_lists.is_empty()
410 {
411 let mut dfa = dfa_cell.borrow_mut();
412 return dfa.find(text).map(|m| {
413 self.make_match(
414 text,
415 m.start,
416 m.end,
417 1.0,
418 crate::engine::EditCounts::default(),
419 )
420 });
421 }
422
423 if !self.word_lists.is_empty() {
425 return self.find_word_list_first(text, self.config.similarity_threshold);
426 }
427
428 if self.is_simple_fuzzy()
433 && let Some(ref bridge) = self.fuzzy_bridge
434 {
435 let threshold = self.config.similarity_threshold;
436 if let Some(m) = bridge.search_first(text, threshold, 0) {
437 return Some(self.make_match(
438 text,
439 m.start,
440 m.end,
441 m.similarity,
442 crate::engine::EditCounts {
443 insertions: m.insertions,
444 deletions: m.deletions,
445 substitutions: m.substitutions,
446 swaps: m.swaps,
447 },
448 ));
449 }
450 return None;
451 }
452
453 self.find_iter(text).next()
455 }
456
457 pub fn find_with_timeout<'t>(
462 &self,
463 text: &'t str,
464 timeout: std::time::Duration,
465 ) -> crate::error::Result<Option<Match<'t>>> {
466 let start = std::time::Instant::now();
467
468 if start.elapsed() > timeout {
470 return Err(crate::error::Error::Timeout { duration: timeout });
471 }
472
473 let result = self.find(text);
475
476 if start.elapsed() > timeout {
478 return Err(crate::error::Error::Timeout { duration: timeout });
479 }
480
481 Ok(result)
482 }
483
484 pub fn find_with_config_timeout<'t>(
487 &self,
488 text: &'t str,
489 ) -> crate::error::Result<Option<Match<'t>>> {
490 let start = std::time::Instant::now();
491
492 if let Some(err) = self.check_timeout(&start) {
494 return Err(err);
495 }
496
497 let result = self.find(text);
498
499 if let Some(err) = self.check_timeout(&start) {
501 return Err(err);
502 }
503
504 Ok(result)
505 }
506
507 fn find_word_list_first<'a>(&self, text: &'a str, threshold: f32) -> Option<Match<'a>> {
510 if self.word_lists.is_empty() {
511 return None;
512 }
513
514 let max_edits = self
516 .fuzzy_bridge
517 .as_ref()
518 .and_then(|b| b.limits().first())
519 .and_then(|l| l.as_ref())
520 .and_then(super::super::types::FuzzyLimits::get_edits)
521 .unwrap_or(1) as usize;
522
523 let first_chars: std::collections::HashSet<char> = self
525 .word_lists
526 .values()
527 .flat_map(|words| words.iter().filter_map(|w| w.chars().next()))
528 .collect();
529
530 let has_candidate = text.chars().any(|c| first_chars.contains(&c));
532 if !has_candidate {
533 return None;
534 }
535
536 let mut best_match: Option<(usize, usize, f32, crate::engine::EditCounts)> = None;
538
539 for words in self.word_lists.values() {
540 for word in words {
541 let pattern_len = word.len();
542 if pattern_len == 0 {
543 continue;
544 }
545
546 if let Some(first) = word.chars().next()
548 && !text.contains(first)
549 {
550 continue;
551 }
552
553 if let Some(pos) = text.find(AsRef::<str>::as_ref(word)) {
555 let end = pos + pattern_len;
556 if threshold <= 1.0 && end > pos {
558 return Some(Match::new(
559 text,
560 pos,
561 end,
562 1.0,
563 crate::engine::EditCounts::default(),
564 ));
565 }
566 } else if max_edits > 0 {
567 let start_max = text
569 .len()
570 .saturating_sub(pattern_len.saturating_sub(max_edits));
571
572 for start in 0..=start_max {
573 let max_end = (start + pattern_len + max_edits).min(text.len());
574 let min_end =
575 (start + pattern_len.saturating_sub(max_edits)).max(start + 1);
576
577 for end in min_end..=max_end {
578 let substr = &text[start..end];
579 if substr.is_empty() {
580 continue;
581 }
582 let edits = simple_levenshtein(word, substr);
583 if edits <= max_edits as u32 && edits > 0 {
584 let sim =
585 1.0 - (edits as f32 / pattern_len.max(substr.len()) as f32);
586 if sim >= threshold {
587 match &best_match {
588 None => {
589 best_match = Some((
590 start,
591 end,
592 sim,
593 crate::engine::EditCounts {
594 insertions: if substr.len() > pattern_len {
595 (substr.len() - pattern_len) as u8
596 } else {
597 0
598 },
599 deletions: if pattern_len > substr.len() {
600 (pattern_len - substr.len()) as u8
601 } else {
602 0
603 },
604 substitutions: edits.min(pattern_len as u32)
605 as u8,
606 swaps: 0,
607 },
608 ));
609 }
610 Some((_, _, best_sim, _)) if sim > *best_sim => {
611 best_match = Some((
612 start,
613 end,
614 sim,
615 crate::engine::EditCounts {
616 insertions: if substr.len() > pattern_len {
617 (substr.len() - pattern_len) as u8
618 } else {
619 0
620 },
621 deletions: if pattern_len > substr.len() {
622 (pattern_len - substr.len()) as u8
623 } else {
624 0
625 },
626 substitutions: edits.min(pattern_len as u32)
627 as u8,
628 swaps: 0,
629 },
630 ));
631 }
632 _ => {}
633 }
634 if sim >= 1.0 {
636 return best_match.map(|(start, end, sim, edits)| {
637 Match::new(text, start, end, sim, edits)
638 });
639 }
640 }
641 }
642 }
643 }
644 }
645 }
646 }
647
648 best_match.map(|(start, end, sim, edits)| Match::new(text, start, end, sim, edits))
649 }
650
651 fn find_all_word_list<'a>(&self, text: &'a str) -> Vec<Match<'a>> {
653 if self.word_lists.is_empty() {
654 return Vec::new();
655 }
656
657 let threshold = self.config.similarity_threshold;
658
659 let max_edits = self
661 .fuzzy_bridge
662 .as_ref()
663 .and_then(|b| b.limits().first())
664 .and_then(|l| l.as_ref())
665 .and_then(super::super::types::FuzzyLimits::get_edits)
666 .unwrap_or(1) as usize;
667
668 let first_chars: std::collections::HashSet<char> = self
670 .word_lists
671 .values()
672 .flat_map(|words| words.iter().filter_map(|w| w.chars().next()))
673 .collect();
674
675 let has_candidate = text.chars().any(|c| first_chars.contains(&c));
677 if !has_candidate {
678 return Vec::new();
679 }
680
681 let mut matches = Vec::new();
682 let mut last_end = 0;
683
684 while last_end < text.len() {
686 let search_text = &text[last_end..];
687 let mut found_match: Option<(usize, usize, f32, crate::engine::EditCounts)> = None;
688 let mut found_exact_match: Option<(usize, usize)> = None;
689
690 for words in self.word_lists.values() {
691 for word in words {
692 let pattern_len = word.len();
693 if pattern_len == 0 {
694 continue;
695 }
696
697 if let Some(first) = word.chars().next()
699 && !search_text.contains(first)
700 {
701 continue;
702 }
703
704 if let Some(pos) = search_text.find(AsRef::<str>::as_ref(word)) {
706 let end = pos + pattern_len;
707 if end > pos {
708 match found_exact_match {
710 None => {
711 found_exact_match = Some((pos, end));
712 }
713 Some((existing_pos, _)) if pos < existing_pos => {
714 found_exact_match = Some((pos, end));
715 }
716 _ => {}
717 }
718 }
719 } else if max_edits > 0 {
720 let start_max = search_text
722 .len()
723 .saturating_sub(pattern_len.saturating_sub(max_edits));
724
725 for start in 0..=start_max {
726 let max_end = (start + pattern_len + max_edits).min(search_text.len());
727 let min_end =
728 (start + pattern_len.saturating_sub(max_edits)).max(start + 1);
729
730 for end in min_end..=max_end {
731 let substr = &search_text[start..end];
732 if substr.is_empty() {
733 continue;
734 }
735 let edits = simple_levenshtein(word, substr);
736 if edits <= max_edits as u32 && edits > 0 {
737 let sim =
738 1.0 - (edits as f32 / pattern_len.max(substr.len()) as f32);
739 if sim >= threshold {
740 match &found_match {
741 None => {
742 found_match = Some((
743 start,
744 end,
745 sim,
746 crate::engine::EditCounts {
747 insertions: if substr.len() > pattern_len {
748 (substr.len() - pattern_len) as u8
749 } else {
750 0
751 },
752 deletions: if pattern_len > substr.len() {
753 (pattern_len - substr.len()) as u8
754 } else {
755 0
756 },
757 substitutions: edits.min(pattern_len as u32)
758 as u8,
759 swaps: 0,
760 },
761 ));
762 }
763 Some((_, _, best_sim, _)) if sim > *best_sim => {
764 found_match = Some((
765 start,
766 end,
767 sim,
768 crate::engine::EditCounts {
769 insertions: if substr.len() > pattern_len {
770 (substr.len() - pattern_len) as u8
771 } else {
772 0
773 },
774 deletions: if pattern_len > substr.len() {
775 (pattern_len - substr.len()) as u8
776 } else {
777 0
778 },
779 substitutions: edits.min(pattern_len as u32)
780 as u8,
781 swaps: 0,
782 },
783 ));
784 }
785 _ => {}
786 }
787 }
788 }
789 }
790 }
791 }
792 }
793 }
794
795 if let Some((pos, end)) = found_exact_match {
796 let abs_start = last_end + pos;
797 let abs_end = last_end + end;
798 matches.push(Match::new(
799 text,
800 abs_start,
801 abs_end,
802 1.0,
803 crate::engine::EditCounts::default(),
804 ));
805 last_end = abs_end.max(abs_start + 1);
806 continue;
807 }
808
809 if let Some((start, end, sim, edits)) = found_match {
810 let abs_start = last_end + start;
811 let abs_end = last_end + end;
812 matches.push(Match::new(text, abs_start, abs_end, sim, edits));
813 last_end = abs_end.max(abs_start + 1);
815 } else {
816 break;
818 }
819 }
820
821 matches
822 }
823
824 fn find_single_matcher<'t>(&self, text: &'t str) -> Option<Match<'t>> {
827 if let Some(ref dfa_cell) = self.dfa {
828 let mut dfa = dfa_cell.borrow_mut();
829 return dfa.find(text).map(|m| {
830 Match::new(
831 text,
832 m.start,
833 m.end,
834 1.0,
835 crate::engine::EditCounts::default(),
836 )
837 });
838 }
839 let matcher = self.create_matcher(self.is_unanchored());
840 matcher.find(text).map(|m| self.convert_match(text, m))
841 }
842
843 pub fn find_at<'t>(&self, text: &'t str, start: usize) -> Option<Match<'t>> {
851 if self.anchored && !self.config.multi_line && start > 0 {
853 return None;
854 }
855
856 if start > text.len() {
858 return None;
859 }
860
861 let matcher = self.create_matcher(self.is_unanchored());
862
863 if self.ends_with_end_anchor
866 && !self.config.multi_line
867 && let Some(max_len) = self.max_match_length
868 {
869 let search_text = &text[start..];
871 let bytes = search_text.as_bytes();
872 let mut positions = Vec::with_capacity(max_len + 1);
873 let mut byte_pos = bytes.len();
874 let mut chars_counted = 0;
875
876 while byte_pos > 0 && chars_counted < max_len {
877 byte_pos -= 1;
878 if bytes[byte_pos] & 0b1100_0000 != 0b1000_0000 {
879 positions.push(start + byte_pos);
880 chars_counted += 1;
881 }
882 }
883
884 for &pos in &positions {
886 if let Some(m) = matcher.find_at(text, pos) {
887 return Some(self.convert_match(text, m));
888 }
889 }
890 return None;
891 }
892
893 if self.anchored && !self.config.multi_line {
895 return matcher
896 .find_at(text, start)
897 .map(|m| self.convert_match(text, m));
898 }
899
900 matcher
903 .find_at(text, start)
904 .map(|m| self.convert_match(text, m))
905 }
906
907 pub fn find_from<'t>(&self, text: &'t str, start: usize) -> Option<Match<'t>> {
912 let mut pos = start;
913 while pos <= text.len() {
914 if let Some(m) = self.find_at(text, pos) {
915 return Some(m);
916 }
917 if pos >= text.len() {
919 break;
920 }
921 pos += text[pos..].chars().next().map_or(1, char::len_utf8);
922 }
923 None
924 }
925
926 pub fn find_rev<'t>(&self, text: &'t str) -> Option<Match<'t>> {
931 let mut last = None;
933 for m in self.find_iter(text) {
934 last = Some(m);
935 }
936 last
937 }
938
939 pub fn find_iter_rev<'t>(&self, text: &'t str) -> Vec<Match<'t>> {
943 let mut matches = self.find_iter(text).collect::<Vec<_>>();
944 matches.reverse();
945 matches
946 }
947
948 pub fn find_iter<'t>(&self, text: &'t str) -> Matches<'t> {
950 if !self.word_lists.is_empty() {
952 return Matches::new(self.find_all_word_list(text));
953 }
954
955 if let Some(ref dfa_cell) = self.dfa {
958 return Matches::new(
959 dfa_cell
960 .borrow_mut()
961 .find_all(text)
962 .into_iter()
963 .map(|m| {
964 Match::new(
965 text,
966 m.start,
967 m.end,
968 1.0,
969 crate::engine::EditCounts::default(),
970 )
971 })
972 .collect(),
973 );
974 }
975
976 if self.anchored && !self.config.multi_line {
979 return Matches::new(self.find_single_matcher(text).into_iter().collect());
980 }
981
982 if self.is_simple_fuzzy() && self.fuzzy_bridge.is_some() {
984 return Matches::new(self.find_all_non_overlapping_fast(text));
985 }
986
987 if self.has_lazy && self.literals.len() == 1 && self.fuzzy_bridge.is_some() {
990 return Matches::new(self.find_all_lazy_literal_fast(text));
991 }
992
993 if self.is_word_bounded_literal && self.literals.len() == 1 && self.fuzzy_bridge.is_some() {
995 return Matches::new(self.find_all_word_bounded_literal_fast(text));
996 }
997
998 Matches::new(
1000 self.create_matcher(self.is_unanchored())
1001 .find_all(text)
1002 .into_iter()
1003 .map(|m| self.convert_match(text, m))
1004 .collect(),
1005 )
1006 }
1007
1008 pub fn find_n<'t>(&self, text: &'t str, n: usize) -> Vec<Match<'t>> {
1024 if n == 0 {
1025 return Vec::new();
1026 }
1027
1028 if n == 1 {
1030 return self.find(text).into_iter().collect();
1031 }
1032
1033 if let Some(ref dfa_cell) = self.dfa {
1035 let mut dfa = dfa_cell.borrow_mut();
1036 return dfa
1037 .find_n(text, n)
1038 .into_iter()
1039 .map(|m| {
1040 Match::new(
1041 text,
1042 m.start,
1043 m.end,
1044 1.0,
1045 crate::engine::EditCounts::default(),
1046 )
1047 })
1048 .collect();
1049 }
1050
1051 if self.anchored && !self.config.multi_line {
1053 return self.find_single_matcher(text).into_iter().collect();
1054 }
1055
1056 if self.is_simple_fuzzy()
1058 && let Some(ref bridge) = self.fuzzy_bridge
1059 {
1060 let threshold = self.config.similarity_threshold;
1061 return bridge
1062 .search_non_overlapping_n(text, threshold, 0, false, n)
1063 .into_iter()
1064 .map(|m| {
1065 Match::new(
1066 text,
1067 m.start,
1068 m.end,
1069 m.similarity,
1070 crate::engine::EditCounts {
1071 insertions: m.insertions,
1072 deletions: m.deletions,
1073 substitutions: m.substitutions,
1074 swaps: m.swaps,
1075 },
1076 )
1077 })
1078 .collect();
1079 }
1080
1081 let matcher = self.create_matcher(self.is_unanchored());
1083 matcher
1084 .find_n(text, n)
1085 .into_iter()
1086 .map(|m| self.convert_match(text, m))
1087 .collect()
1088 }
1089
1090 fn find_all_lazy_literal_fast<'t>(&self, text: &'t str) -> Vec<Match<'t>> {
1095 let Some(ref bridge) = self.fuzzy_bridge else {
1096 return Vec::new();
1097 };
1098
1099 let threshold = self.config.similarity_threshold;
1100
1101 let cached = bridge.search_all(text, threshold);
1103
1104 let mut matches = Vec::new();
1106 let mut prev_end = 0;
1107
1108 let mut literal_positions: Vec<(usize, usize)> = Vec::new();
1110 for ((pattern_idx, start), results) in cached.iter() {
1111 if pattern_idx != 0 {
1113 continue;
1114 }
1115 for result in results {
1116 literal_positions.push((start, result.end));
1117 }
1118 }
1119 literal_positions.sort_by_key(|(start, _)| *start);
1120
1121 for (_literal_start, literal_end) in literal_positions {
1123 if literal_end <= prev_end {
1125 continue;
1126 }
1127
1128 matches.push(Match::new(
1130 text,
1131 prev_end,
1132 literal_end,
1133 1.0, crate::engine::EditCounts::default(),
1135 ));
1136
1137 prev_end = literal_end;
1138 }
1139
1140 matches
1141 }
1142
1143 fn find_all_word_bounded_literal_fast<'t>(&self, text: &'t str) -> Vec<Match<'t>> {
1148 let Some(ref bridge) = self.fuzzy_bridge else {
1149 return Vec::new();
1150 };
1151
1152 let threshold = self.config.similarity_threshold;
1153
1154 let cached = bridge.search_all(text, threshold);
1156
1157 let mut matches = Vec::new();
1159 let mut prev_end = 0;
1160
1161 let mut literal_positions: Vec<(usize, usize)> = Vec::new();
1163 for ((pattern_idx, start), results) in cached.iter() {
1164 if pattern_idx != 0 {
1165 continue;
1166 }
1167 for result in results {
1168 literal_positions.push((start, result.end));
1169 }
1170 }
1171 literal_positions.sort_by_key(|(start, _)| *start);
1172
1173 for (literal_start, literal_end) in literal_positions {
1175 if literal_start < prev_end {
1177 continue;
1178 }
1179
1180 if Self::is_word_boundary_at(text, literal_start)
1182 && Self::is_word_boundary_at(text, literal_end)
1183 {
1184 matches.push(Match::new(
1185 text,
1186 literal_start,
1187 literal_end,
1188 1.0,
1189 crate::engine::EditCounts::default(),
1190 ));
1191 prev_end = literal_end;
1192 }
1193 }
1194
1195 matches
1196 }
1197
1198 fn is_word_boundary_at(text: &str, pos: usize) -> bool {
1200 let bytes = text.as_bytes();
1201
1202 let before_is_word = if pos > 0 {
1204 let mut start = pos - 1;
1205 while start > 0 && (bytes[start] & 0xC0) == 0x80 {
1206 start -= 1;
1207 }
1208 text[start..pos]
1209 .chars()
1210 .next()
1211 .is_some_and(|c| c.is_alphanumeric() || c == '_')
1212 } else {
1213 false
1214 };
1215
1216 let after_is_word = text[pos..]
1218 .chars()
1219 .next()
1220 .is_some_and(|c| c.is_alphanumeric() || c == '_');
1221
1222 before_is_word != after_is_word
1223 }
1224
1225 fn find_all_non_overlapping_fast<'t>(&self, text: &'t str) -> Vec<Match<'t>> {
1231 let Some(ref bridge) = self.fuzzy_bridge else {
1232 return Vec::new();
1233 };
1234
1235 let threshold = self.config.similarity_threshold;
1236
1237 let matches = bridge.search_non_overlapping(text, threshold, 0, false);
1240
1241 matches
1243 .into_iter()
1244 .map(|m| {
1245 Match::new(
1246 text,
1247 m.start,
1248 m.end,
1249 m.similarity,
1250 crate::engine::EditCounts {
1251 insertions: m.insertions,
1252 deletions: m.deletions,
1253 substitutions: m.substitutions,
1254 swaps: m.swaps,
1255 },
1256 )
1257 })
1258 .collect()
1259 }
1260
1261 pub fn find_all_overlapping<'t>(&self, text: &'t str) -> Vec<Match<'t>> {
1266 if self.is_simple_fuzzy()
1268 && let Some(ref bridge) = self.fuzzy_bridge
1269 {
1270 let threshold = self.config.similarity_threshold;
1271 let cached = if self.prefilter.is_active() {
1272 bridge.search_all_with_prefilter(text, threshold, &self.prefilter)
1273 } else {
1274 bridge.search_all(text, threshold)
1275 };
1276
1277 let mut matches = Vec::new();
1279 for ((pattern_idx, start), results) in cached.iter() {
1280 if pattern_idx != 0 {
1282 continue;
1283 }
1284 for result in results {
1285 matches.push(Match::new(
1286 text,
1287 start,
1288 result.end,
1289 result.similarity,
1290 crate::engine::EditCounts {
1291 insertions: result.insertions,
1292 deletions: result.deletions,
1293 substitutions: result.substitutions,
1294 swaps: result.swaps,
1295 },
1296 ));
1297 }
1298 }
1299 return matches;
1300 }
1301
1302 let matcher = self.create_matcher(self.is_unanchored());
1304 let mut results = Vec::new();
1305
1306 for (idx, _) in text.char_indices() {
1307 if let Some(m) = matcher.find(&text[idx..])
1308 && m.start == 0
1309 {
1310 results.push(Match::new(
1312 text,
1313 idx + m.start,
1314 idx + m.end,
1315 m.similarity,
1316 m.edits,
1317 ));
1318 }
1319 }
1320
1321 results
1322 }
1323
1324 pub fn find_all_overlapping_filtered<'t>(
1329 &self,
1330 text: &'t str,
1331 similarity_threshold: f32,
1332 ) -> Vec<Match<'t>> {
1333 if self.is_simple_fuzzy()
1335 && let Some(ref bridge) = self.fuzzy_bridge
1336 {
1337 let cached = if self.prefilter.is_active() {
1338 bridge.search_all_with_prefilter(text, similarity_threshold, &self.prefilter)
1339 } else {
1340 bridge.search_all(text, similarity_threshold)
1341 };
1342
1343 let mut matches = Vec::new();
1345 for ((pattern_idx, start), results) in cached.iter() {
1346 if pattern_idx != 0 {
1347 continue;
1348 }
1349 for result in results {
1350 if result.similarity >= similarity_threshold {
1351 matches.push(Match::new(
1352 text,
1353 start,
1354 result.end,
1355 result.similarity,
1356 crate::engine::EditCounts {
1357 insertions: result.insertions,
1358 deletions: result.deletions,
1359 substitutions: result.substitutions,
1360 swaps: result.swaps,
1361 },
1362 ));
1363 }
1364 }
1365 }
1366 return matches;
1367 }
1368
1369 let matcher = self.create_matcher(self.is_unanchored());
1371 let mut results = Vec::new();
1372
1373 for (idx, _) in text.char_indices() {
1374 if let Some(m) = matcher.find(&text[idx..])
1375 && m.start == 0
1376 && m.similarity >= similarity_threshold
1377 {
1378 results.push(Match::new(
1379 text,
1380 idx + m.start,
1381 idx + m.end,
1382 m.similarity,
1383 m.edits,
1384 ));
1385 }
1386 }
1387
1388 results
1389 }
1390
1391 pub fn captures_all_overlapping<'t>(
1395 &self,
1396 text: &'t str,
1397 similarity_threshold: f32,
1398 ) -> Vec<Captures<'t>> {
1399 let matcher = self.create_matcher(self.is_unanchored());
1400 let mut results = Vec::new();
1401
1402 for (idx, _) in text.char_indices() {
1403 if let Some(m) = matcher.find(&text[idx..])
1404 && m.start == 0
1405 && m.similarity >= similarity_threshold
1406 {
1407 let adjusted_slots: Vec<Option<(usize, usize)>> = m
1409 .captures
1410 .slots()
1411 .iter()
1412 .map(|slot| slot.map(|(s, e)| (idx + s, idx + e)))
1413 .collect();
1414
1415 results.push(Captures::new(
1416 text,
1417 adjusted_slots,
1418 self.named_groups.clone(),
1419 m.similarity,
1420 m.edits,
1421 ));
1422 }
1423 }
1424
1425 results
1426 }
1427
1428 pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
1430 let matcher = self.create_matcher(self.is_unanchored());
1431 matcher.find(text).map(|m| self.convert_captures(text, m))
1432 }
1433
1434 pub fn captures_at<'t>(&self, text: &'t str, start: usize) -> Option<Captures<'t>> {
1436 let matcher = self.create_matcher(self.is_unanchored());
1437 for (idx, _) in text[start..].char_indices() {
1438 if let Some(m) = matcher.find(&text[start + idx..]) {
1439 let mut caps = self.convert_captures(&text[start + idx..], m);
1440 caps = Captures::new(
1442 text,
1443 caps.iter()
1444 .map(|opt| opt.map(|m| (start + idx + m.start(), start + idx + m.end())))
1445 .collect(),
1446 self.named_groups.clone(),
1447 caps.similarity(),
1448 caps.edits().clone(),
1449 );
1450 return Some(caps);
1451 }
1452 }
1453 None
1454 }
1455
    /// Returns a [`CaptureMatches`] bound to this regex and `text`, positioned at
    /// offset 0.
    // NOTE(review): the iteration logic lives in `CaptureMatches`, which is not
    // visible here; presumably it yields one `Captures` per successive match.
    pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CaptureMatches<'r, 't> {
        CaptureMatches {
            regex: self,
            text,
            pos: 0,
        }
    }
1464
1465 pub fn replace(&self, text: &str, replacement: &str) -> String {
1472 if let Some(caps) = self.captures(text) {
1473 let m = caps.get(0).expect("match result always has index 0");
1474 let mut result = String::with_capacity(text.len());
1475 result.push_str(&text[..m.start()]);
1476 result.push_str(&caps.expand(replacement));
1477 result.push_str(&text[m.end()..]);
1478 result
1479 } else {
1480 text.to_string()
1481 }
1482 }
1483
1484 pub fn replace_all(&self, text: &str, replacement: &str) -> String {
1486 let mut result = String::with_capacity(text.len());
1487 let mut last_end = 0;
1488
1489 for caps in self.captures_iter(text) {
1490 if let Some(m) = caps.get(0) {
1491 result.push_str(&text[last_end..m.start()]);
1492 result.push_str(&caps.expand(replacement));
1493 last_end = m.end();
1494 }
1495 }
1496
1497 result.push_str(&text[last_end..]);
1498 result
1499 }
1500
1501 pub fn replace_all_with<F>(&self, text: &str, mut replacer: F) -> String
1503 where
1504 F: FnMut(&Captures<'_>) -> String,
1505 {
1506 let mut result = String::with_capacity(text.len());
1507 let mut last_end = 0;
1508
1509 for caps in self.captures_iter(text) {
1510 if let Some(m) = caps.get(0) {
1511 result.push_str(&text[last_end..m.start()]);
1512 result.push_str(&replacer(&caps));
1513 last_end = m.end();
1514 }
1515 }
1516
1517 result.push_str(&text[last_end..]);
1518 result
1519 }
1520
    /// Returns a [`Split`] bound to this regex and `text`, positioned at offset 0 and
    /// not yet finished.
    // NOTE(review): the splitting logic lives in `Split`, which is not visible here;
    // presumably it yields the pieces of `text` between successive matches.
    pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> {
        Split {
            regex: self,
            text,
            pos: 0,
            done: false,
        }
    }
1530
1531 pub fn splitn<'t>(&self, text: &'t str, n: usize) -> Vec<&'t str> {
1549 if n == 0 {
1550 return Vec::new();
1551 }
1552 if n == 1 {
1553 return vec![text];
1554 }
1555
1556 let matches = self.find_n(text, n - 1);
1558
1559 let mut parts = Vec::with_capacity(n);
1560 let mut last_end = 0;
1561
1562 for m in matches {
1563 parts.push(&text[last_end..m.start()]);
1564 last_end = m.end();
1565 }
1566
1567 parts.push(&text[last_end..]);
1569
1570 parts
1571 }
1572
1573 fn create_matcher(&self, unanchored: bool) -> Matcher<'_> {
1575 Matcher::with_prefilter(
1576 &self.nfa,
1577 self.fuzzy_bridge.as_ref(),
1578 self.capture_count,
1579 MatcherConfig {
1580 threshold: self.config.similarity_threshold,
1581 max_threads: self.config.max_threads,
1582 unanchored,
1583 best_match: self.config.match_flags.best_match,
1584 enhance_match: self.config.match_flags.enhance_match,
1585 posix: self.config.match_flags.posix,
1586 global: self.config.match_flags.global,
1587 multi_line: self.config.multi_line,
1588 prefer_shortest: self.has_lazy,
1589 unicode: self.config.match_flags.unicode,
1590 greedy_first: self.config.greedy_first,
1591 },
1592 self.prefilter.clone(),
1593 )
1594 }
1595
1596 fn convert_match<'a>(&self, text: &'a str, result: MatchResult) -> Match<'a> {
1598 let is_partial = self.config.partial && result.end == text.len();
1599 Match::new_full(
1600 text,
1601 result.start,
1602 result.end,
1603 result.similarity,
1604 result.edits,
1605 None,
1606 is_partial,
1607 )
1608 }
1609
1610 fn convert_captures<'t>(&self, text: &'t str, result: MatchResult) -> Captures<'t> {
1612 Captures::new(
1613 text,
1614 result.captures.slots().to_vec(),
1615 self.named_groups.clone(),
1616 result.similarity,
1617 result.edits,
1618 )
1619 }
1620
1621 pub fn stream(&self) -> super::streaming::StreamingMatcher<'_> {
1647 super::streaming::StreamingMatcher::new(self, self.config.similarity_threshold)
1648 }
1649
1650 pub fn is_match_bytes(&self, text: &[u8]) -> bool {
1654 self.find_bytes(text).is_some()
1655 }
1656
1657 pub fn find_bytes(&self, text: &[u8]) -> Option<super::streaming::StreamingMatch> {
1661 if let Some(bridge) = &self.fuzzy_bridge {
1663 if let Some((_pattern_idx, start, result)) = bridge.find_first_multi_pattern_individual(
1666 text,
1667 self.config.similarity_threshold,
1668 &[0],
1669 ) {
1670 return Some(super::streaming::StreamingMatch::new(
1671 start,
1672 result.end,
1673 result.total_edits(),
1674 result.similarity,
1675 ));
1676 }
1677 }
1678
1679 if let Ok(text_str) = std::str::from_utf8(text) {
1681 self.find(text_str).map(|m| {
1682 super::streaming::StreamingMatch::new(m.start(), m.end(), 0, m.similarity())
1683 })
1684 } else {
1685 None
1686 }
1687 }
1688
1689 pub fn find_iter_bytes<'r, 't>(
1693 &'r self,
1694 text: &'t [u8],
1695 ) -> super::streaming::ByteMatches<'r, 't> {
1696 super::streaming::ByteMatches::new(self, text)
1697 }
1698
1699 #[must_use]
1704 pub fn supports_streaming(&self) -> bool {
1705 self.fuzzy_bridge.as_ref().is_some_and(|bridge| {
1706 bridge.pattern_count() > 0 && bridge.all_patterns_bitap_compatible()
1707 })
1708 }
1709
1710 pub(crate) fn fuzzy_bridge(&self) -> Option<&FuzzyBridge> {
1712 self.fuzzy_bridge.as_ref()
1713 }
1714
1715 pub(crate) fn max_pattern_len(&self) -> Option<usize> {
1717 self.fuzzy_bridge.as_ref().map(FuzzyBridge::max_pattern_len)
1718 }
1719
1720 pub(crate) fn max_edits(&self) -> Option<u8> {
1722 self.fuzzy_bridge.as_ref().and_then(FuzzyBridge::max_edits)
1723 }
1724}
1725
1726impl Clone for FuzzyRegex {
1727 fn clone(&self) -> Self {
1728 Self::compile(self.pattern.clone(), self.config.clone())
1730 .expect("re-compilation of valid pattern should not fail")
1731 }
1732}
1733
1734fn collect_captures(ast: &Ast) -> (usize, HashMap<String, usize>) {
1736 let mut max_index = 0;
1737 let mut names = HashMap::new();
1738 collect_captures_recursive(ast, &mut max_index, &mut names);
1739 (max_index, names)
1740}
1741
1742fn collect_captures_recursive(
1743 ast: &Ast,
1744 max_index: &mut usize,
1745 names: &mut HashMap<String, usize>,
1746) {
1747 match ast {
1748 Ast::Group { index, name, expr } => {
1749 *max_index = (*max_index).max(*index);
1750 if let Some(n) = name {
1751 names.insert(n.clone(), *index);
1752 }
1753 collect_captures_recursive(expr, max_index, names);
1754 }
1755 Ast::NonCapturingGroup { expr, .. }
1756 | Ast::Quantified { expr, .. }
1757 | Ast::Lookahead { expr, .. }
1758 | Ast::Lookbehind { expr, .. } => {
1759 collect_captures_recursive(expr, max_index, names);
1760 }
1761 Ast::Concat(parts) => {
1762 for part in parts {
1763 collect_captures_recursive(part, max_index, names);
1764 }
1765 }
1766 Ast::Alternation(alts) => {
1767 for alt in alts {
1768 collect_captures_recursive(alt, max_index, names);
1769 }
1770 }
1771 _ => {}
1772 }
1773}
1774
1775fn create_prefilter_from_hir(hir: &Hir, case_insensitive: bool) -> Prefilter {
1781 let leading = extract_leading_literal(hir);
1783
1784 match leading {
1785 Some((text, limits)) if !text.is_empty() => {
1786 let max_edits = limits.as_ref().and_then(|lim| {
1788 lim.get_edits().or_else(|| {
1789 let i = lim.get_insertions().unwrap_or(0);
1791 let d = lim.get_deletions().unwrap_or(0);
1792 let s = lim.get_substitutions().unwrap_or(0);
1793 Some(i.saturating_add(d).saturating_add(s))
1794 })
1795 });
1796
1797 if let Some(edits) = max_edits {
1802 if edits > 0 {
1803 let min_len_for_pigeonhole = (3 * (edits as usize + 1)).max(10);
1810 if text.len() >= min_len_for_pigeonhole {
1811 crate::engine::prefilter::Prefilter::pigeonhole(&text, edits)
1812 } else {
1813 crate::engine::prefilter::Prefilter::fuzzy(&text, edits)
1815 }
1816 } else if case_insensitive {
1817 crate::engine::prefilter::Prefilter::case_insensitive(&text)
1818 } else {
1819 crate::engine::prefilter::Prefilter::exact(&text)
1820 }
1821 } else if case_insensitive {
1822 crate::engine::prefilter::Prefilter::case_insensitive(&text)
1823 } else {
1824 crate::engine::prefilter::Prefilter::exact(&text)
1825 }
1826 }
1827 _ => Prefilter::None,
1828 }
1829}
1830
1831fn extract_leading_literal(hir: &Hir) -> Option<(String, Option<crate::types::FuzzyLimits>)> {
1835 match hir {
1836 Hir::Literal { text, limits, .. } => Some((text.clone(), limits.clone())),
1838
1839 Hir::Concat(parts) => {
1841 if let Some(first) = parts.first() {
1842 extract_leading_literal(first)
1843 } else {
1844 None
1845 }
1846 }
1847
1848 Hir::Capture { expr, .. } => extract_leading_literal(expr),
1850
1851 _ => None,
1854 }
1855}
1856
1857fn is_anchored_at_start(hir: &Hir) -> bool {
1859 match hir {
1860 Hir::Anchor(Anchor::Start) => true,
1862
1863 Hir::Concat(parts) => {
1865 if let Some(first) = parts.first() {
1866 is_anchored_at_start(first)
1867 } else {
1868 false
1869 }
1870 }
1871
1872 Hir::Capture { expr, .. } => is_anchored_at_start(expr),
1874
1875 _ => false,
1877 }
1878}
1879
/// Computes the Levenshtein (insert/delete/substitute) distance between `a`
/// and `b`, measured in Unicode scalar values rather than UTF-8 bytes.
///
/// For inputs where both strings have at most 100 characters the exact
/// distance is returned. For longer inputs the function deliberately falls
/// back to the absolute difference of the character counts, which is a cheap
/// lower bound on the true distance.
///
/// Results that do not fit in `u32` saturate at `u32::MAX`.
fn simple_levenshtein(a: &str, b: &str) -> u32 {
    /// Longest input (in characters) for which the exact DP is run.
    const MAX_EXACT_LEN: usize = 100;

    // Lossless for every realistic length; saturates instead of wrapping.
    let saturate = |n: usize| u32::try_from(n).unwrap_or(u32::MAX);

    let a_len = a.chars().count();
    let b_len = b.chars().count();

    if a_len == 0 {
        return saturate(b_len);
    }
    if b_len == 0 {
        return saturate(a_len);
    }
    if a_len > MAX_EXACT_LEN || b_len > MAX_EXACT_LEN {
        return saturate(a_len.abs_diff(b_len));
    }

    let b_chars: Vec<char> = b.chars().collect();

    // Two rolling rows are enough: each cell only depends on the previous
    // row and the cell to its left.
    let mut prev: Vec<u32> = (0..=saturate(b_len)).collect();
    let mut curr = vec![0u32; b_len + 1];

    for (i, ca) in a.chars().enumerate() {
        curr[0] = saturate(i + 1);
        for (j, &cb) in b_chars.iter().enumerate() {
            let cost = u32::from(ca != cb);
            curr[j + 1] = (prev[j + 1] + 1)
                .min(curr[j] + 1)
                .min(prev[j] + cost);
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    prev[b_len]
}
1918
1919#[cfg(test)]
1920mod tests {
1921 use super::*;
1922
/// A plain literal matches anywhere in the haystack and not in unrelated text.
#[test]
fn test_simple_match() {
    let re = FuzzyRegex::new("hello").unwrap();
    for haystack in ["hello world", "say hello"] {
        assert!(re.is_match(haystack));
    }
    assert!(!re.is_match("goodbye"));
}
1930
/// A repeated lowercase class matches pure and embedded letter runs.
#[test]
fn test_char_class() {
    let letters = FuzzyRegex::new("[a-z]+").unwrap();
    for haystack in ["hello", "123abc456"] {
        assert!(letters.is_match(haystack));
    }
}
1937
/// ASCII ranges match their endpoints and interior, and reject characters
/// outside the range.
#[test]
fn test_ascii_ranges() {
    let lower = FuzzyRegex::new("[a-z]").unwrap();
    for hit in ["a", "m", "z"] {
        assert!(lower.is_match(hit));
    }
    for miss in ["A", "0"] {
        assert!(!lower.is_match(miss));
    }

    let upper = FuzzyRegex::new("[A-Z]").unwrap();
    for hit in ["A", "M", "Z"] {
        assert!(upper.is_match(hit));
    }
    assert!(!upper.is_match("a"));

    let digit = FuzzyRegex::new("[0-9]").unwrap();
    for hit in ["0", "5", "9"] {
        assert!(digit.is_match(hit));
    }
    assert!(!digit.is_match("a"));

    let alnum = FuzzyRegex::new("[a-zA-Z0-9]").unwrap();
    for hit in ["a", "Z", "9"] {
        assert!(alnum.is_match(hit));
    }
    assert!(!alnum.is_match("_"));
}
1971
/// Cyrillic ranges: uppercase-only, lowercase-only and a span covering both.
#[test]
fn test_unicode_ranges() {
    let upper = FuzzyRegex::new("[А-Я]").unwrap();
    for hit in ["А", "Я"] {
        assert!(upper.is_match(hit));
    }
    assert!(!upper.is_match("а"));

    let lower = FuzzyRegex::new("[а-я]").unwrap();
    for hit in ["а", "я"] {
        assert!(lower.is_match(hit));
    }
    assert!(!lower.is_match("А"));

    let both = FuzzyRegex::new("[А-я]").unwrap();
    for hit in ["А", "а", "Я", "я"] {
        assert!(both.is_match(hit));
    }
}
1993
/// A class mixing Latin and Cyrillic ranges accepts letters from both and
/// rejects digits and punctuation.
#[test]
fn test_mixed_unicode_ascii_ranges() {
    let re = FuzzyRegex::new("[a-zA-ZА-Яа-я]").unwrap();
    for hit in ["a", "Z", "А", "я"] {
        assert!(re.is_match(hit));
    }
    for miss in ["1", "!"] {
        assert!(!re.is_match(miss));
    }
}
2007
/// A Cyrillic range with a one-edit budget matches the exact word and a
/// variant with one differing letter.
#[test]
fn test_unicode_ranges_with_fuzzy() {
    let re = FuzzyRegex::new(r"(?:[а-я]+){e<=1}").unwrap();
    for haystack in ["привет", "привЕт", "привет"] {
        assert!(re.is_match(haystack));
    }
}
2022
/// Greek uppercase and lowercase ranges match their own endpoints.
#[test]
fn test_greek_ranges() {
    let upper = FuzzyRegex::new("[Α-Ω]").unwrap();
    for hit in ["Α", "Ω"] {
        assert!(upper.is_match(hit));
    }
    assert!(!upper.is_match("α"));

    let lower = FuzzyRegex::new("[α-ω]").unwrap();
    for hit in ["α", "ω"] {
        assert!(lower.is_match(hit));
    }
}
2036
/// Negated ranges match everything except the excluded characters.
#[test]
fn test_range_with_exclusion() {
    let non_digit = FuzzyRegex::new("[^0-9]").unwrap();
    for hit in ["a", "!"] {
        assert!(non_digit.is_match(hit));
    }
    assert!(!non_digit.is_match("5"));

    let non_letter = FuzzyRegex::new("[^a-zA-Z]").unwrap();
    for hit in ["1", "!"] {
        assert!(non_letter.is_match(hit));
    }
    assert!(!non_letter.is_match("a"));
}
2051
/// Edge cases: range plus literal, two disjoint ranges, single-char range.
#[test]
fn test_range_edge_cases() {
    let with_literal = FuzzyRegex::new("[a-z0-9_]").unwrap();
    for hit in ["a", "9", "_"] {
        assert!(with_literal.is_match(hit));
    }

    let disjoint = FuzzyRegex::new("[a-fm-z]").unwrap();
    for hit in ["a", "m"] {
        assert!(disjoint.is_match(hit));
    }
    assert!(!disjoint.is_match("g"));

    let single = FuzzyRegex::new("[a-a]").unwrap();
    assert!(single.is_match("a"));
    assert!(!single.is_match("b"));
}
2071
/// `find` returns the first digit run; `find_iter` yields every run.
#[test]
fn test_range_find() {
    let digits = FuzzyRegex::new("[0-9]+").unwrap();

    let first = digits.find("abc123def456").unwrap();
    assert_eq!(first.as_str(), "123");

    let run_count = digits.find_iter("1a2b3c4").count();
    assert_eq!(run_count, 4);
}
2083
/// With case-insensitivity enabled a lowercase range also matches uppercase.
#[test]
fn test_case_insensitive_with_ranges() {
    let builder = FuzzyRegexBuilder::new("[a-z]").case_insensitive(true);
    let re = builder.build().unwrap();

    for hit in ["a", "Z"] {
        assert!(re.is_match(hit));
    }
}
2095
/// `+` requires at least one repetition.
#[test]
fn test_quantifiers() {
    let re = FuzzyRegex::new("ab+c").unwrap();
    for hit in ["abc", "abbc", "abbbc"] {
        assert!(re.is_match(hit));
    }
    assert!(!re.is_match("ac"));
}
2104
/// Either alternative matches; unrelated text does not.
#[test]
fn test_alternation() {
    let re = FuzzyRegex::new("cat|dog").unwrap();
    for hit in ["cat", "dog"] {
        assert!(re.is_match(hit));
    }
    assert!(!re.is_match("bird"));
}
2112
/// Numbered groups capture the text on each side of `@`.
#[test]
fn test_capture_groups() {
    let re = FuzzyRegex::new("(\\w+)@(\\w+)").unwrap();
    let caps = re.captures("user@domain").unwrap();
    for (group, expected) in [(1, "user"), (2, "domain")] {
        assert_eq!(caps.get(group).unwrap().as_str(), expected);
    }
}
2120
/// Named groups are retrievable by name.
#[test]
fn test_named_groups() {
    let re = FuzzyRegex::new("(?<user>\\w+)@(?<domain>\\w+)").unwrap();
    let caps = re.captures("john@example").unwrap();
    for (group, expected) in [("user", "john"), ("domain", "example")] {
        assert_eq!(caps.name(group).unwrap().as_str(), expected);
    }
}
2128
/// `replace` substitutes the match.
#[test]
fn test_replace() {
    let re = FuzzyRegex::new("world").unwrap();
    let replaced = re.replace("hello world", "rust");
    assert_eq!(replaced, "hello rust");
}
2135
/// `replace_all` substitutes every occurrence.
#[test]
fn test_replace_all() {
    let re = FuzzyRegex::new("o").unwrap();
    let replaced = re.replace_all("hello world", "0");
    assert_eq!(replaced, "hell0 w0rld");
}
2142
/// Splitting on a comma yields the three fields.
#[test]
fn test_split() {
    let comma = FuzzyRegex::new(",").unwrap();
    let fields = comma.split("a,b,c").collect::<Vec<_>>();
    assert_eq!(fields, vec!["a", "b", "c"]);
}
2149
/// `^` restricts the match to the start of the text.
#[test]
fn test_anchors() {
    let anchored = FuzzyRegex::new("^hello").unwrap();
    let at_start = anchored.is_match("hello world");
    let in_middle = anchored.is_match("say hello");
    assert!(at_start);
    assert!(!in_middle);
}
2156
/// A `~2` pattern built with a 0.5 similarity threshold still matches the
/// exact word.
#[test]
fn test_fuzzy_matching() {
    let builder = FuzzyRegexBuilder::new("hello~2").similarity(0.5);
    let re = builder.build().unwrap();

    let exact = re.is_match("hello");
    assert!(exact);
}
2170
/// The builder stores the similarity threshold it was given.
#[test]
#[allow(clippy::float_cmp)]
fn test_builder() {
    let builder = FuzzyRegexBuilder::new("test")
        .case_insensitive(true)
        .similarity(0.9)
        .max_threads(500);
    let re = builder.build().unwrap();

    let threshold = re.similarity_threshold();
    assert_eq!(threshold, 0.9);
}
2183
2184 fn fuzzy_matches(pattern: &str, text: &str, max_edits: u8, similarity: f32) -> bool {
2190 let re = FuzzyRegexBuilder::new(&format!("(?:{pattern})"))
2191 .edits(max_edits)
2192 .similarity(similarity)
2193 .build()
2194 .unwrap();
2195 re.is_match(text)
2196 }
2197
2198 fn fuzzy_find(pattern: &str, text: &str, max_edits: u8, similarity: f32) -> Option<String> {
2200 let re = FuzzyRegexBuilder::new(&format!("(?:{pattern})"))
2201 .edits(max_edits)
2202 .similarity(similarity)
2203 .build()
2204 .unwrap();
2205 re.find(text).map(|m: Match<'_>| m.as_str().to_string())
2206 }
2207
2208 fn fuzzy_matches_ci(pattern: &str, text: &str, max_edits: u8, similarity: f32) -> bool {
2210 let re = FuzzyRegexBuilder::new(&format!("(?:{pattern})"))
2211 .edits(max_edits)
2212 .case_insensitive(true)
2213 .similarity(similarity)
2214 .build()
2215 .unwrap();
2216 re.is_match(text)
2217 }
2218
2219 fn fuzzy_find_ci(pattern: &str, text: &str, max_edits: u8, similarity: f32) -> Option<String> {
2221 let re = FuzzyRegexBuilder::new(&format!("(?:{pattern})"))
2222 .edits(max_edits)
2223 .case_insensitive(true)
2224 .similarity(similarity)
2225 .build()
2226 .unwrap();
2227 re.find(text).map(|m: Match<'_>| m.as_str().to_string())
2228 }
2229
/// Exact substrings are found inside a longer word.
#[test]
fn fac_test_exact_match() {
    let haystack = "saddamhussein";
    for needle in ["saddam", "hussein"] {
        assert!(fuzzy_matches(needle, haystack, 2, 0.5));
    }

    let prefix = fuzzy_find("saddam", haystack, 2, 0.5);
    assert_eq!(prefix, Some("saddam".to_string()));

    let suffix = fuzzy_find("hussein", haystack, 2, 0.5);
    assert!(suffix.is_some());
    let found_text = suffix.unwrap();
    let contains_needle = found_text.contains("hussein");
    let inside_needle = "hussein".contains(&found_text);
    let ends_with_needle = found_text.ends_with("hussein");
    assert!(
        contains_needle || inside_needle || ends_with_needle,
        "Expected to find 'hussein' or similar, got: {found_text}"
    );
}
2255
/// An extra letter after the needle does not prevent the exact hit.
#[test]
fn fac_test_extra_letter() {
    let haystack = "saddammhussein";
    assert!(fuzzy_matches("saddam", haystack, 2, 0.3));

    let found = fuzzy_find("saddam", haystack, 2, 0.3);
    assert_eq!(found, Some("saddam".to_string()));
}
2266
/// A needle with one letter missing in the haystack is still found.
#[test]
fn fac_test_missing_letter() {
    let haystack = "saddmhussin";
    assert!(fuzzy_matches("saddam", haystack, 2, 0.3));

    let found = fuzzy_find("saddam", haystack, 2, 0.3);
    assert!(found.is_some());
    let text = found.unwrap();
    let acceptable = text == "saddm" || text.contains("saddm");
    assert!(acceptable, "Found: {text}");
}
2279
/// A needle with substituted letters is matched and found.
#[test]
fn fac_test_substitution() {
    let haystack = "saddamhuzein";
    assert!(fuzzy_matches("hussein", haystack, 2, 0.2));

    let found = fuzzy_find("hussein", haystack, 2, 0.2);
    assert!(found.is_some());
}
2290
/// Two swapped letters are tolerated within a two-edit budget.
#[test]
fn fac_test_swap() {
    let haystack = "ALIKOYN";
    assert!(fuzzy_matches_ci("KONY", haystack, 2, 0.6));

    let found = fuzzy_find_ci("KONY", haystack, 2, 0.6);
    assert!(found.is_some());
    let matched = found.unwrap().to_uppercase();
    let has_both_halves = matched.contains("KO") && matched.contains("YN");
    assert!(
        has_both_halves,
        "Expected match containing KO and YN, got: {matched}"
    );
}
2308
/// Case-insensitive matching finds a mixed-case ASCII word with zero edits.
#[test]
fn fac_test_case_insensitive_ascii() {
    let haystack = "HeLlO WoRlD";
    assert!(fuzzy_matches_ci("world", haystack, 0, 0.9));

    let found = fuzzy_find_ci("world", haystack, 0, 0.9);
    assert!(found.is_some());
    let matched = found.unwrap();
    assert!(matched.eq_ignore_ascii_case("world"));
}
2319
/// Exact Cyrillic matching must work; the case-insensitive variant is only
/// reported, not asserted.
#[test]
fn fac_test_unicode_cyrillic() {
    let haystack = "юрий гагарин";
    assert!(fuzzy_matches("юрий", haystack, 0, 0.9));

    let ci_result = fuzzy_matches_ci("юрий", "ЮРИЙ ГАГАРИН", 4, 0.5);
    if !ci_result {
        println!("Note: Cyrillic case-insensitive matching may not be fully supported");
    }

    let found = fuzzy_find("юрий", haystack, 0, 0.9);
    assert!(found.is_some());
    let matched = found.unwrap();
    assert_eq!(matched, "юрий");
}
2344
/// A misspelled word and an exact word are both found in a long paragraph.
#[test]
fn fac_test_big_text() {
    let text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum eros ipsum, tincidutn eu metus ut, commodo accumsan mi. Vestibulum porta, orci nec ullamcorper posuere, eros tortor pharetra est, at porttitor mi leo a velit.";

    let misspelled_hit = fuzzy_matches_ci("tincidunt", text, 1, 0.8);
    assert!(misspelled_hit);

    let found = fuzzy_find_ci("tincidunt", text, 1, 0.8);
    assert!(found.is_some());

    let exact_hit = fuzzy_matches_ci("porta", text, 1, 0.8);
    assert!(exact_hit);
}
2360
/// With zero edits a two-letter needle must not match a different word.
#[test]
fn fac_test_regression_1() {
    let matched = fuzzy_matches_ci("CO", "CA", 0, 0.8);
    assert!(!matched);
}
2368
/// A needle one letter longer than the text matches the whole text.
#[test]
fn fac_test_regression_2() {
    let matched = fuzzy_matches("TOLA", "TOL", 2, 0.5);
    assert!(matched);

    let found = fuzzy_find("TOLA", "TOL", 2, 0.5);
    assert!(found.is_some());
    let text = found.unwrap();
    assert_eq!(text, "TOL");
}
2378
/// An unrelated word must not match at a 0.8 similarity threshold.
#[test]
fn fac_test_regression_0() {
    let matched = fuzzy_matches_ci("zavod", "NARODNY", 2, 0.8);
    assert!(!matched);
}
2384
/// The returned match must end with the exact occurrence of the needle.
#[test]
fn fac_test_non_overlapping_regression_0() {
    let haystack = "NA MENA";
    assert!(fuzzy_matches_ci("MENA", haystack, 2, 0.6));

    let found = fuzzy_find_ci("MENA", haystack, 2, 0.6);
    assert!(found.is_some());
    let matched = found.as_ref().unwrap();
    assert!(matched.ends_with("MENA"));
}
2400
/// A short needle is matched in text that also contains a near-miss.
#[test]
fn fac_test_non_overlapping_regression_2() {
    let matched = fuzzy_matches_ci("KO", "KWO KO LWIN", 1, 0.6);
    assert!(matched);
}
2406
/// Pattern longer than the text: if it matches, the whole text is returned;
/// otherwise two baseline cases are asserted and a note is printed.
#[test]
fn fac_test_truncated_short() {
    if !fuzzy_matches_ci("TOLA", "OLA", 2, 0.5) {
        assert!(fuzzy_matches_ci("TOLA", "TOLA", 0, 0.9));
        assert!(fuzzy_matches("tola", "xola", 1, 0.7));
        println!("Note: Truncated pattern matching (pattern > text) not fully supported");
        return;
    }

    let found = fuzzy_find_ci("TOLA", "OLA", 2, 0.5);
    assert!(found.is_some());
    let matched = found.unwrap().to_uppercase();
    assert_eq!(matched, "OLA");
}
2427
/// Pattern with a missing first letter in the text: if it matches, `find`
/// must succeed; otherwise two baseline cases are asserted and a note is
/// printed.
#[test]
fn fac_test_truncated_walijan() {
    if !fuzzy_matches_ci("WALIJAN", "alijan", 3, 0.7) {
        assert!(fuzzy_matches_ci("WALIJAN", "WALIJAN", 0, 0.9));
        assert!(fuzzy_matches("walijan", "xalijan", 1, 0.8));
        println!("Note: Truncated pattern matching (pattern > text) not fully supported");
        return;
    }

    let found = fuzzy_find_ci("WALIJAN", "alijan", 3, 0.7);
    assert!(found.is_some());
}
2444
/// A needle whose middle letter is missing from the text is still found.
#[test]
fn fac_test_missing_middle_char() {
    let matched = fuzzy_matches_ci("MOMIR", "Mmir", 3, 0.5);
    assert!(matched);

    let found = fuzzy_find_ci("MOMIR", "Mmir", 3, 0.5);
    assert!(found.is_some());
}
2455
/// Prints the outcome for SIMIC vs SIIC; nothing is asserted.
#[test]
fn fac_test_siic_simic() {
    let (pattern, text) = ("SIMIC", "SIIC");
    let result = fuzzy_matches_ci(pattern, text, 3, 0.7);
    println!("SIIC vs SIMIC result: {result}");
}
2463
/// A name with one dropped letter matches case-insensitively.
#[test]
fn fac_test_aminullah() {
    let matched = fuzzy_matches_ci("AMINULLAH", "Aminulah", 3, 0.7);
    assert!(matched);
}
2469
/// Prints the outcome for JAFAR vs Jaar; nothing is asserted.
#[test]
fn fac_test_jaar_jafar() {
    let (pattern, text) = ("JAFAR", "Jaar");
    let result = fuzzy_matches_ci(pattern, text, 3, 0.7);
    println!("Jaar vs JAFAR result: {result}");
}
2476
/// T/D substitution: if the case-insensitive match succeeds, `find` must
/// too; otherwise the lowercase case is asserted and a note is printed.
#[test]
fn fac_test_phonetic_td_substitution() {
    if !fuzzy_matches_ci("DJAMEL", "Tjamel", 3, 0.5) {
        assert!(fuzzy_matches("djamel", "tjamel", 1, 0.8));
        println!("Note: Case-insensitive T↔D test adjusted - case folding may differ");
        return;
    }

    let found = fuzzy_find_ci("DJAMEL", "Tjamel", 3, 0.5);
    assert!(found.is_some());
}
2497
/// `find_iter` with one allowed edit yields the exact word first.
#[test]
fn fac_test_find_iter() {
    let builder = FuzzyRegexBuilder::new("(?:the)").edits(1).similarity(0.6);
    let re = builder.build().unwrap();

    let hits = re.find_iter("the them then").collect::<Vec<_>>();
    assert!(!hits.is_empty(), "Should find at least one match");
    let first = &hits[0];
    assert_eq!(first.as_str(), "the");
}
2512
/// `find_iter` yields at least one match over several near-identical words.
#[test]
fn fac_test_multiple_matches() {
    let builder = FuzzyRegexBuilder::new("(?:cat)").edits(1).similarity(0.6);
    let re = builder.build().unwrap();

    let hits = re.find_iter("cat bat rat cat").collect::<Vec<_>>();
    assert!(!hits.is_empty());
}
2525
/// `replace` with a zero-edit grouped pattern substitutes the exact word.
#[test]
fn fac_test_replace() {
    let builder = FuzzyRegexBuilder::new("(?:world)").edits(0).similarity(0.9);
    let re = builder.build().unwrap();

    let replaced = re.replace("hello world", "rust");
    assert_eq!(replaced, "hello rust");
}
2539
/// `replace` also substitutes a fuzzy (one-edit) match.
#[test]
fn fac_test_replace_fuzzy() {
    let builder = FuzzyRegexBuilder::new("(?:foo)")
        .edits(1)
        .case_insensitive(true)
        .similarity(0.6);
    let re = builder.build().unwrap();

    let replaced = re.replace("fo0 and bar", "bar");
    assert_eq!(replaced, "bar and bar");
}
2553
/// `replace_all` with a zero-edit grouped pattern substitutes every hit.
#[test]
fn fac_test_replace_all() {
    let builder = FuzzyRegexBuilder::new("(?:o)").edits(0).similarity(0.9);
    let re = builder.build().unwrap();

    let replaced = re.replace_all("hello world", "0");
    assert_eq!(replaced, "hell0 w0rld");
}
2565
/// Splitting on a grouped comma yields the three fields.
#[test]
fn fac_test_split() {
    let builder = FuzzyRegexBuilder::new("(?:,)").similarity(0.9);
    let re = builder.build().unwrap();

    let fields = re.split("a,b,c").collect::<Vec<_>>();
    assert_eq!(fields, vec!["a", "b", "c"]);
}
2578
/// Splitting on fuzzy alternatives leaves at least one of the outer pieces.
#[test]
fn fac_test_split_fuzzy() {
    let builder = FuzzyRegexBuilder::new("(?:LOREM|IPSUM)")
        .edits(1)
        .case_insensitive(true)
        .similarity(0.8);
    let re = builder.build().unwrap();

    let parts = re.split("ZZZLrEMISuMAAA").collect::<Vec<_>>();
    let has_outer_piece = parts.contains(&"ZZZ") || parts.contains(&"AAA");
    assert!(
        has_outer_piece,
        "Should split on fuzzy matches. Got: {parts:?}"
    );
}
2596
/// A long name with an alternative spelling matches within five edits.
#[test]
fn fac_test_country() {
    let matched = fuzzy_matches_ci("CZECHOSLOVAKIA", "CHEKHOSLOVAKIA", 5, 0.7);
    assert!(matched);
}
2604
/// A multi-word zero-edit pattern returns exactly the full phrase.
#[test]
fn fac_test_longer_match_preference() {
    let builder = FuzzyRegexBuilder::new("(?:JOINT STOCK COMPANY)")
        .edits(0)
        .similarity(0.8);
    let re = builder.build().unwrap();

    let found = re.find("JOINT STOCK COMPANY GAZPROM");
    assert!(found.is_some());
    let matched = found.unwrap();
    assert_eq!(matched.as_str(), "JOINT STOCK COMPANY");
}
2621
/// One- and two-character patterns: exact, deletion, insertion and
/// substitution cases. The single-character substitution is only reported.
#[test]
fn fac_test_short_pattern() {
    assert!(fuzzy_matches("a", "a", 1, 0.5));

    let single_sub = fuzzy_matches("a", "b", 1, 0.0);
    if !single_sub {
        println!("Note: Single-char pattern with substitution gives 0% similarity");
    }

    let cases = [
        ("ab", "a", 1, 0.4),
        ("a", "ab", 1, 0.5),
        ("ab", "ab", 0, 0.9),
        ("ab", "ac", 1, 0.5),
        ("ab", "abc", 1, 0.5),
    ];
    for (pattern, text, edits, similarity) in cases {
        assert!(fuzzy_matches(pattern, text, edits, similarity));
    }
}
2651
/// A pattern containing a space matches the same text with and without an
/// edit budget.
#[test]
fn fac_test_whitespace_handling() {
    for (edits, similarity) in [(0, 0.9), (1, 0.8)] {
        assert!(fuzzy_matches("hello world", "hello world", edits, similarity));
    }
}
2659
2660 fn fuzzy_class_matches(pattern: &str, text: &str, similarity: f32) -> bool {
2666 let re = FuzzyRegexBuilder::new(pattern)
2667 .similarity(similarity)
2668 .build()
2669 .unwrap();
2670 re.is_match(text)
2671 }
2672
2673 fn fuzzy_class_find(pattern: &str, text: &str, similarity: f32) -> Option<(String, f32)> {
2674 let re = FuzzyRegexBuilder::new(pattern)
2675 .similarity(similarity)
2676 .build()
2677 .unwrap();
2678 re.find(text)
2679 .map(|m| (m.as_str().to_string(), m.similarity()))
2680 }
2681
/// `.` matches arbitrary single characters exactly.
#[test]
fn test_fuzzy_dot_exact() {
    for (pattern, text) in [("c.t", "cat"), ("...", "abc")] {
        assert!(fuzzy_class_matches(pattern, text, 0.5));
    }
}
2689
/// Dot patterns with one allowed edit match text one character short.
#[test]
fn test_fuzzy_dot_deletion() {
    for (pattern, text) in [("(?:c.t)~1", "ct"), ("(?:...)~1", "ab")] {
        assert!(fuzzy_class_matches(pattern, text, 0.4));
    }
}
2696
/// A dot pattern with one allowed edit matches text one character longer.
#[test]
fn test_fuzzy_dot_insertion() {
    let matched = fuzzy_class_matches("(?:c.t)~1", "caat", 0.4);
    assert!(matched);
}
2702
/// `\w` matches letters, digits and underscore, but not a space.
#[test]
fn test_fuzzy_word_char_exact() {
    for text in ["abc", "a1_"] {
        assert!(fuzzy_class_matches(r"\w\w\w", text, 0.5));
    }
    assert!(!fuzzy_class_matches(r"\w\w\w", "a b", 0.5));
}
2711
/// Three `\w` with one allowed edit match a two-character word.
#[test]
fn test_fuzzy_word_char_deletion() {
    let matched = fuzzy_class_matches(r"(?:\w\w\w)~1", "ab", 0.4);
    assert!(matched);
}
2717
/// `\d` matches digits only.
#[test]
fn test_fuzzy_digit_exact() {
    let all_digits = fuzzy_class_matches(r"\d\d\d", "123", 0.5);
    assert!(all_digits);
    let with_letter = fuzzy_class_matches(r"\d\d\d", "12a", 0.5);
    assert!(!with_letter);
}
2725
/// Three `\d` with one allowed edit match two digits.
#[test]
fn test_fuzzy_digit_deletion() {
    let matched = fuzzy_class_matches(r"(?:\d\d\d)~1", "12", 0.4);
    assert!(matched);
}
2731
/// With four digits available, the exact three-digit prefix is returned.
#[test]
fn test_fuzzy_digit_insertion() {
    let found = fuzzy_class_find(r"(?:\d\d\d)~1", "1234", 0.4);
    assert!(found.is_some());
    let (matched, _similarity) = found.unwrap();
    assert_eq!(matched, "123");
}
2740
/// `\s` matches both a space and a tab.
#[test]
fn test_fuzzy_whitespace_exact() {
    for text in ["a b", "a\tb"] {
        assert!(fuzzy_class_matches(r"a\sb", text, 0.5));
    }
}
2748
/// With one allowed edit the whitespace may be missing.
#[test]
fn test_fuzzy_whitespace_deletion() {
    let matched = fuzzy_class_matches(r"(?:a\sb)~1", "ab", 0.4);
    assert!(matched);
}
2754
/// Explicit character sets match members in any order and reject outsiders.
#[test]
fn test_fuzzy_char_class_exact() {
    let pattern = "[abc][abc][abc]";
    for text in ["abc", "cba"] {
        assert!(fuzzy_class_matches(pattern, text, 0.5));
    }
    assert!(!fuzzy_class_matches(pattern, "abd", 0.5));
}
2763
/// Three character sets with one allowed edit match two characters.
#[test]
fn test_fuzzy_char_class_deletion() {
    let matched = fuzzy_class_matches("(?:[abc][abc][abc])~1", "ab", 0.4);
    assert!(matched);
}
2769
/// Three lowercase ranges match three lowercase letters.
#[test]
fn test_fuzzy_char_range_exact() {
    let matched = fuzzy_class_matches("[a-z][a-z][a-z]", "xyz", 0.5);
    assert!(matched);
}
2774
/// Three lowercase ranges with one allowed edit match two letters.
#[test]
fn test_fuzzy_char_range_deletion() {
    let matched = fuzzy_class_matches("(?:[a-z][a-z][a-z])~1", "xy", 0.4);
    assert!(matched);
}
2779
/// Negated digit classes match letters and reject text containing a digit.
#[test]
fn test_fuzzy_negated_class_exact() {
    let pattern = "[^0-9][^0-9][^0-9]";
    let letters_only = fuzzy_class_matches(pattern, "abc", 0.5);
    assert!(letters_only);
    let with_digit = fuzzy_class_matches(pattern, "a1c", 0.5);
    assert!(!with_digit);
}
2787
/// Three negated classes with one allowed edit match two characters.
#[test]
fn test_fuzzy_negated_class_deletion() {
    let matched = fuzzy_class_matches("(?:[^0-9][^0-9][^0-9])~1", "ab", 0.4);
    assert!(matched);
}
2792
/// A range followed by two digit classes matches exactly.
#[test]
fn test_fuzzy_mixed_pattern_exact() {
    let matched = fuzzy_class_matches(r"[A-Z]\d\d", "A12", 0.5);
    assert!(matched);
}
2799
/// The mixed pattern with one allowed edit matches text one digit short.
#[test]
fn test_fuzzy_mixed_pattern_deletion() {
    let matched = fuzzy_class_matches(r"(?:[A-Z]\d\d)~1", "A1", 0.4);
    assert!(matched);
}
2804
/// The `\t` escape matches a literal tab.
#[test]
fn test_fuzzy_tab_exact() {
    let matched = fuzzy_class_matches(r"a\tb", "a\tb", 0.5);
    assert!(matched);
}
2811
/// With one allowed edit the tab may be missing.
#[test]
fn test_fuzzy_tab_deletion() {
    let matched = fuzzy_class_matches(r"(?:a\tb)~1", "ab", 0.4);
    assert!(matched);
}
2816
/// With one allowed edit the tab may be replaced by a space.
#[test]
fn test_fuzzy_tab_substitution() {
    let matched = fuzzy_class_matches(r"(?:a\tb)~1", "a b", 0.4);
    assert!(matched);
}
2822
/// The `\n` escape matches a literal newline.
#[test]
fn test_fuzzy_newline_exact() {
    let matched = fuzzy_class_matches(r"a\nb", "a\nb", 0.5);
    assert!(matched);
}
2827
/// With one allowed edit the newline may be missing.
#[test]
fn test_fuzzy_newline_deletion() {
    let matched = fuzzy_class_matches(r"(?:a\nb)~1", "ab", 0.4);
    assert!(matched);
}
2832
/// The `\r` escape matches exactly and may be dropped with one edit.
#[test]
fn test_fuzzy_carriage_return() {
    let exact = fuzzy_class_matches(r"a\rb", "a\rb", 0.5);
    assert!(exact);
    let dropped = fuzzy_class_matches(r"(?:a\rb)~1", "ab", 0.4);
    assert!(dropped);
}
2838
/// The `\x00` escape matches a NUL byte exactly and may be dropped with one
/// edit.
#[test]
fn test_fuzzy_null_char() {
    let exact = fuzzy_class_matches(r"a\x00b", "a\x00b", 0.5);
    assert!(exact);
    let dropped = fuzzy_class_matches(r"(?:a\x00b)~1", "ab", 0.4);
    assert!(dropped);
}
2844
/// Hex escapes match their characters exactly and tolerate one deletion.
#[test]
fn test_fuzzy_hex_escape() {
    let exact = fuzzy_class_matches(r"\x41\x42\x43", "ABC", 0.5);
    assert!(exact);
    let shortened = fuzzy_class_matches(r"(?:\x41\x42\x43)~1", "AB", 0.4);
    assert!(shortened);
}
2851
/// `\u` escapes match their characters exactly and tolerate one deletion.
#[test]
fn test_fuzzy_unicode_escape() {
    let exact = fuzzy_class_matches(r"\u0041\u0042", "AB", 0.5);
    assert!(exact);
    let shortened = fuzzy_class_matches(r"(?:\u0041\u0042\u0043)~1", "AB", 0.4);
    assert!(shortened);
}
2858
/// Control escapes inside character classes match exactly and tolerate one
/// deletion.
#[test]
fn test_fuzzy_escapes_in_char_class() {
    let cases = [
        (r"[\t\n][\t\n]", 0.5),
        (r"(?:[\t\n][\t\n][\t\n])~1", 0.4),
    ];
    for (pattern, similarity) in cases {
        assert!(fuzzy_class_matches(pattern, "\t\n", similarity));
    }
}
2870
/// Escaped metacharacters match themselves literally. Each case lists the
/// pattern, a text that must match and, where checked, a text that must not.
#[test]
fn test_basic_escapes() {
    let cases: [(&str, &str, Option<&str>); 12] = [
        (r"\.com", ".com", Some("com")),
        (r"a\|b", "a|b", Some("ab")),
        (r"\(test\)", "(test)", None),
        (r"\*", "*", None),
        (r"\+", "+", None),
        (r"\?", "?", None),
        (r"\$", "$", None),
        (r"\^", "^", None),
        (r"\\", "\\", None),
        (r"\[test\]", "[test]", None),
        (r"\{test\}", "{test}", None),
        (r"\~", "~", Some("test")),
    ];

    for (pattern, hit, miss) in cases {
        let re = FuzzyRegex::new(pattern).unwrap();
        assert!(re.is_match(hit));
        if let Some(miss) = miss {
            assert!(!re.is_match(miss));
        }
    }
}
2926
/// `~2` shorthand accepts the exact word plus a deletion, an insertion and
/// a substitution.
#[test]
fn test_tilde_fuzzy_shorthand() {
    let re = FuzzyRegex::new("hello~2").unwrap();
    for text in ["hello", "helo", "helloo", "hallo"] {
        assert!(re.is_match(text));
    }
}
2936
/// An escaped tilde is a literal; an unescaped one is the fuzzy shorthand.
#[test]
fn test_tilde_vs_escaped_tilde() {
    let literal = FuzzyRegex::new(r"a\~b").unwrap();
    assert!(literal.is_match("a~b"));

    let fuzzy = FuzzyRegex::new("hello~1").unwrap();
    for text in ["hello", "helo"] {
        assert!(fuzzy.is_match(text));
    }
}
2950
/// `\1` must repeat exactly what group 1 captured.
#[test]
fn test_backreference_basic() {
    let single = FuzzyRegex::new(r"(\w)\1").unwrap();
    for hit in ["aa", "bb"] {
        assert!(single.is_match(hit));
    }
    assert!(!single.is_match("ab"));

    let pair = FuzzyRegex::new(r"(\w\w)\1").unwrap();
    assert!(pair.is_match("abab"));
    assert!(!pair.is_match("abca"));
}
2966
/// Every match of a doubled-character pattern is two identical characters.
#[test]
fn test_backreference_find() {
    let re = FuzzyRegex::new(r"(\w)\1").unwrap();

    let hits = re.find_iter("aa bb aa cc aa").collect::<Vec<_>>();
    for hit in &hits {
        let matched = hit.as_str();
        assert_eq!(matched.len(), 2);
        let chars = matched.chars().collect::<Vec<char>>();
        assert_eq!(chars[0], chars[1]);
    }
}
2981
/// A backreference with an edit budget accepts near-repeats of group 1.
#[test]
fn test_backreference_with_fuzzy() {
    let one_edit = FuzzyRegex::new(r"(\w+) \1{e<=1}").unwrap();
    for text in ["abc abc", "abc bc"] {
        assert!(one_edit.is_match(text));
    }

    let two_edits = FuzzyRegex::new(r"(\w+) \1{e<=2}").unwrap();
    assert!(two_edits.is_match("hello hllo"));
}
2999
/// Chained fuzzy backreferences accept exact and slightly altered repeats.
#[test]
fn test_nested_backreference_with_fuzzy() {
    let re = FuzzyRegex::new(r"(\w+) (\1{e<=2}) (\2{e<=2})").unwrap();
    for text in ["abc abc abc", "abc abcc abc"] {
        assert!(re.is_match(text));
    }
}
3012
/// Exact backreferences reject text that does not repeat the capture.
#[test]
fn test_backreference_no_match() {
    for (pattern, text) in [(r"(\w)\1", "ab"), (r"(a)b\1", "abb")] {
        let re = FuzzyRegex::new(pattern).unwrap();
        assert!(!re.is_match(text));
    }
}
3023
/// A backreference to a quantified group matches the captured text only.
#[test]
fn test_backreference_edge_cases() {
    let re = FuzzyRegex::new(r"(abc)+def\1").unwrap();
    let repeated = re.is_match("abcdefabc");
    let different = re.is_match("abcdefxyz");
    assert!(repeated);
    assert!(!different);
}
3031
3032 #[test]
3033 fn test_named_escapes() {
3034 let re = FuzzyRegex::new(r"\d+").unwrap();
3036 assert!(re.is_match("123"));
3037 assert!(!re.is_match("abc"));
3038
3039 let re = FuzzyRegex::new(r"\D+").unwrap();
3041 assert!(re.is_match("abc"));
3042 assert!(!re.is_match("123"));
3043
3044 let re = FuzzyRegex::new(r"\w+").unwrap();
3046 assert!(re.is_match("abc_123"));
3047
3048 let re = FuzzyRegex::new(r"\W+").unwrap();
3050 assert!(re.is_match("!@#"));
3051
3052 let re = FuzzyRegex::new(r"\s+").unwrap();
3054 assert!(re.is_match(" "));
3055
3056 let re = FuzzyRegex::new(r"\S+").unwrap();
3058 assert!(re.is_match("abc"));
3059
3060 let re = FuzzyRegex::new(r"\bword\b").unwrap();
3062 assert!(re.is_match("word"));
3063 assert!(re.is_match("hello word"));
3064 assert!(!re.is_match("wordhello"));
3065
3066 let re = FuzzyRegex::new(r"\Bword\B").unwrap();
3068 assert!(re.is_match("awordb"));
3069 }
3070
3071 #[test]
3072 fn test_hex_escapes() {
3073 let re = FuzzyRegex::new(r"\x41\x42\x43").unwrap();
3075 assert!(re.is_match("ABC"));
3076
3077 let re = FuzzyRegex::new(r"\x41").unwrap();
3079 assert!(re.is_match("A"));
3080
3081 let re = FuzzyRegex::new(r"[\x41-\x5A]").unwrap();
3083 assert!(re.is_match("A"));
3084 assert!(re.is_match("Z"));
3085 assert!(!re.is_match("a"));
3086
3087 let re = FuzzyRegex::new(r"(?:\x41\x42)~1").unwrap();
3089 assert!(re.is_match("AB"));
3090 assert!(re.is_match("AC")); }
3092
3093 #[test]
3094 fn test_unicode_escapes() {
3095 let re = FuzzyRegex::new(r"\u0041\u0042\u0043").unwrap();
3097 assert!(re.is_match("ABC"));
3098
3099 let re = FuzzyRegex::new(r"[\u0041-\u005A]").unwrap();
3101 assert!(re.is_match("A"));
3102 }
3103
3104 #[test]
3105 fn test_control_escapes() {
3106 let re = FuzzyRegex::new("line1\\nline2").unwrap();
3108 assert!(re.is_match("line1\nline2"));
3109
3110 let re = FuzzyRegex::new("col1\\tcol2").unwrap();
3112 assert!(re.is_match("col1\tcol2"));
3113
3114 let re = FuzzyRegex::new("line1\\rline2").unwrap();
3116 assert!(re.is_match("line1\rline2"));
3117
3118 let re = FuzzyRegex::new("a\\nb\\tc\\rd").unwrap();
3120 assert!(re.is_match("a\nb\tc\rd"));
3121 }
3122
3123 #[test]
3124 fn test_octal_escapes() {
3125 let re = FuzzyRegex::new("\\0").unwrap();
3127 assert!(re.is_match("\0"));
3128 }
3129
3130 #[test]
3131 fn test_escape_in_fuzzy() {
3132 let re = FuzzyRegex::new(r"(?:\.com)~1").unwrap();
3134 assert!(re.is_match(".com"));
3135 assert!(re.is_match(",com")); let re = FuzzyRegex::new(r"(?:\d+)~1").unwrap();
3139 assert!(re.is_match("123"));
3140 assert!(re.is_match("1234")); let re = FuzzyRegex::new(r"(?:\+1)~1").unwrap();
3144 assert!(re.is_match("+1"));
3145 assert!(re.is_match("1")); }
3147
3148 #[test]
3149 fn test_escape_edge_cases() {
3150 let re = FuzzyRegex::new(r"\\\\").unwrap();
3152 assert!(re.is_match("\\\\"));
3153
3154 let re = FuzzyRegex::new(r"\n\\t\d").unwrap();
3156 assert!(re.is_match("\n\\t1"));
3157 }
3158
3159 #[test]
3160 fn test_escape_in_alternation() {
3161 let re = FuzzyRegex::new(r"foo|bar|\(baz\)").unwrap();
3162 assert!(re.is_match("foo"));
3163 assert!(re.is_match("bar"));
3164 assert!(re.is_match("(baz)"));
3165 }
3166
3167 #[test]
3168 fn test_escape_in_quantifiers() {
3169 let re = FuzzyRegex::new(r"\d{3}").unwrap();
3171 assert!(re.is_match("123"));
3172 assert!(!re.is_match("12"));
3173
3174 let re = FuzzyRegex::new(r"\{3\}").unwrap();
3176 assert!(re.is_match("{3}"));
3177 }
3178
3179 #[test]
3182 fn test_fuzzy_whitespace_class_mixed() {
3183 assert!(fuzzy_class_matches(r"\s\s\s", "\t\n ", 0.5));
3184 assert!(fuzzy_class_matches(r"(?:\s\s\s)~1", "\t\n", 0.4));
3185 }
3186
3187 #[test]
3192 fn test_fuzzy_char_class_default_threshold() {
3193 let re = FuzzyRegexBuilder::new("(?:[a-z][a-z][a-z])~1")
3195 .build()
3196 .unwrap();
3197
3198 assert!(re.is_match("abc"));
3200
3201 assert!(re.is_match("ab"));
3203
3204 let m = re.find("ab").unwrap();
3206 assert!(m.similarity() > 0.0 && m.similarity() < 1.0);
3207 }
3208
3209 #[test]
3210 fn test_fuzzy_dot_default_threshold() {
3211 let re = FuzzyRegexBuilder::new("(?:c.t)~1").build().unwrap();
3212
3213 assert!(re.is_match("cat")); assert!(re.is_match("ct")); assert!(re.is_match("caat")); }
3217
3218 #[test]
3219 fn test_fuzzy_digit_default_threshold() {
3220 let re = FuzzyRegexBuilder::new(r"(?:\d\d\d)~1").build().unwrap();
3221
3222 assert!(re.is_match("123")); assert!(re.is_match("12")); }
3225
3226 #[test]
3227 fn test_fuzzy_word_char_default_threshold() {
3228 let re = FuzzyRegexBuilder::new(r"(?:\w\w\w)~1").build().unwrap();
3229
3230 assert!(re.is_match("abc")); assert!(re.is_match("ab")); }
3233
3234 #[test]
3235 fn test_fuzzy_whitespace_default_threshold() {
3236 let re = FuzzyRegexBuilder::new(r"(?:a\sb)~1").build().unwrap();
3237
3238 assert!(re.is_match("a b")); assert!(re.is_match("ab")); }
3241
3242 #[test]
3243 fn test_fuzzy_escape_default_threshold() {
3244 let re = FuzzyRegexBuilder::new(r"(?:a\tb)~1").build().unwrap();
3245
3246 assert!(re.is_match("a\tb")); assert!(re.is_match("ab")); }
3249
3250 #[test]
3251 fn test_fuzzy_new_without_builder() {
3252 let re = FuzzyRegex::new("(?:[a-z][a-z][a-z])~1").unwrap();
3254
3255 assert!(re.is_match("abc")); assert!(re.is_match("ab")); }
3258
3259 #[test]
3260 fn test_fuzzy_char_class_substitution_default() {
3261 let re = FuzzyRegexBuilder::new("(?:[a-z][a-z][a-z])~1")
3262 .build()
3263 .unwrap();
3264
3265 assert!(re.is_match("ab1"));
3268 }
3269
3270 #[test]
3273 fn test_verbose_mode_whitespace() {
3274 let re = FuzzyRegexBuilder::new("(?x) hello world ")
3276 .build()
3277 .unwrap();
3278
3279 assert!(re.is_match("helloworld"));
3280 assert!(!re.is_match("hello world"));
3281 }
3282
3283 #[test]
3284 fn test_verbose_mode_comments() {
3285 let re = FuzzyRegexBuilder::new("(?x)hello # this is a comment\nworld")
3287 .build()
3288 .unwrap();
3289
3290 assert!(re.is_match("helloworld"));
3291 }
3292
3293 #[test]
3294 fn test_verbose_mode_complex() {
3295 let re = FuzzyRegexBuilder::new(
3297 r"(?x)
3298 ^ # start of string
3299 [a-z]+ # one or more lowercase letters
3300 \d{3} # exactly 3 digits
3301 $ # end of string
3302 ",
3303 )
3304 .build()
3305 .unwrap();
3306
3307 assert!(re.is_match("abc123"));
3308 assert!(!re.is_match("ABC123")); assert!(!re.is_match("abc12")); }
3311
3312 #[test]
3313 fn test_verbose_mode_via_builder() {
3314 let re = FuzzyRegexBuilder::new("hello world")
3316 .verbose(true)
3317 .build()
3318 .unwrap();
3319
3320 assert!(re.is_match("helloworld"));
3321 }
3322
3323 #[test]
3326 fn test_dot_default_no_newline() {
3327 let re = FuzzyRegexBuilder::new("a.b").build().unwrap();
3329
3330 assert!(re.is_match("aXb"));
3331 assert!(!re.is_match("a\nb")); }
3333
3334 #[test]
3335 fn test_dot_all_matches_newline() {
3336 let re = FuzzyRegexBuilder::new("(?s)a.b").build().unwrap();
3338
3339 assert!(re.is_match("aXb"));
3340 assert!(re.is_match("a\nb")); }
3342
3343 #[test]
3344 fn test_dot_all_via_builder() {
3345 let re = FuzzyRegexBuilder::new("a.b").dot_all(true).build().unwrap();
3347
3348 assert!(re.is_match("a\nb"));
3349 }
3350
3351 #[test]
3352 fn test_dot_all_multichar() {
3353 let re = FuzzyRegexBuilder::new("(?s)start.*end").build().unwrap();
3355
3356 assert!(re.is_match("start\nmiddle\nend"));
3357 }
3358
3359 #[test]
3362 fn test_caret_default_string_start() {
3363 let re = FuzzyRegexBuilder::new("^hello").build().unwrap();
3365
3366 assert!(re.is_match("hello world"));
3367 assert!(!re.is_match("say hello")); assert!(!re.is_match("line1\nhello")); }
3370
3371 #[test]
3372 fn test_dollar_default_string_end() {
3373 let re = FuzzyRegexBuilder::new("world$").build().unwrap();
3375
3376 assert!(re.is_match("hello world"));
3377 assert!(!re.is_match("world hello")); assert!(!re.is_match("world\nline2")); }
3380
3381 #[test]
3382 fn test_multiline_caret() {
3383 let re = FuzzyRegexBuilder::new("(?m)^hello").build().unwrap();
3385
3386 assert!(re.is_match("hello world")); assert!(re.is_match("line1\nhello")); assert!(!re.is_match("say hello")); }
3390
3391 #[test]
3392 fn test_multiline_dollar() {
3393 let re = FuzzyRegexBuilder::new("(?m)world$").build().unwrap();
3395
3396 assert!(re.is_match("hello world")); assert!(re.is_match("world\nline2")); assert!(!re.is_match("world hello")); }
3400
3401 #[test]
3402 fn test_multiline_via_builder() {
3403 let re = FuzzyRegexBuilder::new("^line")
3405 .multi_line(true)
3406 .build()
3407 .unwrap();
3408
3409 assert!(re.is_match("first\nline2"));
3410 }
3411
3412 #[test]
3413 fn test_multiline_both_anchors() {
3414 let re = FuzzyRegexBuilder::new("(?m)^hello$").build().unwrap();
3416
3417 assert!(re.is_match("hello")); assert!(re.is_match("hello\nworld")); assert!(re.is_match("world\nhello")); assert!(re.is_match("line1\nhello\nline3")); assert!(!re.is_match("hello world")); }
3423
3424 #[test]
3425 fn test_multiline_find_iter() {
3426 let re = FuzzyRegexBuilder::new("(?m)^\\w+").build().unwrap();
3428
3429 let text = "first\nsecond\nthird";
3430 let matches: Vec<_> = re.find_iter(text).collect();
3431
3432 assert_eq!(matches.len(), 3);
3433 assert_eq!(matches[0].as_str(), "first");
3434 assert_eq!(matches[1].as_str(), "second");
3435 assert_eq!(matches[2].as_str(), "third");
3436 }
3437
3438 #[test]
3439 fn test_multiline_find_all() {
3440 let re = FuzzyRegexBuilder::new("(?m)^hello$").build().unwrap();
3442
3443 let text = "hello\nworld\nhello\nfoo\nhello";
3444 let matches: Vec<_> = re.find_iter(text).collect();
3445
3446 assert_eq!(matches.len(), 3);
3447 assert_eq!(matches[0].as_str(), "hello");
3448 assert_eq!(matches[1].as_str(), "hello");
3449 assert_eq!(matches[2].as_str(), "hello");
3450 }
3451
3452 #[test]
3453 fn test_multiline_fuzzy() {
3454 let re = FuzzyRegexBuilder::new("(?m)^(?:hello){e<=1}")
3456 .build()
3457 .unwrap();
3458
3459 assert!(re.is_match("hello"));
3461 assert!(re.is_match("hallo")); assert!(re.is_match("ello")); assert!(re.is_match("hello\nhallo")); }
3465
3466 #[test]
3467 fn test_multiline_fuzzy_find() {
3468 let re = FuzzyRegexBuilder::new("(?m)(?:test){e<=1}")
3470 .build()
3471 .unwrap();
3472
3473 assert!(re.is_match("test"));
3475 assert!(re.is_match("tset")); let m = re.find("test\ntset").unwrap();
3479 assert_eq!(m.as_str(), "test");
3480 }
3481
3482 #[test]
3483 fn test_multiline_find_rev() {
3484 let re = FuzzyRegexBuilder::new("(?m)^\\d+").build().unwrap();
3486
3487 let text = "123\n456\n789";
3488
3489 let m = re.find(text).unwrap();
3491 assert_eq!(m.as_str(), "123");
3492
3493 let m = re.find_rev(text).unwrap();
3495 assert_eq!(m.as_str(), "789");
3496 }
3497
3498 #[test]
3499 fn test_multiline_alternation() {
3500 let re = FuzzyRegexBuilder::new("(?m)^(foo|bar)$").build().unwrap();
3502
3503 assert!(re.is_match("foo"));
3504 assert!(re.is_match("bar"));
3505 assert!(re.is_match("foo\nbar")); assert!(!re.is_match("foobar")); }
3508
3509 #[test]
3512 fn test_combined_verbose_dotall() {
3513 let re = FuzzyRegexBuilder::new("(?x)(?s) a . b ").build().unwrap();
3514
3515 assert!(re.is_match("a\nb"));
3516 }
3517
3518 #[test]
3519 fn test_combined_verbose_multiline() {
3520 let re = FuzzyRegexBuilder::new(
3521 r"(?x)(?m)
3522 ^start # line start
3523 .* # anything
3524 end$ # line end
3525 ",
3526 )
3527 .build()
3528 .unwrap();
3529
3530 assert!(re.is_match("startXend"));
3531 assert!(re.is_match("prefix\nstartXend\nsuffix"));
3532 }
3533
3534 #[test]
3535 fn test_combined_all_flags() {
3536 let re = FuzzyRegexBuilder::new(
3538 r"(?x)(?s)(?m)
3539 ^line # start of line
3540 .+ # any chars including newlines
3541 end$ # end of line
3542 ",
3543 )
3544 .build()
3545 .unwrap();
3546
3547 assert!(re.is_match("line\nmulti\nend"));
3548 }
3549
3550 #[test]
3556 fn test_greedy_star_parses() {
3557 let re = FuzzyRegexBuilder::new("a.*b").build().unwrap();
3559
3560 assert!(re.is_match("ab"));
3562 assert!(re.is_match("aXb"));
3563 assert!(re.is_match("aXYZb"));
3564 }
3565
3566 #[test]
3567 fn test_non_greedy_star_parses() {
3568 let re = FuzzyRegexBuilder::new("a.*?b").build().unwrap();
3570
3571 assert!(re.is_match("ab"));
3572 assert!(re.is_match("aXb"));
3573 assert!(re.is_match("aXYZb"));
3574 }
3575
3576 #[test]
3577 fn test_greedy_plus_parses() {
3578 let re = FuzzyRegexBuilder::new("a.+b").build().unwrap();
3580
3581 assert!(!re.is_match("ab")); assert!(re.is_match("aXb"));
3583 assert!(re.is_match("aXYZb"));
3584 }
3585
3586 #[test]
3587 fn test_non_greedy_plus_parses() {
3588 let re = FuzzyRegexBuilder::new("a.+?b").build().unwrap();
3590
3591 assert!(!re.is_match("ab"));
3592 assert!(re.is_match("aXb"));
3593 assert!(re.is_match("aXYZb"));
3594 }
3595
3596 #[test]
3597 fn test_greedy_question_default() {
3598 let re = FuzzyRegexBuilder::new("ab?c").build().unwrap();
3600
3601 assert!(re.is_match("abc"));
3603 assert!(re.is_match("ac"));
3605 }
3606
3607 #[test]
3608 fn test_non_greedy_question_parses() {
3609 let re = FuzzyRegexBuilder::new("ab??c").build().unwrap();
3611
3612 assert!(re.is_match("abc"));
3613 assert!(re.is_match("ac"));
3614 }
3615
3616 #[test]
3617 fn test_greedy_brace_quantifier() {
3618 let re = FuzzyRegexBuilder::new("a.{1,3}b").build().unwrap();
3620
3621 assert!(!re.is_match("ab"));
3622 assert!(re.is_match("aXb"));
3623 assert!(re.is_match("aXYb"));
3624 assert!(re.is_match("aXYZb"));
3625 assert!(!re.is_match("aXYZWb")); }
3627
3628 #[test]
3629 fn test_non_greedy_brace_quantifier_parses() {
3630 let re = FuzzyRegexBuilder::new("a.{1,3}?b").build().unwrap();
3632
3633 assert!(!re.is_match("ab"));
3634 assert!(re.is_match("aXb"));
3635 assert!(re.is_match("aXYb"));
3636 assert!(re.is_match("aXYZb"));
3637 }
3638
3639 #[test]
3642 fn test_ungreedy_flag_parses() {
3643 let re = FuzzyRegexBuilder::new("(?U)a.*b").build().unwrap();
3645
3646 assert!(re.is_match("ab"));
3647 assert!(re.is_match("aXb"));
3648 }
3649
3650 #[test]
3651 fn test_ungreedy_flag_inverts_modifier() {
3652 let re = FuzzyRegexBuilder::new("(?U)a.*?b").build().unwrap();
3654
3655 assert!(re.is_match("ab"));
3656 assert!(re.is_match("aXb"));
3657 }
3658
3659 #[test]
3660 fn test_ungreedy_mode_via_builder() {
3661 let re = FuzzyRegexBuilder::new("a.*b")
3663 .ungreedy(true)
3664 .build()
3665 .unwrap();
3666
3667 assert!(re.is_match("ab"));
3668 assert!(re.is_match("aXb"));
3669 }
3670
3671 #[test]
3672 fn test_ungreedy_with_plus() {
3673 let re = FuzzyRegexBuilder::new("(?U)a.+b").build().unwrap();
3675
3676 assert!(!re.is_match("ab"));
3677 assert!(re.is_match("aXb"));
3678 }
3679
3680 #[test]
3681 fn test_ungreedy_with_brace() {
3682 let re = FuzzyRegexBuilder::new("(?U)a.{1,3}b").build().unwrap();
3684
3685 assert!(re.is_match("aXb"));
3686 assert!(re.is_match("aXYb"));
3687 }
3688
3689 #[test]
3692 fn test_case_insensitive_inline_flag() {
3693 let re = FuzzyRegexBuilder::new("(?i)hello").build().unwrap();
3695
3696 assert!(re.is_match("hello"));
3697 assert!(re.is_match("HELLO"));
3698 assert!(re.is_match("HeLLo"));
3699 }
3700
3701 #[test]
3702 fn test_case_insensitive_via_builder() {
3703 let re = FuzzyRegexBuilder::new("hello")
3705 .case_insensitive(true)
3706 .build()
3707 .unwrap();
3708
3709 assert!(re.is_match("hello"));
3710 assert!(re.is_match("HELLO"));
3711 assert!(re.is_match("HeLLo"));
3712 }
3713
3714 #[test]
3715 fn test_case_insensitive_with_char_class() {
3716 let re = FuzzyRegexBuilder::new("[a-zA-Z]+")
3719 .case_insensitive(true)
3720 .build()
3721 .unwrap();
3722
3723 assert!(re.is_match("hello"));
3724 assert!(re.is_match("HELLO"));
3725 assert!(re.is_match("HeLLo"));
3726 }
3727
3728 #[test]
3731 fn test_ungreedy_with_dotall() {
3732 let re = FuzzyRegexBuilder::new("(?U)(?s)a.*b").build().unwrap();
3734
3735 assert!(re.is_match("a\nb"));
3737 assert!(re.is_match("a\nb\nc\nb"));
3738 }
3739
3740 #[test]
3741 fn test_greedy_captures() {
3742 let re = FuzzyRegexBuilder::new("(a.*b)").build().unwrap();
3744
3745 let caps = re.captures("aXbYb").unwrap();
3746 assert!(caps.get(1).is_some());
3748 }
3749
3750 #[test]
3751 fn test_non_greedy_captures() {
3752 let re = FuzzyRegexBuilder::new("(a.*?b)").build().unwrap();
3754
3755 let caps = re.captures("aXbYb").unwrap();
3756 assert!(caps.get(1).is_some());
3758 }
3759
3760 #[test]
3761 fn test_all_quantifier_modifiers() {
3762 let patterns = [
3764 "a*", "a*?", "a+", "a+?", "a?", "a??", "a{2}", "a{2}?", "a{2,}", "a{2,}?", "a{2,5}", "a{2,5}?", ];
3771
3772 for pattern in patterns {
3773 let re = FuzzyRegexBuilder::new(pattern).build();
3774 assert!(re.is_ok(), "Pattern '{pattern}' should parse");
3775 }
3776 }
3777
3778 #[test]
3781 fn test_global_flag_parses() {
3782 let re = FuzzyRegexBuilder::new("(?g)hello").build().unwrap();
3784
3785 assert!(re.is_match("hello"));
3786 assert!(re.is_match("hello world hello"));
3787 }
3788
3789 #[test]
3790 fn test_global_flag_via_builder() {
3791 let re = FuzzyRegexBuilder::new("hello")
3793 .global(true)
3794 .build()
3795 .unwrap();
3796
3797 assert!(re.is_match("hello"));
3798 }
3799
3800 #[test]
3801 fn test_global_find_iter() {
3802 let re = FuzzyRegexBuilder::new("(?g)\\d+").build().unwrap();
3804
3805 let text = "abc 123 def 456 ghi 789";
3806 let matches: Vec<_> = re.find_iter(text).collect();
3807
3808 assert_eq!(matches.len(), 3);
3809 assert_eq!(matches[0].as_str(), "123");
3810 assert_eq!(matches[1].as_str(), "456");
3811 assert_eq!(matches[2].as_str(), "789");
3812 }
3813
3814 #[test]
3815 fn test_global_with_fuzzy() {
3816 let re = FuzzyRegexBuilder::new("(?g)(?:hello)~1").build().unwrap();
3818
3819 let text = "hllo world helo there";
3820 let matches: Vec<_> = re.find_iter(text).collect();
3821
3822 assert!(matches.len() >= 2);
3824 }
3825
3826 #[test]
3827 fn test_global_combined_with_other_flags() {
3828 let re = FuzzyRegexBuilder::new("(?g)(?i)hello").build().unwrap();
3830
3831 let text = "Hello HELLO hello";
3832 let matches: Vec<_> = re.find_iter(text).collect();
3833
3834 assert_eq!(matches.len(), 3);
3835 }
3836
3837 #[test]
3838 fn test_fullmatch() {
3839 let re = FuzzyRegex::new(r"\d+").unwrap();
3841 assert!(re.fullmatch("123").is_some());
3842 assert!(re.fullmatch("123abc").is_none());
3843 assert!(re.fullmatch("abc").is_none());
3844 assert!(re.fullmatch("").is_none());
3845 }
3846
3847 #[test]
3848 fn test_fullmatch_fuzzy() {
3849 let re = FuzzyRegex::new(r"hello~1").unwrap();
3851 assert!(re.fullmatch("hello").is_some());
3852 assert!(re.fullmatch("helo").is_some()); assert!(re.fullmatch("hello world").is_none());
3854 }
3855
3856 #[test]
3857 fn test_fullmatch_empty_pattern() {
3858 let re = FuzzyRegex::new(r"").unwrap();
3860 assert!(re.fullmatch("").is_some());
3861 }
3862
3863 #[test]
3864 fn test_fullmatch_at() {
3865 let re = FuzzyRegex::new(r"\d+").unwrap();
3866
3867 assert!(re.fullmatch_at("123", 0).is_some());
3869
3870 let result = re.fullmatch_at("123", 1);
3873 if let Some(m) = result {
3875 println!(
3876 "fullmatch_at('123', 1): start={}, end={}",
3877 m.start(),
3878 m.end()
3879 );
3880 }
3881
3882 assert!(re.fullmatch_at("123", 10).is_none());
3884 }
3885
3886 #[test]
3887 fn test_is_full_match() {
3888 let re = FuzzyRegex::new(r"\d+").unwrap();
3889
3890 assert!(re.is_full_match("123"));
3891 assert!(!re.is_full_match("123abc"));
3892 assert!(!re.is_full_match("abc"));
3893 }
3894
3895 #[test]
3896 fn test_named_lists() {
3897 let mut re = FuzzyRegex::new(r"\L<words>").unwrap();
3899 re.set_word_list("words", vec!["cat", "dog", "frog"]);
3900
3901 let lists = re.named_lists();
3902 assert!(lists.contains_key("words"));
3903 assert_eq!(lists.get("words").unwrap(), &vec!["cat", "dog", "frog"]);
3904
3905 let words = re.get_word_list("words").unwrap();
3907 assert_eq!(words.len(), 3);
3908
3909 let re2 = FuzzyRegex::new(r"\d+").unwrap();
3911 assert!(re2.named_lists().is_empty());
3912 assert!(!re2.has_word_lists());
3913 }
3914
3915 #[test]
3916 fn test_partial_match() {
3917 let re = FuzzyRegex::new(r"\d+").unwrap();
3919 let m = re.find("abc123").unwrap();
3920 assert!(!m.partial());
3921
3922 let re = FuzzyRegexBuilder::new(r"\d+")
3924 .partial(true)
3925 .build()
3926 .unwrap();
3927
3928 let m = re.find("abc123").unwrap();
3930 assert!(m.partial());
3931
3932 let m = re.find("abc123xyz").unwrap();
3934 assert!(!m.partial());
3935
3936 let m = re.find("123").unwrap();
3938 assert!(m.partial());
3939
3940 let m = re.find("123456").unwrap();
3942 assert!(m.partial());
3943 }
3944
3945 #[test]
3946 fn test_find_with_timeout() {
3947 use std::time::Duration;
3948
3949 let re = FuzzyRegex::new(r"\d+").unwrap();
3950
3951 let result = re.find_with_timeout("123abc", Duration::from_secs(1));
3953 assert!(result.unwrap().is_some());
3954
3955 let result = re.find_with_timeout("123", Duration::from_millis(1));
3957 assert!(result.unwrap().is_some());
3958 }
3959
3960 #[test]
3961 fn test_find_rev() {
3962 let re = FuzzyRegex::new(r"\d+").unwrap();
3963 let text = "abc123def456";
3964
3965 let m = re.find(text).unwrap();
3967 assert_eq!(m.start(), 3);
3968 assert_eq!(m.end(), 6);
3969
3970 let m = re.find_rev(text).unwrap();
3972 assert_eq!(m.start(), 9);
3973 assert_eq!(m.end(), 12);
3974 }
3975
3976 #[test]
3977 fn test_find_rev_fuzzy() {
3978 let re = FuzzyRegex::new(r"(?:hello){e<=1}").unwrap();
3980 let text = "hello world hello";
3981
3982 let m = re.find(text).unwrap();
3984 assert_eq!(m.start(), 0);
3985 assert_eq!(m.end(), 5);
3986
3987 let m = re.find_rev(text).unwrap();
3989 assert_eq!(m.start(), 12);
3990 assert_eq!(m.end(), 17);
3991 }
3992
3993 #[test]
3994 fn test_find_rev_fuzzy_multiple() {
3995 let re = FuzzyRegex::new(r"(?:test){e<=1}").unwrap();
3997 let text = "best tset trial test contest";
3998
3999 let m = re.find(text).unwrap();
4005 assert_eq!(m.start(), 16);
4006 assert_eq!(m.end(), 20);
4007
4008 let m = re.find_rev(text).unwrap();
4010 assert_eq!(m.start(), 24);
4011 assert_eq!(m.end(), 28);
4012 }
4013
4014 #[test]
4015 fn test_find_rev_no_match() {
4016 let re = FuzzyRegex::new(r"(?:hello){e<=1}").unwrap();
4017 let text = "world";
4018
4019 assert!(re.find(text).is_none());
4020 assert!(re.find_rev(text).is_none());
4021 }
4022
4023 #[test]
4024 fn test_find_rev_empty_text() {
4025 let re = FuzzyRegex::new(r"(?:hello){e<=1}").unwrap();
4026 let text = "";
4027
4028 assert!(re.find(text).is_none());
4029 assert!(re.find_rev(text).is_none());
4030 }
4031
4032 #[test]
4033 fn test_find_rev_empty_pattern() {
4034 let re = FuzzyRegex::new(r"").unwrap();
4035 let text = "hello";
4036
4037 let m = re.find(text).unwrap();
4039 assert_eq!(m.start(), 0);
4040 assert_eq!(m.end(), 0);
4041
4042 let m = re.find_rev(text);
4044 eprintln!("find_rev result: {:?}", m.map(|m| (m.start(), m.end())));
4045
4046 let m = re.find_rev(text).unwrap();
4050 assert_eq!(m.start(), 5);
4051 assert_eq!(m.end(), 5);
4052 }
4053
4054 #[test]
4055 fn test_find_iter_rev() {
4056 let re = FuzzyRegex::new(r"\d+").unwrap();
4057 let text = "abc123def456ghi789";
4058
4059 let matches = re.find_iter_rev(text);
4060
4061 assert_eq!(matches.len(), 3);
4063 assert_eq!(matches[0].start(), 15); assert_eq!(matches[1].start(), 9); assert_eq!(matches[2].start(), 3); }
4067
4068 #[test]
4069 fn test_find_rev_single_match() {
4070 let re = FuzzyRegex::new(r"\d+").unwrap();
4071 let text = "abc123def";
4072
4073 let m1 = re.find(text).unwrap();
4075 let m2 = re.find_rev(text).unwrap();
4076
4077 assert_eq!(m1.start(), m2.start());
4078 assert_eq!(m1.end(), m2.end());
4079 }
4080
4081 #[test]
4082 fn test_reset_match_start_k() {
4083 let re = FuzzyRegex::new(r"foo\Kbar").unwrap();
4086
4087 let m = re.find("foobar").unwrap();
4088 assert_eq!(m.as_str(), "bar");
4089 assert_eq!(m.start(), 3);
4090 assert_eq!(m.end(), 6);
4091
4092 let re2 = FuzzyRegex::new(r"foobar").unwrap();
4094 let m2 = re2.find("foobar").unwrap();
4095 assert_eq!(m2.as_str(), "foobar");
4096 }
4097
4098 #[test]
4099 fn test_word_list_iter_all_matches() {
4100 let mut re = FuzzyRegex::new(r"\L<words>").unwrap();
4102 re.set_word_list("words", vec!["cat", "dog"]);
4103
4104 let text = "cat dog cat";
4105 let matches: Vec<_> = re.find_iter(text).collect();
4106
4107 assert_eq!(matches.len(), 3);
4108 assert_eq!(matches[0].as_str(), "cat");
4109 assert_eq!(matches[1].as_str(), "dog");
4110 assert_eq!(matches[2].as_str(), "cat");
4111 }
4112}