fuzzy_aho_corasick/lib.rs
1#![warn(clippy::pedantic)]
2#![allow(clippy::too_many_lines, clippy::cast_precision_loss)]
3mod builder;
4mod matches;
5mod replacer;
6pub mod structs;
7#[cfg(test)]
8mod tests;
9
10pub use builder::FuzzyAhoCorasickBuilder;
11pub use replacer::FuzzyReplacer;
12use std::borrow::Cow;
13use std::collections::BTreeMap;
14use unicode_segmentation::UnicodeSegmentation;
15pub type PatternIndex = usize;
16pub use structs::*;
17
18#[allow(unused_macros)]
19#[cfg(test)]
20macro_rules! trace {
21 ($($arg:tt)*) => { println!($($arg)*); };
22}
23#[allow(unused_macros)]
24#[cfg(not(test))]
25macro_rules! trace {
26 ($($arg:tt)*) => {};
27}
28/// Fuzzy Aho—Corasick engine
29impl FuzzyAhoCorasick {
30 /// Get the per-node limits if this node corresponds to a pattern that has
31 /// its own `FuzzyLimits`.
32 #[inline]
33 fn get_node_limits(&self, node: usize) -> Option<&FuzzyLimits> {
34 self.nodes[node]
35 .pattern_index
36 .and_then(|i| self.patterns.get(i).and_then(|p| p.limits.as_ref()))
37 }
38
39 /// Check ahead whether an insertion would stay within the allowed limits.
40 /// Considers both the node-specific limits and the global fallback `self.limits`.
41 #[inline]
42 fn within_limits_insertion_ahead(
43 &self,
44 limits: Option<&FuzzyLimits>,
45 edits: NumEdits,
46 insertions: NumEdits,
47 ) -> bool {
48 if let Some(max) = limits.or(self.limits.as_ref()) {
49 max.edits.is_none_or(|max| edits < max)
50 && max.insertions.is_none_or(|max| insertions < max)
51 } else {
52 false
53 }
54 }
55
56 /// Check ahead whether a deletion would stay within the allowed limits.
57 #[inline]
58 fn within_limits_deletion_ahead(
59 &self,
60 limits: Option<&FuzzyLimits>,
61 edits: NumEdits,
62 deletions: NumEdits,
63 ) -> bool {
64 if let Some(max) = limits.or(self.limits.as_ref()) {
65 max.edits.is_none_or(|max| edits < max)
66 && max.deletions.is_none_or(|max| deletions < max)
67 } else {
68 false
69 }
70 }
71
72 /// Check ahead whether a swap (transposition) would stay within the allowed limits.
73 #[inline]
74 fn within_limits_swap_ahead(
75 &self,
76 limits: Option<&FuzzyLimits>,
77 edits: NumEdits,
78 swaps: NumEdits,
79 ) -> bool {
80 if let Some(max) = limits.or(self.limits.as_ref()) {
81 /*println!(
82 "within_limits_swap_ahead() -- max: {max:?} edits: {edits:?} swaps: {swaps:?}\
83 \nresult = {:?}\n"
84 , max.edits.is_none_or(|max| edits < max) && max.swaps.is_none_or(|max| swaps < max))*/
85 max.edits.is_none_or(|max| edits < max) && max.swaps.is_none_or(|max| swaps < max)
86 } else {
87 false
88 }
89 }
90
91 /// Check ahead whether a substitution would stay within the allowed limits.
92 #[inline]
93 fn within_limits_subst(
94 &self,
95 limits: Option<&FuzzyLimits>,
96 edits: NumEdits,
97 substitutions: NumEdits,
98 ) -> bool {
99 if let Some(max) = limits.or(self.limits.as_ref()) {
100 /*println!(
101 "within_limits_subst_ahead() -- max: {max:?} edits: {edits:?} substitutions: {substitutions:?}\
102 \nresult = {result:?}\n"
103 );*/
104 max.edits.is_none_or(|max| edits <= max)
105 && max.substitutions.is_none_or(|max| substitutions <= max)
106 } else {
107 edits == 0 && substitutions == 0
108 }
109 }
110
111 /// General limits check: given all edit counts, returns whether they are
112 /// acceptable under either the node-specific limits or the global default.
113 #[inline]
114 fn within_limits(
115 &self,
116 limits: Option<&FuzzyLimits>,
117 edits: NumEdits,
118 insertions: NumEdits,
119 deletions: NumEdits,
120 substitutions: NumEdits,
121 swaps: NumEdits,
122 ) -> bool {
123 if let Some(max) = limits.or(self.limits.as_ref()) {
124 /*println!(
125 "within_limits() -- max: {max:?} edits: {edits:?} insertions: {insertions:?} deletions: {deletions:?} substitutions: {substitutions:?} swaps: {swaps:?}\
126 \nresult = {result:?}\n"
127 );*/
128 max.edits.is_none_or(|max| edits <= max)
129 && max.insertions.is_none_or(|max| insertions <= max)
130 && max.deletions.is_none_or(|max| deletions <= max)
131 && max.substitutions.is_none_or(|max| substitutions <= max)
132 && max.swaps.is_none_or(|max| swaps <= max)
133 } else {
134 edits == 0 && insertions == 0 && deletions == 0 && substitutions == 0 && swaps == 0
135 }
136 }
137
138 /// Returns the list of patterns the automaton was built with.
139 #[must_use]
140 pub fn patterns(&self) -> &[Pattern] {
141 &self.patterns
142 }
143
144 /// Core fuzzy search over the haystack producing raw matches without any
145 /// global ordering applied. This explores all possible state transitions
146 /// (substitutions, swaps, insertions, deletions) starting at each grapheme
147 /// position, accumulating penalties and enforcing per-pattern limits. Keeps the
148 /// best match for each unique (`start_byte`, `end_byte`, `pattern_index`) key by
149 /// highest similarity, but does **not** sort the results; the returned
150 /// `FuzzyMatches.inner` is effectively unsorted.
151 ///
152 /// Similarity is computed as `(total_graphemes - penalties) / total_graphemes * weight`.
153 /// Matches below `similarity_threshold` are discarded early.
154 ///
155 /// # Parameters
156 /// - `haystack`: the input text to search in.
157 /// - `similarity_threshold`: minimum similarity a candidate must have to be kept.
158 ///
159 /// # Returns
160 /// A `FuzzyMatches` containing the best per-span matches meeting the threshold.
161 #[inline]
162 #[must_use]
163 pub fn search_unsorted<'a>(
164 &'a self,
165 haystack: &'a str,
166 similarity_threshold: f32,
167 ) -> FuzzyMatches<'a> {
168 let grapheme_idx: Vec<(usize, &str)> = haystack.grapheme_indices(true).collect();
169 if grapheme_idx.is_empty() {
170 return FuzzyMatches {
171 haystack,
172 inner: vec![],
173 };
174 }
175 let text_chars: Vec<Cow<str>> = grapheme_idx
176 .iter()
177 .map(|(_, g)| {
178 if self.case_insensitive {
179 Cow::Owned(g.to_lowercase())
180 } else {
181 Cow::Borrowed(*g)
182 }
183 })
184 .collect();
185
186 let mut best: BTreeMap<(usize, usize, usize), FuzzyMatch> = BTreeMap::new();
187
188 let mut queue: Vec<State> = Vec::with_capacity(64);
189
190 trace!(
191 "=== fuzzy_search on {haystack:?} (similarity_threshold {similarity_threshold:.2}) ===",
192 );
193 for start in 0..text_chars.len() {
194 trace!(
195 "=== new window at grapheme #{start} ({:?}) ===",
196 text_chars[start]
197 );
198
199 queue.clear();
200 queue.push(State {
201 node: 0,
202 j: start,
203 matched_start: start,
204 matched_end: start,
205 penalties: 0.,
206 edits: 0,
207 insertions: 0,
208 deletions: 0,
209 substitutions: 0,
210 swaps: 0,
211 #[cfg(debug_assertions)]
212 notes: vec![],
213 });
214
215 let mut q_idx = 0;
216 while q_idx < queue.len() {
217 let State {
218 node,
219 j,
220 matched_start,
221 matched_end,
222 penalties,
223 edits,
224 insertions,
225 deletions,
226 substitutions,
227 swaps,
228 ..
229 } = queue[q_idx];
230 #[cfg(debug_assertions)]
231 let notes = queue[q_idx].notes.clone();
232 q_idx += 1;
233
234 /*trace!(
235 "visit: node={} j={} span=({}->{}) score={:.3} edits={}",
236 node, j, matched_start, matched_end, score, edits
237 );*/
238
239 let Node {
240 output,
241 transitions,
242 ..
243 } = &self.nodes[node];
244
245 if !output.is_empty() {
246 for &pattern_index in output {
247 if !self.within_limits(
248 self.patterns[pattern_index].limits.as_ref(),
249 edits,
250 insertions,
251 deletions,
252 substitutions,
253 swaps,
254 ) {
255 continue;
256 }
257 let start_byte = grapheme_idx.get(matched_start).map_or(0, |&(b, _)| b);
258 let end_byte = grapheme_idx
259 .get(matched_end)
260 .map_or_else(|| haystack.len(), |&(b, _)| b);
261 let key = (start_byte, end_byte, pattern_index);
262
263 let total = self.patterns[pattern_index].grapheme_len as f32;
264
265 let similarity =
266 (total - penalties) / total * self.patterns[pattern_index].weight;
267
268 if similarity < similarity_threshold {
269 continue;
270 }
271
272 best.entry(key)
273 .and_modify(|entry| {
274 if similarity > entry.similarity {
275 *entry = FuzzyMatch {
276 insertions,
277 deletions,
278 substitutions,
279 edits,
280 swaps,
281 pattern_index,
282 start: start_byte,
283 end: end_byte,
284 pattern: &self.patterns[pattern_index],
285 similarity,
286 text: &haystack[start_byte..end_byte],
287 #[cfg(debug_assertions)]
288 notes: notes.clone(),
289 };
290 }
291 })
292 .or_insert_with(|| FuzzyMatch {
293 insertions,
294 deletions,
295 substitutions,
296 edits,
297 swaps,
298 pattern_index,
299 start: start_byte,
300 end: end_byte,
301 pattern: &self.patterns[pattern_index],
302 similarity,
303 text: &haystack[start_byte..end_byte],
304 #[cfg(debug_assertions)]
305 notes: notes.clone(),
306 });
307 }
308 }
309
310 //
311 // 1) Same or similar symbol — только внутри текста
312 //
313 if j < text_chars.len() {
314 let current_grapheme = text_chars[j].as_ref();
315 let current_ch = current_grapheme.chars().next().unwrap_or('\0');
316
317 for (edge_g, &next_node) in transitions {
318 #[cfg(debug_assertions)]
319 let notes = notes.clone();
320
321 let g_ch = edge_g.chars().next().unwrap_or('\0');
322 if edge_g == current_grapheme {
323 // exact match
324 trace!(" match {:>8} ─ok→ node={} sim=1.00", edge_g, next_node);
325 queue.push(State {
326 node: next_node,
327 j: j + 1,
328 matched_start: if matched_end == matched_start {
329 j
330 } else {
331 matched_start
332 },
333 matched_end: j + 1,
334 penalties,
335 edits,
336 insertions,
337 deletions,
338 substitutions,
339 swaps,
340 #[cfg(debug_assertions)]
341 notes,
342 });
343 } else if self.within_limits_subst(
344 self.get_node_limits(node),
345 edits,
346 substitutions,
347 ) {
348 // substitution
349 let sim = *self.similarity.get(&(g_ch, current_ch)).unwrap_or(&0.);
350 let penalty = self.penalties.substitution * (1.0 - sim);
351
352 trace!(
353 " subst {:>8?} ─sub→ {current_grapheme:?} \
354 node={} sim={:.2} pen={:.2} edits->{}",
355 edge_g,
356 next_node,
357 sim,
358 penalty,
359 edits + 1
360 );
361 #[cfg(debug_assertions)]
362 let mut notes = notes.clone();
363 #[cfg(debug_assertions)]
364 notes.push(format!("sub {edge_g:?} -> {current_grapheme:?} (sim={sim:.2}, pen={penalty:.2}) (subst->{}, edits->{})", substitutions + 1, edits + 1));
365
366 queue.push(State {
367 node: next_node,
368 j: j + 1,
369 matched_start: if matched_end == matched_start {
370 j
371 } else {
372 matched_start
373 },
374 matched_end: j + 1,
375 penalties: penalties + penalty,
376 edits: edits + 1,
377 insertions,
378 deletions,
379 substitutions: substitutions + 1,
380 swaps,
381 #[cfg(debug_assertions)]
382 notes,
383 });
384 }
385 }
386
387 //
388 // 2) Swap (transposition of two neighboring graphemes)
389 //
390 if j + 1 < text_chars.len() {
391 let a = &text_chars[j];
392 let b = &text_chars[j + 1];
393 if let Some(&node2) = transitions
394 .get(b.as_ref())
395 .and_then(|&x| self.nodes[x].transitions.get(a.as_ref()))
396 {
397 if self.within_limits_swap_ahead(
398 self.get_node_limits(node2),
399 edits,
400 swaps,
401 ) {
402 #[cfg(debug_assertions)]
403 let mut notes = notes.clone();
404 #[cfg(debug_assertions)]
405 notes.push(format!(
406 "swap a:{a:?} b:{b:?} (swaps->{}, edits->{})",
407 swaps + 1,
408 edits + 1
409 ));
410 queue.push(State {
411 node: node2,
412 j: j + 2,
413 matched_start,
414 matched_end: j + 2,
415 penalties: penalties + self.penalties.swap,
416 edits: edits + 1,
417 insertions,
418 deletions,
419 substitutions,
420 swaps: swaps + 1,
421 #[cfg(debug_assertions)]
422 notes,
423 });
424 }
425 }
426 }
427
428 //
429 // 3a) Insertion (skip a haystack character)
430 //
431 if (matched_start != matched_end || matched_start != j)
432 && self.within_limits_insertion_ahead(
433 self.get_node_limits(node),
434 edits,
435 insertions,
436 )
437 {
438 #[cfg(debug_assertions)]
439 let mut notes = notes.clone();
440 #[cfg(debug_assertions)]
441 notes.push(format!(
442 "ins {:?} (ins->{} , edits->{})",
443 text_chars[j],
444 insertions + 1,
445 edits + 1
446 ));
447 queue.push(State {
448 node,
449 j: j + 1,
450 matched_start,
451 matched_end,
452 penalties: penalties + self.penalties.insertion,
453 edits: edits + 1,
454 insertions: insertions + 1,
455 deletions,
456 substitutions,
457 swaps,
458 #[cfg(debug_assertions)]
459 notes,
460 });
461 }
462 }
463
464 //
465 // 3b) Deletion (skip a pattern character) — always, even if j == len
466 //
467 if self.within_limits_deletion_ahead(self.get_node_limits(node), edits, deletions) {
468 #[allow(unused_variables)]
469 for (edge_g2, &next_node2) in transitions {
470 trace!(
471 " delete to node={next_node2} penalty={:.2}",
472 self.penalties.deletion
473 );
474 #[cfg(debug_assertions)]
475 let mut notes = notes.clone();
476 #[cfg(debug_assertions)]
477 notes.push(format!("edge_g2 {edge_g2:?} (del->{:?})", deletions + 1));
478 queue.push(State {
479 node: next_node2,
480 j,
481 matched_start,
482 matched_end,
483 penalties: penalties + self.penalties.deletion,
484 edits: edits + 1,
485 insertions,
486 deletions: deletions + 1,
487 substitutions,
488 swaps,
489 #[cfg(debug_assertions)]
490 notes,
491 });
492 }
493 }
494 }
495 }
496 FuzzyMatches {
497 haystack,
498 inner: best
499 .into_values()
500 .map(|mut m| {
501 m.text = &haystack[m.start..m.end];
502 m
503 })
504 .collect(),
505 }
506 }
507
508 /// Convenience wrapper over `search_unsorted` that applies the default sorting
509 /// order to the matches (via `default_sort()`).
510 ///
511 /// # Parameters
512 /// - `haystack`: the input text to search in.
513 /// - `similarity_threshold`: minimum similarity threshold for candidates.
514 ///
515 /// # Returns
516 /// `FuzzyMatches` with matches sorted according to the default ranking.
517 #[inline]
518 #[must_use]
519 pub fn search<'a>(&'a self, haystack: &'a str, similarity_threshold: f32) -> FuzzyMatches<'a> {
520 let mut matches = self.search_unsorted(haystack, similarity_threshold);
521 matches.default_sort();
522 matches
523 }
524
525 /// Convenience wrapper over `search_unsorted` that applies a greedy sort (via `greedy_sort()`),
526 ///
527 /// # Parameters
528 /// - `haystack`: the input text to search in.
529 /// - `similarity_threshold`: minimum similarity threshold for candidates.
530 ///
531 /// # Returns
532 /// `FuzzyMatches` with matches sorted by the greedy heuristic.
533 #[inline]
534 #[must_use]
535 pub fn search_greedy<'a>(
536 &'a self,
537 haystack: &'a str,
538 similarity_threshold: f32,
539 ) -> FuzzyMatches<'a> {
540 let mut matches = self.search_unsorted(haystack, similarity_threshold);
541 matches.greedy_sort();
542 matches
543 }
544
545 /// Search that returns non-overlapping matches by delegating to
546 /// `non_overlapping()` on the fully sorted (default) results. This will
547 /// greedily select a set of matches such that their spans do not overlap,
548 /// according to whatever strategy `non_overlapping` encapsulates.
549 ///
550 /// # Parameters
551 /// - `haystack`: the input text to search in.
552 /// - `similarity_threshold`: minimum similarity threshold for candidates.
553 ///
554 /// # Returns
555 /// `FuzzyMatches` containing a non-overlapping subset of matches.
556 #[must_use]
557 pub fn search_non_overlapping<'a>(
558 &'a self,
559 haystack: &'a str,
560 similarity_threshold: f32,
561 ) -> FuzzyMatches<'a> {
562 let mut matches = self.search(haystack, similarity_threshold);
563 matches.non_overlapping();
564 matches
565 }
566
567 /// Variation of `search_non_overlapping` that additionally enforces uniqueness
568 /// of patterns: each pattern (identified by `custom_unique_id` if present or by
569 /// its index) may only contribute one accepted match. Delegates to
570 /// `non_overlapping_unique()` after obtaining the base sorted matches.
571 ///
572 /// # Parameters
573 /// - `haystack`: the input text to search in.
574 /// - `similarity_threshold`: minimum similarity threshold for candidates.
575 ///
576 /// # Returns
577 /// `FuzzyMatches` containing a non-overlapping, pattern-unique subset of matches.
578 #[must_use]
579 pub fn search_non_overlapping_unique<'a>(
580 &'a self,
581 haystack: &'a str,
582 similarity_threshold: f32,
583 ) -> FuzzyMatches<'a> {
584 let mut matches = self.search(haystack, similarity_threshold);
585 matches.non_overlapping_unique();
586 matches
587 }
588
589 /// Perform replacements on `text` by finding non-overlapping fuzzy matches above
590 /// `threshold` and invoking `callback` for each. Matches are resolved via
591 /// `search_non_overlapping`, so they won’t overlap; the first chosen set is
592 /// used in left-to-right order.
593 ///
594 /// The `callback` is called with each `FuzzyMatch`. If it returns `Some(repl)`,
595 /// the matched span is replaced with `repl`. If it returns `None`, the original
596 /// substring from `text` is preserved.
597 ///
598 /// # Parameters
599 /// - `text`: input string to perform replacements on.
600 /// - `callback`: mapping from a `FuzzyMatch` to an optional replacement string.
601 /// - `threshold`: minimum similarity for a match to be considered.
602 ///
603 /// # Returns
604 /// A new `String` with the selected fuzzy matches replaced per `callback`.
605 ///
606 /// # Example
607 /// ```rust
608 /// use fuzzy_aho_corasick::FuzzyAhoCorasickBuilder;
609 /// let automaton = FuzzyAhoCorasickBuilder::new().build(["FOO", "BAR", "BAZ"]);
610 /// let result = automaton.replace("FOO BAR BAZ", |m| {
611 /// (m.pattern.pattern == "BAR").then_some("###")
612 /// }, 0.8);
613 /// assert_eq!(result, "FOO ### BAZ");
614 /// ```
615 #[must_use]
616 pub fn replace<'a, F, S: Into<Cow<'a, str>>>(
617 &'a self,
618 text: &'a str,
619 callback: F,
620 threshold: f32,
621 ) -> String
622 where
623 F: Fn(&FuzzyMatch<'a>) -> Option<S>,
624 S: Into<Cow<'a, str>>,
625 {
626 self.search_non_overlapping(text, threshold)
627 .replace(callback)
628 }
629
630 /// Strip any leading fuzzy‐matched prefix from `haystack` using the given
631 /// similarity `threshold`, and return the remainder of the string.
632 ///
633 /// # Behavior
634 ///
635 /// - All initial [`Segment::Matched`] variants are skipped.
636 /// - Any unmatched segments consisting solely of whitespace are also skipped.
637 /// - The first non‐whitespace [`Segment::Unmatched`]:
638 /// - Has its leading whitespace trimmed before appending.
639 /// - Disables skipping so that all subsequent segments are included.
640 /// - After that point, both `Matched` and `Unmatched` segments are appended
641 /// in full (without further trimming).
642 ///
643 /// # Parameters
644 ///
645 /// - `haystack`: The text to strip a fuzzy‐matched prefix from.
646 /// - `threshold`: A float from `0.0` to `1.0` indicating the minimum
647 /// similarity score required for a match.
648 ///
649 /// # Returns
650 ///
651 /// A `String` containing the remainder of `haystack` after removing the
652 /// leading fuzzy‐matched portion and any leading whitespace.
653 ///
654 /// # Examples
655 ///
656 /// ```
657 /// use fuzzy_aho_corasick::{FuzzyAhoCorasickBuilder, FuzzyLimits};
658 /// let f = FuzzyAhoCorasickBuilder::new()
659 /// .fuzzy(FuzzyLimits::new().edits(1))
660 /// .case_insensitive(true)
661 /// .build(["LOREM", "IPSUM"]);
662 ///
663 /// // "LROEM" fuzzy‐matches "LOREM", "PISUM" matches "IPSUM",
664 /// // so both are stripped, and leading space before "ZZZ" is trimmed:
665 /// let result = f.strip_prefix("LrEM ISuM Lorm ZZZ", 0.8);
666 /// assert_eq!(result, "ZZZ");
667 /// ```
668 #[must_use]
669 pub fn strip_prefix<'a>(&'a self, haystack: &'a str, threshold: f32) -> String {
670 self.search_non_overlapping(haystack, threshold)
671 .strip_prefix()
672 }
673
674 /// Perform a non‐overlapping fuzzy search over `haystack` with the given
675 /// similarity `threshold`, then strip any trailing fuzzy‐matched suffix
676 /// from the end of the string and return the leading portion.
677 ///
678 /// # Behavior
679 ///
680 /// - Conducts a non‐overlapping fuzzy search (via [`search_non_overlapping`]).
681 /// - Skips all trailing [`Segment::Matched`] variants.
682 /// - Skips any trailing [`Segment::Unmatched`] variants consisting solely of whitespace.
683 /// - The last non‐whitespace [`Segment::Unmatched`]:
684 /// - Has its trailing whitespace trimmed before inclusion.
685 /// - Marks the cutoff point—everything after it is dropped.
686 ///
687 /// # Parameters
688 ///
689 /// - `haystack`: The text to strip a fuzzy‐matched suffix from.
690 /// - `threshold`: A float in `0.0..=1.0` indicating the minimum similarity
691 /// score required for a match to count as part of the suffix.
692 ///
693 /// # Returns
694 ///
695 /// A `String` containing the beginning of `haystack` with any trailing
696 /// fuzzy‐matched portion (and trailing whitespace) removed.
697 ///
698 /// # Examples
699 ///
700 /// ```rust
701 /// use fuzzy_aho_corasick::{FuzzyAhoCorasickBuilder, FuzzyLimits};
702 ///
703 /// let f = FuzzyAhoCorasickBuilder::new()
704 /// .fuzzy(FuzzyLimits::new().edits(1))
705 /// .case_insensitive(true)
706 /// .build(["LOREM", "IPSUM"]);
707 ///
708 /// // The suffix " LrEM ISuM" fuzzily matches " LOREM IPSUM" at ≥0.8,
709 /// // so it's stripped from the end, leaving only "ZZZ".
710 /// let result = f.strip_postfix("ZZZ LrEM ISuM", 0.8);
711 /// assert_eq!(result, "ZZZ");
712 /// ```
713 #[must_use]
714 pub fn strip_postfix<'a>(&'a self, haystack: &'a str, threshold: f32) -> String {
715 self.search_non_overlapping(haystack, threshold)
716 .strip_postfix()
717 }
718
719 /// Split `haystack` into unmatched substrings by treating each fuzzy match
720 /// (above the given `threshold`) as a separator.
721 ///
722 /// # Behavior
723 ///
724 /// - Performs a non-overlapping fuzzy search over `haystack` using
725 /// [`search_non_overlapping`].
726 /// - Delegates to the `split()` method on the resulting `FuzzyMatches`.
727 /// - Produces one `String` per unmatched segment in order, including empty
728 /// segments if matches occur at the very start or end.
729 ///
730 /// # Parameters
731 ///
732 /// - `haystack`: The input text to split on fuzzy matches.
733 /// - `threshold`: A similarity cutoff (`0.0..=1.0`); only matches with
734 /// a score ≥ `threshold` are treated as separators.
735 ///
736 /// # Returns
737 ///
738 /// A `Vec<String>` containing the parts of `haystack` between each fuzzy match.
739 ///
740 /// # Examples
741 ///
742 /// ```rust
743 /// use fuzzy_aho_corasick::{FuzzyAhoCorasickBuilder, FuzzyLimits};
744 ///
745 /// let engine = FuzzyAhoCorasickBuilder::new()
746 /// .fuzzy(FuzzyLimits::new().edits(1))
747 /// .case_insensitive(true)
748 /// .build(["FOO", "BAR"]);
749 ///
750 /// let parts: Vec<String> = engine.split("xxFo0yyBAARzz", 0.8).collect();
751 /// assert_eq!(parts, vec!["xx", "yy", "zz"]);
752 /// ```
753 #[must_use]
754 pub fn split<'a>(
755 &'a self,
756 haystack: &'a str,
757 threshold: f32,
758 ) -> impl Iterator<Item = String> + 'a {
759 self.search_non_overlapping(haystack, threshold).split()
760 }
761
762 /// Returns an **iterator** that yields interleaving [`Segment::Matched`]
763 /// [`Segment::Unmatched`] items for the given text.
764 pub fn segment_iter<'a>(
765 &'a self,
766 haystack: &'a str,
767 threshold: f32,
768 ) -> impl Iterator<Item = Segment<'a>> {
769 self.search_non_overlapping(haystack, threshold)
770 .segment_iter()
771 }
772 /// Convenience wrapper around [`segment_iter`](Self::segment_iter).
773 #[must_use]
774 pub fn segment_text(&self, haystack: &str, threshold: f32) -> String {
775 self.search_non_overlapping(haystack, threshold)
776 .segment_text()
777 }
778}