fuzzy_aho_corasick/
lib.rs

1#![warn(clippy::pedantic)]
2#![allow(clippy::too_many_lines, clippy::cast_precision_loss)]
3mod builder;
4mod matches;
5mod replacer;
6pub mod structs;
7#[cfg(test)]
8mod tests;
9
10pub use builder::FuzzyAhoCorasickBuilder;
11pub use replacer::FuzzyReplacer;
12use std::borrow::Cow;
13use std::collections::BTreeMap;
14use unicode_segmentation::UnicodeSegmentation;
15pub type PatternIndex = usize;
16pub use structs::*;
17
18#[allow(unused_macros)]
19#[cfg(test)]
20macro_rules! trace {
21    ($($arg:tt)*) => { println!($($arg)*); };
22}
23#[allow(unused_macros)]
24#[cfg(not(test))]
25macro_rules! trace {
26    ($($arg:tt)*) => {};
27}
28/// Fuzzy Aho—Corasick engine
29impl FuzzyAhoCorasick {
30    /// Get the per-node limits if this node corresponds to a pattern that has
31    /// its own `FuzzyLimits`.
32    #[inline]
33    fn get_node_limits(&self, node: usize) -> Option<&FuzzyLimits> {
34        self.nodes[node]
35            .pattern_index
36            .and_then(|i| self.patterns.get(i).and_then(|p| p.limits.as_ref()))
37    }
38
39    /// Check ahead whether an insertion would stay within the allowed limits.
40    /// Considers both the node-specific limits and the global fallback `self.limits`.
41    #[inline]
42    fn within_limits_insertion_ahead(
43        &self,
44        limits: Option<&FuzzyLimits>,
45        edits: NumEdits,
46        insertions: NumEdits,
47    ) -> bool {
48        if let Some(max) = limits.or(self.limits.as_ref()) {
49            max.edits.is_none_or(|max| edits < max)
50                && max.insertions.is_none_or(|max| insertions < max)
51        } else {
52            false
53        }
54    }
55
56    /// Check ahead whether a deletion would stay within the allowed limits.
57    #[inline]
58    fn within_limits_deletion_ahead(
59        &self,
60        limits: Option<&FuzzyLimits>,
61        edits: NumEdits,
62        deletions: NumEdits,
63    ) -> bool {
64        if let Some(max) = limits.or(self.limits.as_ref()) {
65            max.edits.is_none_or(|max| edits < max)
66                && max.deletions.is_none_or(|max| deletions < max)
67        } else {
68            false
69        }
70    }
71
72    /// Check ahead whether a swap (transposition) would stay within the allowed limits.
73    #[inline]
74    fn within_limits_swap_ahead(
75        &self,
76        limits: Option<&FuzzyLimits>,
77        edits: NumEdits,
78        swaps: NumEdits,
79    ) -> bool {
80        if let Some(max) = limits.or(self.limits.as_ref()) {
81            /*println!(
82                "within_limits_swap_ahead() -- max: {max:?} edits: {edits:?} swaps: {swaps:?}\
83                \nresult = {:?}\n"
84            , max.edits.is_none_or(|max| edits < max) && max.swaps.is_none_or(|max| swaps < max))*/
85            max.edits.is_none_or(|max| edits < max) && max.swaps.is_none_or(|max| swaps < max)
86        } else {
87            false
88        }
89    }
90
91    /// Check ahead whether a substitution would stay within the allowed limits.
92    #[inline]
93    fn within_limits_subst(
94        &self,
95        limits: Option<&FuzzyLimits>,
96        edits: NumEdits,
97        substitutions: NumEdits,
98    ) -> bool {
99        if let Some(max) = limits.or(self.limits.as_ref()) {
100            /*println!(
101                "within_limits_subst_ahead() -- max: {max:?} edits: {edits:?} substitutions: {substitutions:?}\
102                \nresult = {result:?}\n"
103            );*/
104            max.edits.is_none_or(|max| edits <= max)
105                && max.substitutions.is_none_or(|max| substitutions <= max)
106        } else {
107            edits == 0 && substitutions == 0
108        }
109    }
110
111    /// General limits check: given all edit counts, returns whether they are
112    /// acceptable under either the node-specific limits or the global default.
113    #[inline]
114    fn within_limits(
115        &self,
116        limits: Option<&FuzzyLimits>,
117        edits: NumEdits,
118        insertions: NumEdits,
119        deletions: NumEdits,
120        substitutions: NumEdits,
121        swaps: NumEdits,
122    ) -> bool {
123        if let Some(max) = limits.or(self.limits.as_ref()) {
124            /*println!(
125                "within_limits() -- max: {max:?} edits: {edits:?} insertions: {insertions:?} deletions: {deletions:?} substitutions: {substitutions:?} swaps: {swaps:?}\
126                \nresult = {result:?}\n"
127            );*/
128            max.edits.is_none_or(|max| edits <= max)
129                && max.insertions.is_none_or(|max| insertions <= max)
130                && max.deletions.is_none_or(|max| deletions <= max)
131                && max.substitutions.is_none_or(|max| substitutions <= max)
132                && max.swaps.is_none_or(|max| swaps <= max)
133        } else {
134            edits == 0 && insertions == 0 && deletions == 0 && substitutions == 0 && swaps == 0
135        }
136    }
137
138    /// Returns the list of patterns the automaton was built with.
139    #[must_use]
140    pub fn patterns(&self) -> &[Pattern] {
141        &self.patterns
142    }
143
144    /// Core fuzzy search over the haystack producing raw matches without any
145    /// global ordering applied. This explores all possible state transitions
146    /// (substitutions, swaps, insertions, deletions) starting at each grapheme
147    /// position, accumulating penalties and enforcing per-pattern limits. Keeps the
148    /// best match for each unique (`start_byte`, `end_byte`, `pattern_index`) key by
149    /// highest similarity, but does **not** sort the results; the returned
150    /// `FuzzyMatches.inner` is effectively unsorted.
151    ///
152    /// Similarity is computed as `(total_graphemes - penalties) / total_graphemes * weight`.
153    /// Matches below `similarity_threshold` are discarded early.
154    ///
155    /// # Parameters
156    /// - `haystack`: the input text to search in.
157    /// - `similarity_threshold`: minimum similarity a candidate must have to be kept.
158    ///
159    /// # Returns
160    /// A `FuzzyMatches` containing the best per-span matches meeting the threshold.
161    #[inline]
162    #[must_use]
163    pub fn search_unsorted<'a>(
164        &'a self,
165        haystack: &'a str,
166        similarity_threshold: f32,
167    ) -> FuzzyMatches<'a> {
168        let grapheme_idx: Vec<(usize, &str)> = haystack.grapheme_indices(true).collect();
169        if grapheme_idx.is_empty() {
170            return FuzzyMatches {
171                haystack,
172                inner: vec![],
173            };
174        }
175        let text_chars: Vec<Cow<str>> = grapheme_idx
176            .iter()
177            .map(|(_, g)| {
178                if self.case_insensitive {
179                    Cow::Owned(g.to_lowercase())
180                } else {
181                    Cow::Borrowed(*g)
182                }
183            })
184            .collect();
185
186        let mut best: BTreeMap<(usize, usize, usize), FuzzyMatch> = BTreeMap::new();
187
188        let mut queue: Vec<State> = Vec::with_capacity(64);
189
190        trace!(
191            "=== fuzzy_search on {haystack:?} (similarity_threshold {similarity_threshold:.2}) ===",
192        );
193        for start in 0..text_chars.len() {
194            trace!(
195                "=== new window at grapheme #{start} ({:?}) ===",
196                text_chars[start]
197            );
198
199            queue.clear();
200            queue.push(State {
201                node: 0,
202                j: start,
203                matched_start: start,
204                matched_end: start,
205                penalties: 0.,
206                edits: 0,
207                insertions: 0,
208                deletions: 0,
209                substitutions: 0,
210                swaps: 0,
211                #[cfg(debug_assertions)]
212                notes: vec![],
213            });
214
215            let mut q_idx = 0;
216            while q_idx < queue.len() {
217                let State {
218                    node,
219                    j,
220                    matched_start,
221                    matched_end,
222                    penalties,
223                    edits,
224                    insertions,
225                    deletions,
226                    substitutions,
227                    swaps,
228                    ..
229                } = queue[q_idx];
230                #[cfg(debug_assertions)]
231                let notes = queue[q_idx].notes.clone();
232                q_idx += 1;
233
234                /*trace!(
235                    "visit: node={} j={} span=({}->{}) score={:.3} edits={}",
236                    node, j, matched_start, matched_end, score, edits
237                );*/
238
239                let Node {
240                    output,
241                    transitions,
242                    ..
243                } = &self.nodes[node];
244
245                if !output.is_empty() {
246                    for &pattern_index in output {
247                        if !self.within_limits(
248                            self.patterns[pattern_index].limits.as_ref(),
249                            edits,
250                            insertions,
251                            deletions,
252                            substitutions,
253                            swaps,
254                        ) {
255                            continue;
256                        }
257                        let start_byte = grapheme_idx.get(matched_start).map_or(0, |&(b, _)| b);
258                        let end_byte = grapheme_idx
259                            .get(matched_end)
260                            .map_or_else(|| haystack.len(), |&(b, _)| b);
261                        let key = (start_byte, end_byte, pattern_index);
262
263                        let total = self.patterns[pattern_index].grapheme_len as f32;
264
265                        let similarity =
266                            (total - penalties) / total * self.patterns[pattern_index].weight;
267
268                        if similarity < similarity_threshold {
269                            continue;
270                        }
271
272                        best.entry(key)
273                            .and_modify(|entry| {
274                                if similarity > entry.similarity {
275                                    *entry = FuzzyMatch {
276                                        insertions,
277                                        deletions,
278                                        substitutions,
279                                        edits,
280                                        swaps,
281                                        pattern_index,
282                                        start: start_byte,
283                                        end: end_byte,
284                                        pattern: &self.patterns[pattern_index],
285                                        similarity,
286                                        text: &haystack[start_byte..end_byte],
287                                        #[cfg(debug_assertions)]
288                                        notes: notes.clone(),
289                                    };
290                                }
291                            })
292                            .or_insert_with(|| FuzzyMatch {
293                                insertions,
294                                deletions,
295                                substitutions,
296                                edits,
297                                swaps,
298                                pattern_index,
299                                start: start_byte,
300                                end: end_byte,
301                                pattern: &self.patterns[pattern_index],
302                                similarity,
303                                text: &haystack[start_byte..end_byte],
304                                #[cfg(debug_assertions)]
305                                notes: notes.clone(),
306                            });
307                    }
308                }
309
310                //
311                // 1) Same or similar symbol — только внутри текста
312                //
313                if j < text_chars.len() {
314                    let current_grapheme = text_chars[j].as_ref();
315                    let current_ch = current_grapheme.chars().next().unwrap_or('\0');
316
317                    for (edge_g, &next_node) in transitions {
318                        #[cfg(debug_assertions)]
319                        let notes = notes.clone();
320
321                        let g_ch = edge_g.chars().next().unwrap_or('\0');
322                        if edge_g == current_grapheme {
323                            // exact match
324                            trace!("  match   {:>8} ─ok→ node={}  sim=1.00", edge_g, next_node);
325                            queue.push(State {
326                                node: next_node,
327                                j: j + 1,
328                                matched_start: if matched_end == matched_start {
329                                    j
330                                } else {
331                                    matched_start
332                                },
333                                matched_end: j + 1,
334                                penalties,
335                                edits,
336                                insertions,
337                                deletions,
338                                substitutions,
339                                swaps,
340                                #[cfg(debug_assertions)]
341                                notes,
342                            });
343                        } else if self.within_limits_subst(
344                            self.get_node_limits(node),
345                            edits,
346                            substitutions,
347                        ) {
348                            // substitution
349                            let sim = *self.similarity.get(&(g_ch, current_ch)).unwrap_or(&0.);
350                            let penalty = self.penalties.substitution * (1.0 - sim);
351
352                            trace!(
353                                "  subst {:>8?} ─sub→ {current_grapheme:?} \
354                                 node={}  sim={:.2} pen={:.2} edits->{}",
355                                edge_g,
356                                next_node,
357                                sim,
358                                penalty,
359                                edits + 1
360                            );
361                            #[cfg(debug_assertions)]
362                            let mut notes = notes.clone();
363                            #[cfg(debug_assertions)]
364                            notes.push(format!("sub {edge_g:?} -> {current_grapheme:?} (sim={sim:.2}, pen={penalty:.2}) (subst->{}, edits->{})", substitutions + 1, edits + 1));
365
366                            queue.push(State {
367                                node: next_node,
368                                j: j + 1,
369                                matched_start: if matched_end == matched_start {
370                                    j
371                                } else {
372                                    matched_start
373                                },
374                                matched_end: j + 1,
375                                penalties: penalties + penalty,
376                                edits: edits + 1,
377                                insertions,
378                                deletions,
379                                substitutions: substitutions + 1,
380                                swaps,
381                                #[cfg(debug_assertions)]
382                                notes,
383                            });
384                        }
385                    }
386
387                    //
388                    // 2) Swap (transposition of two neighboring graphemes)
389                    //
390                    if j + 1 < text_chars.len() {
391                        let a = &text_chars[j];
392                        let b = &text_chars[j + 1];
393                        if let Some(&node2) = transitions
394                            .get(b.as_ref())
395                            .and_then(|&x| self.nodes[x].transitions.get(a.as_ref()))
396                        {
397                            if self.within_limits_swap_ahead(
398                                self.get_node_limits(node2),
399                                edits,
400                                swaps,
401                            ) {
402                                #[cfg(debug_assertions)]
403                                let mut notes = notes.clone();
404                                #[cfg(debug_assertions)]
405                                notes.push(format!(
406                                    "swap a:{a:?} b:{b:?} (swaps->{}, edits->{})",
407                                    swaps + 1,
408                                    edits + 1
409                                ));
410                                queue.push(State {
411                                    node: node2,
412                                    j: j + 2,
413                                    matched_start,
414                                    matched_end: j + 2,
415                                    penalties: penalties + self.penalties.swap,
416                                    edits: edits + 1,
417                                    insertions,
418                                    deletions,
419                                    substitutions,
420                                    swaps: swaps + 1,
421                                    #[cfg(debug_assertions)]
422                                    notes,
423                                });
424                            }
425                        }
426                    }
427
428                    //
429                    // 3a) Insertion (skip a haystack character)
430                    //
431                    if (matched_start != matched_end || matched_start != j)
432                        && self.within_limits_insertion_ahead(
433                            self.get_node_limits(node),
434                            edits,
435                            insertions,
436                        )
437                    {
438                        #[cfg(debug_assertions)]
439                        let mut notes = notes.clone();
440                        #[cfg(debug_assertions)]
441                        notes.push(format!(
442                            "ins {:?} (ins->{} , edits->{})",
443                            text_chars[j],
444                            insertions + 1,
445                            edits + 1
446                        ));
447                        queue.push(State {
448                            node,
449                            j: j + 1,
450                            matched_start,
451                            matched_end,
452                            penalties: penalties + self.penalties.insertion,
453                            edits: edits + 1,
454                            insertions: insertions + 1,
455                            deletions,
456                            substitutions,
457                            swaps,
458                            #[cfg(debug_assertions)]
459                            notes,
460                        });
461                    }
462                }
463
464                //
465                // 3b) Deletion (skip a pattern character) — always, even if j == len
466                //
467                if self.within_limits_deletion_ahead(self.get_node_limits(node), edits, deletions) {
468                    #[allow(unused_variables)]
469                    for (edge_g2, &next_node2) in transitions {
470                        trace!(
471                            "  delete to node={next_node2} penalty={:.2}",
472                            self.penalties.deletion
473                        );
474                        #[cfg(debug_assertions)]
475                        let mut notes = notes.clone();
476                        #[cfg(debug_assertions)]
477                        notes.push(format!("edge_g2 {edge_g2:?} (del->{:?})", deletions + 1));
478                        queue.push(State {
479                            node: next_node2,
480                            j,
481                            matched_start,
482                            matched_end,
483                            penalties: penalties + self.penalties.deletion,
484                            edits: edits + 1,
485                            insertions,
486                            deletions: deletions + 1,
487                            substitutions,
488                            swaps,
489                            #[cfg(debug_assertions)]
490                            notes,
491                        });
492                    }
493                }
494            }
495        }
496        FuzzyMatches {
497            haystack,
498            inner: best
499                .into_values()
500                .map(|mut m| {
501                    m.text = &haystack[m.start..m.end];
502                    m
503                })
504                .collect(),
505        }
506    }
507
508    /// Convenience wrapper over `search_unsorted` that applies the default sorting
509    /// order to the matches (via `default_sort()`).
510    ///
511    /// # Parameters
512    /// - `haystack`: the input text to search in.
513    /// - `similarity_threshold`: minimum similarity threshold for candidates.
514    ///
515    /// # Returns
516    /// `FuzzyMatches` with matches sorted according to the default ranking.
517    #[inline]
518    #[must_use]
519    pub fn search<'a>(&'a self, haystack: &'a str, similarity_threshold: f32) -> FuzzyMatches<'a> {
520        let mut matches = self.search_unsorted(haystack, similarity_threshold);
521        matches.default_sort();
522        matches
523    }
524
525    /// Convenience wrapper over `search_unsorted` that applies a greedy sort (via `greedy_sort()`),
526    ///
527    /// # Parameters
528    /// - `haystack`: the input text to search in.
529    /// - `similarity_threshold`: minimum similarity threshold for candidates.
530    ///
531    /// # Returns
532    /// `FuzzyMatches` with matches sorted by the greedy heuristic.
533    #[inline]
534    #[must_use]
535    pub fn search_greedy<'a>(
536        &'a self,
537        haystack: &'a str,
538        similarity_threshold: f32,
539    ) -> FuzzyMatches<'a> {
540        let mut matches = self.search_unsorted(haystack, similarity_threshold);
541        matches.greedy_sort();
542        matches
543    }
544
545    /// Search that returns non-overlapping matches by delegating to
546    /// `non_overlapping()` on the fully sorted (default) results. This will
547    /// greedily select a set of matches such that their spans do not overlap,
548    /// according to whatever strategy `non_overlapping` encapsulates.
549    ///
550    /// # Parameters
551    /// - `haystack`: the input text to search in.
552    /// - `similarity_threshold`: minimum similarity threshold for candidates.
553    ///
554    /// # Returns
555    /// `FuzzyMatches` containing a non-overlapping subset of matches.
556    #[must_use]
557    pub fn search_non_overlapping<'a>(
558        &'a self,
559        haystack: &'a str,
560        similarity_threshold: f32,
561    ) -> FuzzyMatches<'a> {
562        let mut matches = self.search(haystack, similarity_threshold);
563        matches.non_overlapping();
564        matches
565    }
566
567    /// Variation of `search_non_overlapping` that additionally enforces uniqueness
568    /// of patterns: each pattern (identified by `custom_unique_id` if present or by
569    /// its index) may only contribute one accepted match. Delegates to
570    /// `non_overlapping_unique()` after obtaining the base sorted matches.
571    ///
572    /// # Parameters
573    /// - `haystack`: the input text to search in.
574    /// - `similarity_threshold`: minimum similarity threshold for candidates.
575    ///
576    /// # Returns
577    /// `FuzzyMatches` containing a non-overlapping, pattern-unique subset of matches.
578    #[must_use]
579    pub fn search_non_overlapping_unique<'a>(
580        &'a self,
581        haystack: &'a str,
582        similarity_threshold: f32,
583    ) -> FuzzyMatches<'a> {
584        let mut matches = self.search(haystack, similarity_threshold);
585        matches.non_overlapping_unique();
586        matches
587    }
588
589    /// Perform replacements on `text` by finding non-overlapping fuzzy matches above
590    /// `threshold` and invoking `callback` for each. Matches are resolved via
591    /// `search_non_overlapping`, so they won’t overlap; the first chosen set is
592    /// used in left-to-right order.
593    ///
594    /// The `callback` is called with each `FuzzyMatch`. If it returns `Some(repl)`,
595    /// the matched span is replaced with `repl`. If it returns `None`, the original
596    /// substring from `text` is preserved.
597    ///
598    /// # Parameters
599    /// - `text`: input string to perform replacements on.
600    /// - `callback`: mapping from a `FuzzyMatch` to an optional replacement string.
601    /// - `threshold`: minimum similarity for a match to be considered.
602    ///
603    /// # Returns
604    /// A new `String` with the selected fuzzy matches replaced per `callback`.
605    ///
606    /// # Example
607    /// ```rust
608    /// use fuzzy_aho_corasick::FuzzyAhoCorasickBuilder;
609    /// let automaton = FuzzyAhoCorasickBuilder::new().build(["FOO", "BAR", "BAZ"]);
610    /// let result = automaton.replace("FOO BAR BAZ", |m| {
611    ///     (m.pattern.pattern == "BAR").then_some("###")
612    /// }, 0.8);
613    /// assert_eq!(result, "FOO ### BAZ");
614    /// ```
615    #[must_use]
616    pub fn replace<'a, F, S: Into<Cow<'a, str>>>(
617        &'a self,
618        text: &'a str,
619        callback: F,
620        threshold: f32,
621    ) -> String
622    where
623        F: Fn(&FuzzyMatch<'a>) -> Option<S>,
624        S: Into<Cow<'a, str>>,
625    {
626        self.search_non_overlapping(text, threshold)
627            .replace(callback)
628    }
629
630    /// Strip any leading fuzzy‐matched prefix from `haystack` using the given
631    /// similarity `threshold`, and return the remainder of the string.
632    ///
633    /// # Behavior
634    ///
635    /// - All initial [`Segment::Matched`] variants are skipped.
636    /// - Any unmatched segments consisting solely of whitespace are also skipped.
637    /// - The first non‐whitespace [`Segment::Unmatched`]:
638    ///   - Has its leading whitespace trimmed before appending.
639    ///   - Disables skipping so that all subsequent segments are included.
640    /// - After that point, both `Matched` and `Unmatched` segments are appended
641    ///   in full (without further trimming).
642    ///
643    /// # Parameters
644    ///
645    /// - `haystack`: The text to strip a fuzzy‐matched prefix from.
646    /// - `threshold`: A float from `0.0` to `1.0` indicating the minimum
647    ///   similarity score required for a match.
648    ///
649    /// # Returns
650    ///
651    /// A `String` containing the remainder of `haystack` after removing the
652    /// leading fuzzy‐matched portion and any leading whitespace.
653    ///
654    /// # Examples
655    ///
656    /// ```
657    /// use fuzzy_aho_corasick::{FuzzyAhoCorasickBuilder, FuzzyLimits};
658    /// let f = FuzzyAhoCorasickBuilder::new()
659    ///     .fuzzy(FuzzyLimits::new().edits(1))
660    ///     .case_insensitive(true)
661    ///     .build(["LOREM", "IPSUM"]);
662    ///
663    /// // "LROEM" fuzzy‐matches "LOREM", "PISUM" matches "IPSUM",
664    /// // so both are stripped, and leading space before "ZZZ" is trimmed:
665    /// let result = f.strip_prefix("LrEM ISuM Lorm ZZZ", 0.8);
666    /// assert_eq!(result, "ZZZ");
667    /// ```
668    #[must_use]
669    pub fn strip_prefix<'a>(&'a self, haystack: &'a str, threshold: f32) -> String {
670        self.search_non_overlapping(haystack, threshold)
671            .strip_prefix()
672    }
673
674    /// Perform a non‐overlapping fuzzy search over `haystack` with the given
675    /// similarity `threshold`, then strip any trailing fuzzy‐matched suffix
676    /// from the end of the string and return the leading portion.
677    ///
678    /// # Behavior
679    ///
680    /// - Conducts a non‐overlapping fuzzy search (via [`search_non_overlapping`]).
681    /// - Skips all trailing [`Segment::Matched`] variants.
682    /// - Skips any trailing [`Segment::Unmatched`] variants consisting solely of whitespace.
683    /// - The last non‐whitespace [`Segment::Unmatched`]:
684    ///   - Has its trailing whitespace trimmed before inclusion.
685    ///   - Marks the cutoff point—everything after it is dropped.
686    ///
687    /// # Parameters
688    ///
689    /// - `haystack`: The text to strip a fuzzy‐matched suffix from.
690    /// - `threshold`: A float in `0.0..=1.0` indicating the minimum similarity
691    ///   score required for a match to count as part of the suffix.
692    ///
693    /// # Returns
694    ///
695    /// A `String` containing the beginning of `haystack` with any trailing
696    /// fuzzy‐matched portion (and trailing whitespace) removed.
697    ///
698    /// # Examples
699    ///
700    /// ```rust
701    /// use fuzzy_aho_corasick::{FuzzyAhoCorasickBuilder, FuzzyLimits};
702    ///
703    /// let f = FuzzyAhoCorasickBuilder::new()
704    ///     .fuzzy(FuzzyLimits::new().edits(1))
705    ///     .case_insensitive(true)
706    ///     .build(["LOREM", "IPSUM"]);
707    ///
708    /// // The suffix " LrEM ISuM" fuzzily matches " LOREM IPSUM" at ≥0.8,
709    /// // so it's stripped from the end, leaving only "ZZZ".
710    /// let result = f.strip_postfix("ZZZ LrEM ISuM", 0.8);
711    /// assert_eq!(result, "ZZZ");
712    /// ```
713    #[must_use]
714    pub fn strip_postfix<'a>(&'a self, haystack: &'a str, threshold: f32) -> String {
715        self.search_non_overlapping(haystack, threshold)
716            .strip_postfix()
717    }
718
719    /// Split `haystack` into unmatched substrings by treating each fuzzy match
720    /// (above the given `threshold`) as a separator.
721    ///
722    /// # Behavior
723    ///
724    /// - Performs a non-overlapping fuzzy search over `haystack` using
725    ///   [`search_non_overlapping`].
726    /// - Delegates to the `split()` method on the resulting `FuzzyMatches`.
727    /// - Produces one `String` per unmatched segment in order, including empty
728    ///   segments if matches occur at the very start or end.
729    ///
730    /// # Parameters
731    ///
732    /// - `haystack`: The input text to split on fuzzy matches.
733    /// - `threshold`: A similarity cutoff (`0.0..=1.0`); only matches with
734    ///   a score ≥ `threshold` are treated as separators.
735    ///
736    /// # Returns
737    ///
738    /// A `Vec<String>` containing the parts of `haystack` between each fuzzy match.
739    ///
740    /// # Examples
741    ///
742    /// ```rust
743    /// use fuzzy_aho_corasick::{FuzzyAhoCorasickBuilder, FuzzyLimits};
744    ///
745    /// let engine = FuzzyAhoCorasickBuilder::new()
746    ///     .fuzzy(FuzzyLimits::new().edits(1))
747    ///     .case_insensitive(true)
748    ///     .build(["FOO", "BAR"]);
749    ///
750    /// let parts: Vec<String> = engine.split("xxFo0yyBAARzz", 0.8).collect();
751    /// assert_eq!(parts, vec!["xx", "yy", "zz"]);
752    /// ```
753    #[must_use]
754    pub fn split<'a>(
755        &'a self,
756        haystack: &'a str,
757        threshold: f32,
758    ) -> impl Iterator<Item = String> + 'a {
759        self.search_non_overlapping(haystack, threshold).split()
760    }
761
762    /// Returns an **iterator** that yields interleaving [`Segment::Matched`]
763    /// [`Segment::Unmatched`] items for the given text.
764    pub fn segment_iter<'a>(
765        &'a self,
766        haystack: &'a str,
767        threshold: f32,
768    ) -> impl Iterator<Item = Segment<'a>> {
769        self.search_non_overlapping(haystack, threshold)
770            .segment_iter()
771    }
772    /// Convenience wrapper around [`segment_iter`](Self::segment_iter).
773    #[must_use]
774    pub fn segment_text(&self, haystack: &str, threshold: f32) -> String {
775        self.search_non_overlapping(haystack, threshold)
776            .segment_text()
777    }
778}