pub struct FuzzyBridge { /* private fields */ }
Bridge to Levenshtein automata for efficient fuzzy literal matching.
Implementations§
Source§impl FuzzyBridge
impl FuzzyBridge
Sourcepub fn new(
literals: &[LiteralPattern],
_default_limits: Option<FuzzyLimits>,
_penalties: Option<FuzzyPenalties>,
case_insensitive: bool,
) -> Option<Self>
pub fn new( literals: &[LiteralPattern], _default_limits: Option<FuzzyLimits>, _penalties: Option<FuzzyPenalties>, case_insensitive: bool, ) -> Option<Self>
Create a new fuzzy bridge from literal patterns.
Sourcepub fn pattern_count(&self) -> usize
pub fn pattern_count(&self) -> usize
Get the number of patterns.
Sourcepub fn is_case_insensitive(&self) -> bool
pub fn is_case_insensitive(&self) -> bool
Returns whether case-insensitive matching is enabled.
Sourcepub fn add_word_list(&mut self, words: &[String], limits: Option<&FuzzyLimits>)
pub fn add_word_list(&mut self, words: &[String], limits: Option<&FuzzyLimits>)
Add word list patterns (from `\L<name>` named lists).
Sourcepub fn total_pattern_count(&self) -> usize
pub fn total_pattern_count(&self) -> usize
Get total pattern count including word lists.
Sourcepub fn limits(&self) -> &[Option<FuzzyLimits>]
pub fn limits(&self) -> &[Option<FuzzyLimits>]
Get the limits for patterns.
Sourcepub fn get_bitap_matcher(&self) -> Option<&BitapMatcher>
pub fn get_bitap_matcher(&self) -> Option<&BitapMatcher>
Get the Bitap matcher for the first pattern, if one is available.
Sourcepub fn pattern_char_len(&self, index: usize) -> Option<usize>
pub fn pattern_char_len(&self, index: usize) -> Option<usize>
Get the character length of a pattern.
Sourcepub fn is_all_exact(&self) -> bool
pub fn is_all_exact(&self) -> bool
Check if all patterns have 0 max edits (exact matching only). Returns true if all patterns either have no limits (exact match) or have explicit limits of 0 edits.
Sourcepub fn pattern_max_edits(&self, index: usize) -> Option<u8>
pub fn pattern_max_edits(&self, index: usize) -> Option<u8>
Get the maximum edit distance for a pattern.
Sourcepub fn max_pattern_len(&self) -> usize
pub fn max_pattern_len(&self) -> usize
Get the maximum pattern length across all patterns.
Sourcepub fn all_patterns_bitap_compatible(&self) -> bool
pub fn all_patterns_bitap_compatible(&self) -> bool
Check if all patterns are compatible with Bitap streaming (<=64 chars).
Sourcepub fn search_all(&self, text: &str, threshold: f32) -> CachedMatches
pub fn search_all(&self, text: &str, threshold: f32) -> CachedMatches
Search the entire text once and cache all matches. Uses Bitap when available (faster), falls back to Levenshtein NFA.
Sourcepub fn search_cached_at_position(
&self,
text: &str,
pos: usize,
threshold: f32,
) -> CachedMatches
pub fn search_cached_at_position( &self, text: &str, pos: usize, threshold: f32, ) -> CachedMatches
Search for fuzzy matches at a specific position and return as CachedMatches.
This is optimized for anchored patterns where we only need to match
at one position (e.g., position 0 for ^ anchored patterns).
Sourcepub fn search_non_overlapping(
&self,
text: &str,
threshold: f32,
pattern_idx: usize,
require_first_char: bool,
) -> Vec<DamLevMatch>
pub fn search_non_overlapping( &self, text: &str, threshold: f32, pattern_idx: usize, require_first_char: bool, ) -> Vec<DamLevMatch>
Fast non-overlapping search optimized for iteration (greedy leftmost).
Returns matches directly as a Vec, avoiding the HashMap overhead.
Uses optimized Bitap path when available.
When require_first_char is true, matches must start with the same first
character as the pattern. This filters out spurious matches like “bore”
when searching for “Lorem”.
Sourcepub fn search_non_overlapping_n(
&self,
text: &str,
threshold: f32,
pattern_idx: usize,
require_first_char: bool,
n: usize,
) -> Vec<DamLevMatch>
pub fn search_non_overlapping_n( &self, text: &str, threshold: f32, pattern_idx: usize, require_first_char: bool, n: usize, ) -> Vec<DamLevMatch>
Find up to n non-overlapping matches for a single pattern.
This is more efficient than search_non_overlapping when only a limited
number of matches is needed, as it stops searching after finding n matches.
Sourcepub fn search_first(
&self,
text: &str,
threshold: f32,
pattern_idx: usize,
) -> Option<DamLevMatch>
pub fn search_first( &self, text: &str, threshold: f32, pattern_idx: usize, ) -> Option<DamLevMatch>
Find the first match in text (fast path for single-match queries).
This is optimized for find() calls where only the first match is needed.
Uses early-exit to avoid scanning the entire text after finding a match.
Returns None if no match is found.
Sourcepub fn search_best_non_overlapping(
&self,
text: &str,
threshold: f32,
pattern_idx: usize,
require_first_char: bool,
) -> Vec<DamLevMatch>
pub fn search_best_non_overlapping( &self, text: &str, threshold: f32, pattern_idx: usize, require_first_char: bool, ) -> Vec<DamLevMatch>
Find best non-overlapping matches, preferring highest similarity.
This method finds all candidates then selects the best non-overlapping set, ensuring higher similarity matches are preferred over lower ones.
When require_first_char is true, matches must start with the same
first character as the pattern. This filters out spurious matches like “bore”
when searching for “Lorem”.
Sourcepub fn search_all_with_prefilter(
&self,
text: &str,
threshold: f32,
prefilter: &Prefilter,
) -> CachedMatches
pub fn search_all_with_prefilter( &self, text: &str, threshold: f32, prefilter: &Prefilter, ) -> CachedMatches
Search using prefilter candidates for faster matching.
Uses prefilter to identify candidate positions, then searches with NFA.
Sourcepub fn find_first_guard_nfa(
&self,
text: &str,
threshold: f32,
) -> Option<(usize, FuzzyMatchResult)>
pub fn find_first_guard_nfa( &self, text: &str, threshold: f32, ) -> Option<(usize, FuzzyMatchResult)>
Find the first match using Guard NFA (fast path for single pattern).
Returns immediately on first match - optimal for find_first operations.
Sourcepub fn find_first_with_prefilter(
&self,
text: &str,
threshold: f32,
prefilter: &Prefilter,
) -> Option<(usize, FuzzyMatchResult)>
pub fn find_first_with_prefilter( &self, text: &str, threshold: f32, prefilter: &Prefilter, ) -> Option<(usize, FuzzyMatchResult)>
Find the first match for a single-pattern search using prefilter.
This is optimized for first-match mode: returns as soon as a match is found without scanning the entire text. Only works for single-pattern searches.
Sourcepub fn search_at_position(
&self,
text: &str,
start_pos: usize,
threshold: f32,
) -> Option<(usize, FuzzyMatchResult)>
pub fn search_at_position( &self, text: &str, start_pos: usize, threshold: f32, ) -> Option<(usize, FuzzyMatchResult)>
Search for matches starting from a single position.
Returns the best match starting at the given position, or None if no match. This is used for greedy first-match mode.
Sourcepub fn search_at_position_fast(
&self,
text: &[u8],
start_pos: usize,
threshold: f32,
) -> Option<(usize, FuzzyMatchResult)>
pub fn search_at_position_fast( &self, text: &[u8], start_pos: usize, threshold: f32, ) -> Option<(usize, FuzzyMatchResult)>
Ultra-fast search at a single position using optimized Bitap.
This method avoids all allocations for the common case and is designed for the greedy-first hot path.
Sourcepub fn find_first_boyer_moore(
&self,
text: &[u8],
threshold: f32,
_max_offset: usize,
) -> Option<(usize, FuzzyMatchResult)>
pub fn find_first_boyer_moore( &self, text: &[u8], threshold: f32, _max_offset: usize, ) -> Option<(usize, FuzzyMatchResult)>
Find first match using streaming Bitap (single-pass, O(n*k)). Falls back to Boyer-Moore for very short texts where setup overhead matters.
Sourcepub fn find_first_lazy(
&self,
text: &[u8],
threshold: f32,
prefilter: &Prefilter,
) -> Option<(usize, FuzzyMatchResult)>
pub fn find_first_lazy( &self, text: &[u8], threshold: f32, prefilter: &Prefilter, ) -> Option<(usize, FuzzyMatchResult)>
Find first match using lazy streaming search.
Processes prefilter candidates one at a time, returning immediately when a match is found. This is optimal when matches occur early in the text.
Sourcepub fn find_first_batch_parallel(
&self,
text: &[u8],
threshold: f32,
prefilter: &Prefilter,
) -> Option<(usize, FuzzyMatchResult)>
pub fn find_first_batch_parallel( &self, text: &[u8], threshold: f32, prefilter: &Prefilter, ) -> Option<(usize, FuzzyMatchResult)>
Find first match using batch parallel position search.
Collects candidate positions from prefilter and processes them in batches using SIMD multi-position search for improved throughput.
This is most effective for:
- k=0 (exact match) where SIMD can process 2-4 positions per iteration
- ASCII patterns
- Many candidate positions
Sourcepub fn find_first_multi_pattern(
&self,
text: &[u8],
threshold: f32,
pattern_indices: &[usize],
prefilter: &Prefilter,
) -> Option<(usize, usize, FuzzyMatchResult)>
pub fn find_first_multi_pattern( &self, text: &[u8], threshold: f32, pattern_indices: &[usize], prefilter: &Prefilter, ) -> Option<(usize, usize, FuzzyMatchResult)>
Find first match across multiple patterns using parallel Bitap search.
This is optimized for simple alternations where we can skip NFA simulation and just run Bitap for each pattern at candidate positions.
Returns (pattern_index, start, FuzzyMatchResult) for the first match found.
Sourcepub fn find_first_multi_pattern_individual(
&self,
text: &[u8],
threshold: f32,
pattern_indices: &[usize],
) -> Option<(usize, usize, FuzzyMatchResult)>
pub fn find_first_multi_pattern_individual( &self, text: &[u8], threshold: f32, pattern_indices: &[usize], ) -> Option<(usize, usize, FuzzyMatchResult)>
Find the first match across multiple patterns by running each pattern’s streaming search individually. This is used when the multi-pattern prefilter would be ineffective (too many common bytes).
Returns (pattern_index, start, FuzzyMatchResult) for the earliest match found.
Sourcepub fn calculate_min_effective_threshold(&self, user_threshold: f32) -> f32
pub fn calculate_min_effective_threshold(&self, user_threshold: f32) -> f32
Calculate the minimum effective threshold across all patterns.
This returns the lowest threshold that could match any pattern, useful for early-exit optimizations.
Sourcepub fn search_all_multi_pattern(
&self,
text: &str,
threshold: f32,
pattern_indices: &[usize],
) -> CachedMatches
pub fn search_all_multi_pattern( &self, text: &str, threshold: f32, pattern_indices: &[usize], ) -> CachedMatches
Search for all matches across multiple patterns efficiently.
This is optimized for the multi-pattern case by processing all patterns in parallel, avoiding redundant text scans.
Returns a CachedMatches mapping (pattern_index, start) -> Vec<FuzzyMatchResult>.
Sourcepub fn find_at_cached(
&self,
cached: &CachedMatches,
pattern_index: usize,
from: usize,
) -> Option<FuzzyMatchResult>
pub fn find_at_cached( &self, cached: &CachedMatches, pattern_index: usize, from: usize, ) -> Option<FuzzyMatchResult>
Find a fuzzy match using cached results.
Sourcepub fn find_at(
&self,
text: &str,
pattern_index: usize,
from: usize,
threshold: f32,
) -> Option<FuzzyMatchResult>
pub fn find_at( &self, text: &str, pattern_index: usize, from: usize, threshold: f32, ) -> Option<FuzzyMatchResult>
Find a fuzzy match for a specific pattern at a position.
Sourcepub fn find_with_boundary_insertions(
&self,
text: &str,
pattern_index: usize,
from: usize,
to: Option<usize>,
threshold: f32,
cached: Option<&CachedMatches>,
) -> Option<FuzzyMatchResult>
pub fn find_with_boundary_insertions( &self, text: &str, pattern_index: usize, from: usize, to: Option<usize>, threshold: f32, cached: Option<&CachedMatches>, ) -> Option<FuzzyMatchResult>
Find a fuzzy match that allows boundary insertions (for anchored patterns). Uses cached results to avoid O(N) per-call overhead.
Sourcepub fn pattern_text(&self, index: usize) -> Option<&str>
pub fn pattern_text(&self, index: usize) -> Option<&str>
Get the pattern text for a given index.