Skip to main content

FuzzyBridge

Struct FuzzyBridge 

Source
pub struct FuzzyBridge { /* private fields */ }
Expand description

Bridge to Levenshtein automata for efficient fuzzy literal matching.

Implementations§

Source§

impl FuzzyBridge

Source

pub fn new( literals: &[LiteralPattern], _default_limits: Option<FuzzyLimits>, _penalties: Option<FuzzyPenalties>, case_insensitive: bool, ) -> Option<Self>

Create a new fuzzy bridge from literal patterns.

Source

pub fn pattern_count(&self) -> usize

Get the number of patterns.

Source

pub fn is_case_insensitive(&self) -> bool

Returns whether case-insensitive matching is enabled.

Source

pub fn add_word_list(&mut self, words: &[String], limits: Option<&FuzzyLimits>)

Add word list patterns (from \L) for matching. The words will be matched against text with the given fuzzy limits.

Source

pub fn total_pattern_count(&self) -> usize

Get total pattern count including word lists.

Source

pub fn limits(&self) -> &[Option<FuzzyLimits>]

Get the fuzzy limits for the patterns. An entry of None means that pattern has no limits (exact match).

Source

pub fn get_bitap_matcher(&self) -> Option<&BitapMatcher>

Get the Bitap matcher for the first pattern (this method takes no pattern index argument).

Source

pub fn pattern_char_len(&self, index: usize) -> Option<usize>

Get the character length of a pattern.

Source

pub fn is_all_exact(&self) -> bool

Check if all patterns have 0 max edits (exact matching only). Returns true if all patterns either have no limits (exact match) or have explicit limits of 0 edits.

Source

pub fn pattern_max_edits(&self, index: usize) -> Option<u8>

Get the maximum edit distance for a pattern.

Source

pub fn max_pattern_len(&self) -> usize

Get the maximum pattern length across all patterns.

Source

pub fn max_edits(&self) -> Option<u8>

Get the maximum edit distance across all patterns.

Source

pub fn all_patterns_bitap_compatible(&self) -> bool

Check if all patterns are compatible with Bitap streaming (<=64 chars).

Source

pub fn search_all(&self, text: &str, threshold: f32) -> CachedMatches

Search the entire text once and cache all matches. Uses Bitap when available (faster) and falls back to the Levenshtein NFA otherwise.

Source

pub fn search_cached_at_position( &self, text: &str, pos: usize, threshold: f32, ) -> CachedMatches

Search for fuzzy matches at a specific position and return them as CachedMatches.

This is optimized for anchored patterns where we only need to match at one position (e.g., position 0 for ^ anchored patterns).

Source

pub fn search_non_overlapping( &self, text: &str, threshold: f32, pattern_idx: usize, require_first_char: bool, ) -> Vec<DamLevMatch>

Fast non-overlapping search optimized for iteration (greedy leftmost).

Returns matches directly as a Vec, avoiding the HashMap overhead. Uses optimized Bitap path when available.

When require_first_char is true, matches must start with the same first character as the pattern. This filters out spurious matches like “bore” when searching for “Lorem”.

Source

pub fn search_non_overlapping_n( &self, text: &str, threshold: f32, pattern_idx: usize, require_first_char: bool, n: usize, ) -> Vec<DamLevMatch>

Find up to n non-overlapping matches for a single pattern.

This is more efficient than search_non_overlapping when only a limited number of matches is needed, as it stops searching after finding n matches.

Source

pub fn search_first( &self, text: &str, threshold: f32, pattern_idx: usize, ) -> Option<DamLevMatch>

Find the first match in text (fast path for single-match queries).

This is optimized for find() calls where only the first match is needed. Exits early once a match is found, so the rest of the text is not scanned. Returns None if no match is found.

Source

pub fn search_best_non_overlapping( &self, text: &str, threshold: f32, pattern_idx: usize, require_first_char: bool, ) -> Vec<DamLevMatch>

Find best non-overlapping matches, preferring highest similarity.

This method finds all candidates then selects the best non-overlapping set, ensuring higher similarity matches are preferred over lower ones.

When require_first_char is true (default), matches must start with the same first character as the pattern. This filters out spurious matches like “bore” when searching for “Lorem”.

Source

pub fn search_all_with_prefilter( &self, text: &str, threshold: f32, prefilter: &Prefilter, ) -> CachedMatches

Search using prefilter candidates for faster matching.

Uses prefilter to identify candidate positions, then searches with NFA.

Source

pub fn find_first_guard_nfa( &self, text: &str, threshold: f32, ) -> Option<(usize, FuzzyMatchResult)>

Find the first match using Guard NFA (fast path for single pattern).

Returns immediately on first match - optimal for find_first operations.

Source

pub fn find_first_with_prefilter( &self, text: &str, threshold: f32, prefilter: &Prefilter, ) -> Option<(usize, FuzzyMatchResult)>

Find the first match for a single-pattern search using prefilter.

This is optimized for first-match mode: returns as soon as a match is found without scanning the entire text. Only works for single-pattern searches.

Source

pub fn search_at_position( &self, text: &str, start_pos: usize, threshold: f32, ) -> Option<(usize, FuzzyMatchResult)>

Search for matches starting from a single position.

Returns the best match starting at the given position, or None if no match. This is used for greedy first-match mode.

Source

pub fn search_at_position_fast( &self, text: &[u8], start_pos: usize, threshold: f32, ) -> Option<(usize, FuzzyMatchResult)>

Ultra-fast search at a single position using optimized Bitap.

This method avoids all allocations for the common case and is designed for the greedy-first hot path.

Source

pub fn find_first_boyer_moore( &self, text: &[u8], threshold: f32, _max_offset: usize, ) -> Option<(usize, FuzzyMatchResult)>

Find first match using streaming Bitap (single-pass, O(n*k)). Falls back to Boyer-Moore for very short texts where setup overhead matters.

Source

pub fn find_first_lazy( &self, text: &[u8], threshold: f32, prefilter: &Prefilter, ) -> Option<(usize, FuzzyMatchResult)>

Find first match using lazy streaming search.

Processes prefilter candidates one at a time, returning immediately when a match is found. This is optimal when matches occur early in the text.

Source

pub fn find_first_batch_parallel( &self, text: &[u8], threshold: f32, prefilter: &Prefilter, ) -> Option<(usize, FuzzyMatchResult)>

Find first match using batch parallel position search.

Collects candidate positions from prefilter and processes them in batches using SIMD multi-position search for improved throughput.

This is most effective for:

  • k=0 (exact match) where SIMD can process 2-4 positions per iteration
  • ASCII patterns
  • Many candidate positions
Source

pub fn find_first_multi_pattern( &self, text: &[u8], threshold: f32, pattern_indices: &[usize], prefilter: &Prefilter, ) -> Option<(usize, usize, FuzzyMatchResult)>

Find first match across multiple patterns using parallel Bitap search.

This is optimized for simple alternations where we can skip NFA simulation and just run Bitap for each pattern at candidate positions.

Returns (pattern_index, start, FuzzyMatchResult) for the first match found.

Source

pub fn find_first_multi_pattern_individual( &self, text: &[u8], threshold: f32, pattern_indices: &[usize], ) -> Option<(usize, usize, FuzzyMatchResult)>

Find the first match across multiple patterns by running each pattern’s streaming search individually. This is used when the multi-pattern prefilter would be ineffective (too many common bytes).

Returns (pattern_index, start, FuzzyMatchResult) for the earliest match found.

Source

pub fn calculate_min_effective_threshold(&self, user_threshold: f32) -> f32

Calculate the minimum effective threshold across all patterns.

This returns the lowest threshold that could match any pattern, useful for early-exit optimizations.

Source

pub fn search_all_multi_pattern( &self, text: &str, threshold: f32, pattern_indices: &[usize], ) -> CachedMatches

Search for all matches across multiple patterns efficiently.

This is optimized for the multi-pattern case by processing all patterns in parallel, avoiding redundant text scans.

Returns a map of (pattern_index, start) -> Vec<FuzzyMatchResult>.

Source

pub fn find_at_cached( &self, cached: &CachedMatches, pattern_index: usize, from: usize, ) -> Option<FuzzyMatchResult>

Find a fuzzy match using cached results.

Source

pub fn find_at( &self, text: &str, pattern_index: usize, from: usize, threshold: f32, ) -> Option<FuzzyMatchResult>

Find a fuzzy match for a specific pattern at a position.

Source

pub fn find_with_boundary_insertions( &self, text: &str, pattern_index: usize, from: usize, to: Option<usize>, threshold: f32, cached: Option<&CachedMatches>, ) -> Option<FuzzyMatchResult>

Find a fuzzy match that allows boundary insertions (for anchored patterns). Uses the cached results, when provided, to avoid O(N) per-call overhead.

Source

pub fn pattern_text(&self, index: usize) -> Option<&str>

Get the pattern text for a given index.

Trait Implementations§

Source§

impl Debug for FuzzyBridge

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.