Struct FuzzyRegex

Source

pub struct FuzzyRegex { /* private fields */ }

Expand description

A compiled fuzzy regular expression.

§Example

use fuzzy_regex::FuzzyRegex;

let re = FuzzyRegex::new(r"hello~2").unwrap();
assert!(re.is_match("helo"));  // Matches with 1 edit
assert!(re.is_match("hello")); // Exact match

Implementations§

Source §

impl FuzzyRegex

Source

pub fn new(pattern: &str) -> Result<Self>

Create a new FuzzyRegex with default settings.

For customized settings, use FuzzyRegexBuilder.

§Errors

Returns an error if the pattern is invalid or cannot be compiled.

Source

pub fn builder(pattern: &str) -> FuzzyRegexBuilder

Create a builder for customized regex construction.

Source

pub fn as_str(&self) -> &str

Get the original pattern string.

Source

pub fn captures_len(&self) -> usize

Get the number of capture groups.

Source

pub fn similarity_threshold(&self) -> f32

Get the configured similarity threshold.

Source

pub fn literals(&self) -> &[LiteralPattern]

Get the literal patterns extracted from this regex.

This is useful for debugging and introspection.

Source

pub fn is_simple_fuzzy(&self) -> bool

Check if this pattern is detected as “simple” (single fuzzy literal). Simple patterns can skip NFA simulation for faster matching.

Source

pub fn set_word_list( &mut self, name: impl Into<SmartCow<'static>>, words: Vec<impl Into<SmartCow<'static>>>, )

Set a named word list for \L patterns.

§Example

let mut re = fuzzy_regex::FuzzyRegex::new(r"\b\L<words>{e<=1}\b").unwrap();
re.set_word_list("words", vec!["cat", "dog", "frog"]);

assert!(re.is_match("cot"));  // 1 substitution from "cat"
assert!(re.is_match("dag"));  // 1 substitution from "dog"

Source

pub fn get_word_list(&self, name: &str) -> Option<&[SmartCow<'static>]>

Get a named word list.

Source

pub fn named_lists(&self) -> &HashMap<SmartCow<'static>, Vec<SmartCow<'static>>>

Get all named word lists.

Returns a reference to the internal word lists map. This matches the API of mrab-regex’s named_lists property.

Source

pub fn has_word_lists(&self) -> bool

Check if this regex has any word lists defined.

Source

pub fn is_match(&self, text: &str) -> bool

Check if the pattern matches anywhere in the text.

Source

pub fn is_match_at(&self, text: &str, start: usize) -> bool

Check if the pattern matches at the given position (start) in the text.

Source

pub fn is_full_match(&self, text: &str) -> bool

Check if the pattern matches the entire text.

This is equivalent to anchoring the pattern at both start and end.

Source

pub fn fullmatch<'t>(&self, text: &'t str) -> Option<Match<'t>>

Find a match that spans the entire text.

Returns Some if the pattern matches the full string from start to end. This is equivalent to using ^pattern$ in a regular expression.

Source

pub fn fullmatch_at<'t>(&self, text: &'t str, start: usize) -> Option<Match<'t>>

Find a match that spans from the given start position to the end.

The match must start at start and end at text.len().

Source

pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>>

Find the first match in the text. In BESTMATCH mode, returns the match with fewest errors. In ENHANCEMATCH mode, improves the fit of the found match.

Source

pub fn find_with_timeout<'t>( &self, text: &'t str, timeout: Duration, ) -> Result<Option<Match<'t>>>

Find the first match with a timeout.

Note: Timeout is checked at certain checkpoints during matching, so it’s not precise. The actual time may exceed the timeout slightly.

Source

pub fn find_with_config_timeout<'t>( &self, text: &'t str, ) -> Result<Option<Match<'t>>>

Find the first match using the configured timeout (if set). This uses the timeout configured via FuzzyRegexBuilder::timeout().

Source

pub fn find_at<'t>(&self, text: &'t str, start: usize) -> Option<Match<'t>>

Find a match starting at exactly the given position.

This succeeds only if a match begins exactly at start. Use find_from to search from start onwards.

The full text is passed to the matcher for proper boundary handling (e.g., \b word boundaries need context from preceding characters).

Source

pub fn find_from<'t>(&self, text: &'t str, start: usize) -> Option<Match<'t>>

Find the first match at or after the given position.

Unlike find_at which only matches at exactly start, this searches forward from start until a match is found or the text is exhausted.

Source

pub fn find_rev<'t>(&self, text: &'t str) -> Option<Match<'t>>

Find the last match in the text (reverse search).

This searches from the end of the text backwards, returning the rightmost match. Similar to Python’s re.search() with a reversed pattern.

Source

pub fn find_iter_rev<'t>(&self, text: &'t str) -> Vec<Match<'t>>

Find all matches from the end (reverse order).

Returns matches in reverse order (rightmost first).

Source

pub fn find_iter<'t>(&self, text: &'t str) -> Matches<'t> ⓘ

Find all non-overlapping matches.

Source

pub fn find_n<'t>(&self, text: &'t str, n: usize) -> Vec<Match<'t>>

Find the first n non-overlapping matches.

This is more efficient than find_iter().take(n).collect() because it stops searching after finding n matches instead of collecting all matches first.

§Example

use fuzzy_regex::FuzzyRegex;

let re = FuzzyRegex::new(r"(?:test){e<=1}").unwrap();
let text = "test tset testing tests";
let first_two = re.find_n(text, 2);
assert_eq!(first_two.len(), 2);

Source

pub fn find_all_overlapping<'t>(&self, text: &'t str) -> Vec<Match<'t>>

Find all matches, including overlapping ones.

Unlike find_iter, this method tries every position in the text and returns all possible matches, even if they overlap.

Source

pub fn find_all_overlapping_filtered<'t>( &self, text: &'t str, similarity_threshold: f32, ) -> Vec<Match<'t>>

Find all matches above a similarity threshold, including overlapping ones.

This is more efficient than find_all_overlapping followed by filtering, as it skips creating Match objects for results below the threshold.

Source

pub fn captures_all_overlapping<'t>( &self, text: &'t str, similarity_threshold: f32, ) -> Vec<Captures<'t>>

Get all overlapping matches with capture group information.

This is useful for identifying which alternative in an alternation matched.

Source

pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>>

Get captures for the first match.

Source

pub fn captures_at<'t>( &self, text: &'t str, start: usize, ) -> Option<Captures<'t>>

Get captures starting at a specific position.

Source

pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CaptureMatches<'r, 't> ⓘ

Iterate over the captures (capture group information) of each match in the text.

Source

pub fn replace(&self, text: &str, replacement: &str) -> String

Replace the first match.

§Panics

This function should not panic. The internal unwrap() is safe because a match result always contains the full match at index 0.

Source

pub fn replace_all(&self, text: &str, replacement: &str) -> String

Replace all non-overlapping matches.

Source

pub fn replace_all_with<F>(&self, text: &str, replacer: F) -> String
where F: FnMut(&Captures<'_>) -> String,

Replace matches using a closure.

Source

pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> ⓘ

Split the text by matches.

Source

pub fn splitn<'t>(&self, text: &'t str, n: usize) -> Vec<&'t str>

Split the text into at most n parts.

This is more efficient than split().take(n).collect() because it stops searching after finding enough splits.

The last element will contain the remainder of the string if there are more than n-1 matches.

§Example

use fuzzy_regex::FuzzyRegex;

let re = FuzzyRegex::new(r",").unwrap();
let parts = re.splitn("a,b,c,d,e", 3);
assert_eq!(parts, vec!["a", "b", "c,d,e"]);

Source

pub fn stream(&self) -> StreamingMatcher<'_>

Create a streaming matcher for incremental processing.

This allows processing large files or network streams without loading everything into memory. Matches can span chunk boundaries.

§Example

use fuzzy_regex::FuzzyRegex;

let re = FuzzyRegex::new("(?:hello){e<=1}").unwrap();
let mut stream = re.stream();

// Process data in chunks
for m in stream.feed(b"hel") {
    println!("Match at {}", m.start());
}
for m in stream.feed(b"lo world") {
    println!("Match at {}", m.start());
}