Skip to main content

fuzzy_regex/api/
builder.rs

1//! Builder for `FuzzyRegex`.
2
3use crate::types::{FuzzyLimits, FuzzyPenalties};
4
5use super::regex::FuzzyRegex;
6use crate::error::Result;
7
8/// Builder for constructing a `FuzzyRegex` with custom configuration.
9#[derive(Debug, Clone)]
10pub struct FuzzyRegexBuilder {
11    pattern: String,
12    config: RegexConfig,
13}
14
/// Flags controlling match behavior.
///
/// Every flag is off in the `Default` value. The type is `Copy` and compares
/// by value, so two flag sets can be checked for equality directly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
// One independent on/off switch per regex flag, so a bool per field is the natural shape.
#[allow(clippy::struct_excessive_bools)]
pub struct MatchFlags {
    /// `BESTMATCH` flag - find best match instead of first.
    pub best_match: bool,
    /// `ENHANCEMATCH` flag - improve match quality.
    pub enhance_match: bool,
    /// `POSIX` flag - find longest match at leftmost position.
    pub posix: bool,
    /// `(?x)` - Verbose mode (ignore whitespace, allow comments).
    pub verbose: bool,
    /// `(?s)` - Dot-all mode (`.` matches newlines).
    pub dot_all: bool,
    /// `(?m)` - Multi-line mode (`^`/`$` match at line boundaries).
    pub multi_line: bool,
    /// `(?U)` - Ungreedy mode (invert greediness of quantifiers).
    ///
    /// When set, `*`, `+`, `?` become non-greedy by default, and `*?`, `+?`, `??` become greedy.
    pub ungreedy: bool,
    /// `(?g)` - Global mode (find all matches).
    ///
    /// When false (default), stops at first valid match (faster).
    /// When true, searches for all matches.
    pub global: bool,
    /// `(?u)` - Unicode mode (enable Unicode character classes).
    ///
    /// When set, `\w`, `\d`, `\s` match Unicode characters instead of ASCII only.
    pub unicode: bool,
}
42
43/// Configuration for regex matching.
44#[derive(Debug, Clone)]
45#[allow(clippy::struct_excessive_bools)]
46pub struct RegexConfig {
47    /// Case-insensitive matching.
48    pub case_insensitive: bool,
49    /// Verbose mode - ignore whitespace and allow `#` comments in pattern.
50    pub verbose: bool,
51    /// Dot-all mode - `.` matches newlines.
52    pub dot_all: bool,
53    /// Multi-line mode - `^` and `$` match at line boundaries.
54    pub multi_line: bool,
55    /// Ungreedy mode - invert default greediness of quantifiers.
56    pub ungreedy: bool,
57    /// Default similarity threshold.
58    pub similarity_threshold: f32,
59    /// Default number of edits allowed.
60    pub default_edits: u8,
61    /// Default fuzzy limits.
62    pub default_limits: Option<FuzzyLimits>,
63    /// Edit penalties.
64    pub penalties: Option<FuzzyPenalties>,
65    /// Maximum threads for NFA simulation (beam width).
66    pub max_threads: usize,
67    /// Match behavior flags.
68    pub match_flags: MatchFlags,
69    /// Partial matching - allow matches that reach end of text.
70    pub partial: bool,
71    /// Default timeout for matching operations.
72    pub timeout: Option<std::time::Duration>,
73    /// Greedy first-match mode - return first match found (faster).
74    /// Similar to mrab-regex behavior - searches position by position,
75    /// returning on first match instead of searching for best match.
76    pub greedy_first: bool,
77}
78
79impl Default for RegexConfig {
80    fn default() -> Self {
81        RegexConfig {
82            case_insensitive: false,
83            verbose: false,
84            dot_all: false,
85            multi_line: false,
86            ungreedy: false,
87            // Default to 0.0 so that edit-limited patterns like {e<=N} accept all valid matches.
88            // Users can set a higher threshold with .similarity() to filter by quality.
89            // This matches mrab-regex behavior where only edit limits matter, not similarity.
90            similarity_threshold: 0.0,
91            default_edits: 0,
92            default_limits: None,
93            penalties: None,
94            max_threads: 1000,
95            match_flags: MatchFlags::default(),
96            partial: false,
97            timeout: None,
98            greedy_first: false,
99        }
100    }
101}
102
103impl FuzzyRegexBuilder {
104    /// Create a new builder with the given pattern.
105    #[must_use]
106    pub fn new(pattern: &str) -> Self {
107        FuzzyRegexBuilder {
108            pattern: pattern.to_string(),
109            config: RegexConfig::default(),
110        }
111    }
112
113    /// Set case-insensitive matching.
114    #[must_use]
115    pub fn case_insensitive(mut self, yes: bool) -> Self {
116        self.config.case_insensitive = yes;
117        self
118    }
119
120    /// Enable verbose mode (ignore whitespace and allow `#` comments).
121    ///
122    /// In verbose mode:
123    /// - Whitespace is ignored (use `\s` or `[ ]` for literal space)
124    /// - `#` starts a comment that extends to end of line
125    ///
126    /// This allows formatting patterns for readability:
127    /// ```text
128    /// (?x)
129    /// [A-Z][a-z]+     # First name
130    /// \s+             # Whitespace
131    /// [A-Z][a-z]+     # Last name
132    /// ```
133    #[must_use]
134    pub fn verbose(mut self, yes: bool) -> Self {
135        self.config.verbose = yes;
136        self
137    }
138
139    /// Enable dot-all mode (`.` matches newlines).
140    ///
141    /// By default, `.` matches any character except newlines.
142    /// When enabled, `.` matches any character including `\n`.
143    #[must_use]
144    pub fn dot_all(mut self, yes: bool) -> Self {
145        self.config.dot_all = yes;
146        self
147    }
148
149    /// Enable multi-line mode (`^` and `$` match at line boundaries).
150    ///
151    /// By default, `^` matches only at the start of text and `$` only at the end.
152    /// When enabled:
153    /// - `^` also matches after each newline
154    /// - `$` also matches before each newline
155    #[must_use]
156    pub fn multi_line(mut self, yes: bool) -> Self {
157        self.config.multi_line = yes;
158        self
159    }
160
161    /// Enable ungreedy mode (invert default greediness of quantifiers).
162    ///
163    /// When enabled:
164    /// - `*`, `+`, `?` become non-greedy (match as little as possible)
165    /// - `*?`, `+?`, `??` become greedy (match as much as possible)
166    ///
167    /// This is equivalent to the `(?U)` inline flag.
168    #[must_use]
169    pub fn ungreedy(mut self, yes: bool) -> Self {
170        self.config.ungreedy = yes;
171        self
172    }
173
174    /// Enable global mode (find all matches).
175    ///
176    /// When enabled, use `find_iter()` to get all matches.
177    /// When disabled (default), stops at first valid match (faster).
178    /// This is equivalent to the `(?g)` inline flag.
179    #[must_use]
180    pub fn global(mut self, yes: bool) -> Self {
181        self.config.match_flags.global = yes;
182        self
183    }
184
185    /// Enable greedy first-match mode (similar to mrab-regex behavior).
186    ///
187    /// When enabled, searches position by position and returns the first match found.
188    /// This is faster than the default best-match behavior when matches exist early
189    /// in the text, but may not find the optimal match.
190    ///
191    /// Default behavior (`greedy_first=false`): searches all positions to find best match.
192    /// Greedy first (`greedy_first=true`): stops at first match found.
193    #[must_use]
194    pub fn greedy_first(mut self, yes: bool) -> Self {
195        self.config.greedy_first = yes;
196        self
197    }
198
199    /// Enable Unicode mode for character classes.
200    ///
201    /// When enabled:
202    /// - `\w` matches Unicode word characters (not just ASCII `[a-zA-Z0-9_]`)
203    /// - `\d` matches Unicode digits
204    /// - `\s` matches Unicode whitespace
205    ///
206    /// This is equivalent to the `(?u)` inline flag.
207    #[must_use]
208    pub fn unicode(mut self, yes: bool) -> Self {
209        self.config.match_flags.unicode = yes;
210        self
211    }
212
213    /// Set the similarity threshold (0.0 - 1.0).
214    ///
215    /// Matches with similarity below this threshold are rejected.
216    #[must_use]
217    pub fn similarity(mut self, threshold: f32) -> Self {
218        self.config.similarity_threshold = threshold.clamp(0.0, 1.0);
219        self
220    }
221
222    /// Set the default number of edits allowed for fuzzy matching.
223    ///
224    /// This applies to literals without explicit fuzziness markers.
225    #[must_use]
226    pub fn edits(mut self, count: u8) -> Self {
227        self.config.default_edits = count;
228        self
229    }
230
231    /// Set detailed fuzzy limits for default matching.
232    #[must_use]
233    pub fn fuzzy(mut self, limits: FuzzyLimits) -> Self {
234        self.config.default_limits = Some(limits);
235        self
236    }
237
238    /// Set edit operation penalties.
239    #[must_use]
240    pub fn penalties(mut self, penalties: FuzzyPenalties) -> Self {
241        self.config.penalties = Some(penalties);
242        self
243    }
244
245    /// Set the maximum number of threads for NFA simulation.
246    ///
247    /// Higher values allow more complex patterns but use more memory.
248    #[must_use]
249    pub fn max_threads(mut self, max: usize) -> Self {
250        self.config.max_threads = max;
251        self
252    }
253
254    /// Enable partial matching.
255    ///
256    /// When enabled, matches that reach the end of the text are considered
257    /// successful even if they haven't completed. The `Match::partial()` method
258    /// can be used to check if a match is partial.
259    ///
260    /// This is useful for streaming input or when the text may be truncated.
261    #[must_use]
262    pub fn partial(mut self, yes: bool) -> Self {
263        self.config.partial = yes;
264        self
265    }
266
267    /// Set a timeout for matching operations.
268    ///
269    /// If a match operation takes longer than the timeout, it will be cancelled.
270    /// The default is no timeout.
271    ///
272    /// Note: Timeout is checked at certain checkpoints during matching, so it's
273    /// not precise. The actual time may exceed the timeout slightly.
274    #[must_use]
275    pub fn timeout(mut self, duration: std::time::Duration) -> Self {
276        self.config.timeout = Some(duration);
277        self
278    }
279
280    /// Build the `FuzzyRegex`.
281    ///
282    /// # Errors
283    ///
284    /// Returns an error if the pattern is invalid or cannot be compiled.
285    pub fn build(self) -> Result<FuzzyRegex> {
286        FuzzyRegex::compile(self.pattern, self.config)
287    }
288}