fuzzy_regex/api/builder.rs
1//! Builder for `FuzzyRegex`.
2
3use crate::types::{FuzzyLimits, FuzzyPenalties};
4
5use super::regex::FuzzyRegex;
6use crate::error::Result;
7
8/// Builder for constructing a `FuzzyRegex` with custom configuration.
9#[derive(Debug, Clone)]
10pub struct FuzzyRegexBuilder {
11 pattern: String,
12 config: RegexConfig,
13}
14
/// Flags controlling match behavior.
///
/// Every flag is `false` in the `Default` value. The type is `Copy` and
/// comparable with `==`, so a flag set can be passed around and checked
/// by value.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
// Kept as a flat set of public bools; the lint is silenced rather than
// restructuring fields that callers may already name directly.
#[allow(clippy::struct_excessive_bools)]
pub struct MatchFlags {
    /// `BESTMATCH` flag - find best match instead of first.
    pub best_match: bool,
    /// `ENHANCEMATCH` flag - improve match quality.
    pub enhance_match: bool,
    /// `POSIX` flag - find longest match at leftmost position.
    pub posix: bool,
    /// `(?x)` - Verbose mode (ignore whitespace, allow comments).
    pub verbose: bool,
    /// `(?s)` - Dot-all mode (`.` matches newlines).
    pub dot_all: bool,
    /// `(?m)` - Multi-line mode (`^`/`$` match at line boundaries).
    pub multi_line: bool,
    /// `(?U)` - Ungreedy mode (invert greediness of quantifiers).
    ///
    /// When set, `*`, `+`, `?` become non-greedy by default, and `*?`, `+?`,
    /// `??` become greedy.
    pub ungreedy: bool,
    /// `(?g)` - Global mode (find all matches).
    ///
    /// When false (default), stops at first valid match (faster).
    /// When true, searches for all matches.
    pub global: bool,
    /// `(?u)` - Unicode mode (enable Unicode character classes).
    ///
    /// When set, `\w`, `\d`, `\s` match Unicode characters instead of ASCII only.
    pub unicode: bool,
}
42
43/// Configuration for regex matching.
44#[derive(Debug, Clone)]
45#[allow(clippy::struct_excessive_bools)]
46pub struct RegexConfig {
47 /// Case-insensitive matching.
48 pub case_insensitive: bool,
49 /// Verbose mode - ignore whitespace and allow `#` comments in pattern.
50 pub verbose: bool,
51 /// Dot-all mode - `.` matches newlines.
52 pub dot_all: bool,
53 /// Multi-line mode - `^` and `$` match at line boundaries.
54 pub multi_line: bool,
55 /// Ungreedy mode - invert default greediness of quantifiers.
56 pub ungreedy: bool,
57 /// Default similarity threshold.
58 pub similarity_threshold: f32,
59 /// Default number of edits allowed.
60 pub default_edits: u8,
61 /// Default fuzzy limits.
62 pub default_limits: Option<FuzzyLimits>,
63 /// Edit penalties.
64 pub penalties: Option<FuzzyPenalties>,
65 /// Maximum threads for NFA simulation (beam width).
66 pub max_threads: usize,
67 /// Match behavior flags.
68 pub match_flags: MatchFlags,
69 /// Partial matching - allow matches that reach end of text.
70 pub partial: bool,
71 /// Default timeout for matching operations.
72 pub timeout: Option<std::time::Duration>,
73 /// Greedy first-match mode - return first match found (faster).
74 /// Similar to mrab-regex behavior - searches position by position,
75 /// returning on first match instead of searching for best match.
76 pub greedy_first: bool,
77}
78
79impl Default for RegexConfig {
80 fn default() -> Self {
81 RegexConfig {
82 case_insensitive: false,
83 verbose: false,
84 dot_all: false,
85 multi_line: false,
86 ungreedy: false,
87 // Default to 0.0 so that edit-limited patterns like {e<=N} accept all valid matches.
88 // Users can set a higher threshold with .similarity() to filter by quality.
89 // This matches mrab-regex behavior where only edit limits matter, not similarity.
90 similarity_threshold: 0.0,
91 default_edits: 0,
92 default_limits: None,
93 penalties: None,
94 max_threads: 1000,
95 match_flags: MatchFlags::default(),
96 partial: false,
97 timeout: None,
98 greedy_first: false,
99 }
100 }
101}
102
103impl FuzzyRegexBuilder {
104 /// Create a new builder with the given pattern.
105 #[must_use]
106 pub fn new(pattern: &str) -> Self {
107 FuzzyRegexBuilder {
108 pattern: pattern.to_string(),
109 config: RegexConfig::default(),
110 }
111 }
112
113 /// Set case-insensitive matching.
114 #[must_use]
115 pub fn case_insensitive(mut self, yes: bool) -> Self {
116 self.config.case_insensitive = yes;
117 self
118 }
119
120 /// Enable verbose mode (ignore whitespace and allow `#` comments).
121 ///
122 /// In verbose mode:
123 /// - Whitespace is ignored (use `\s` or `[ ]` for literal space)
124 /// - `#` starts a comment that extends to end of line
125 ///
126 /// This allows formatting patterns for readability:
127 /// ```text
128 /// (?x)
129 /// [A-Z][a-z]+ # First name
130 /// \s+ # Whitespace
131 /// [A-Z][a-z]+ # Last name
132 /// ```
133 #[must_use]
134 pub fn verbose(mut self, yes: bool) -> Self {
135 self.config.verbose = yes;
136 self
137 }
138
139 /// Enable dot-all mode (`.` matches newlines).
140 ///
141 /// By default, `.` matches any character except newlines.
142 /// When enabled, `.` matches any character including `\n`.
143 #[must_use]
144 pub fn dot_all(mut self, yes: bool) -> Self {
145 self.config.dot_all = yes;
146 self
147 }
148
149 /// Enable multi-line mode (`^` and `$` match at line boundaries).
150 ///
151 /// By default, `^` matches only at the start of text and `$` only at the end.
152 /// When enabled:
153 /// - `^` also matches after each newline
154 /// - `$` also matches before each newline
155 #[must_use]
156 pub fn multi_line(mut self, yes: bool) -> Self {
157 self.config.multi_line = yes;
158 self
159 }
160
161 /// Enable ungreedy mode (invert default greediness of quantifiers).
162 ///
163 /// When enabled:
164 /// - `*`, `+`, `?` become non-greedy (match as little as possible)
165 /// - `*?`, `+?`, `??` become greedy (match as much as possible)
166 ///
167 /// This is equivalent to the `(?U)` inline flag.
168 #[must_use]
169 pub fn ungreedy(mut self, yes: bool) -> Self {
170 self.config.ungreedy = yes;
171 self
172 }
173
174 /// Enable global mode (find all matches).
175 ///
176 /// When enabled, use `find_iter()` to get all matches.
177 /// When disabled (default), stops at first valid match (faster).
178 /// This is equivalent to the `(?g)` inline flag.
179 #[must_use]
180 pub fn global(mut self, yes: bool) -> Self {
181 self.config.match_flags.global = yes;
182 self
183 }
184
185 /// Enable greedy first-match mode (similar to mrab-regex behavior).
186 ///
187 /// When enabled, searches position by position and returns the first match found.
188 /// This is faster than the default best-match behavior when matches exist early
189 /// in the text, but may not find the optimal match.
190 ///
191 /// Default behavior (`greedy_first=false`): searches all positions to find best match.
192 /// Greedy first (`greedy_first=true`): stops at first match found.
193 #[must_use]
194 pub fn greedy_first(mut self, yes: bool) -> Self {
195 self.config.greedy_first = yes;
196 self
197 }
198
199 /// Enable Unicode mode for character classes.
200 ///
201 /// When enabled:
202 /// - `\w` matches Unicode word characters (not just ASCII `[a-zA-Z0-9_]`)
203 /// - `\d` matches Unicode digits
204 /// - `\s` matches Unicode whitespace
205 ///
206 /// This is equivalent to the `(?u)` inline flag.
207 #[must_use]
208 pub fn unicode(mut self, yes: bool) -> Self {
209 self.config.match_flags.unicode = yes;
210 self
211 }
212
213 /// Set the similarity threshold (0.0 - 1.0).
214 ///
215 /// Matches with similarity below this threshold are rejected.
216 #[must_use]
217 pub fn similarity(mut self, threshold: f32) -> Self {
218 self.config.similarity_threshold = threshold.clamp(0.0, 1.0);
219 self
220 }
221
222 /// Set the default number of edits allowed for fuzzy matching.
223 ///
224 /// This applies to literals without explicit fuzziness markers.
225 #[must_use]
226 pub fn edits(mut self, count: u8) -> Self {
227 self.config.default_edits = count;
228 self
229 }
230
231 /// Set detailed fuzzy limits for default matching.
232 #[must_use]
233 pub fn fuzzy(mut self, limits: FuzzyLimits) -> Self {
234 self.config.default_limits = Some(limits);
235 self
236 }
237
238 /// Set edit operation penalties.
239 #[must_use]
240 pub fn penalties(mut self, penalties: FuzzyPenalties) -> Self {
241 self.config.penalties = Some(penalties);
242 self
243 }
244
245 /// Set the maximum number of threads for NFA simulation.
246 ///
247 /// Higher values allow more complex patterns but use more memory.
248 #[must_use]
249 pub fn max_threads(mut self, max: usize) -> Self {
250 self.config.max_threads = max;
251 self
252 }
253
254 /// Enable partial matching.
255 ///
256 /// When enabled, matches that reach the end of the text are considered
257 /// successful even if they haven't completed. The `Match::partial()` method
258 /// can be used to check if a match is partial.
259 ///
260 /// This is useful for streaming input or when the text may be truncated.
261 #[must_use]
262 pub fn partial(mut self, yes: bool) -> Self {
263 self.config.partial = yes;
264 self
265 }
266
267 /// Set a timeout for matching operations.
268 ///
269 /// If a match operation takes longer than the timeout, it will be cancelled.
270 /// The default is no timeout.
271 ///
272 /// Note: Timeout is checked at certain checkpoints during matching, so it's
273 /// not precise. The actual time may exceed the timeout slightly.
274 #[must_use]
275 pub fn timeout(mut self, duration: std::time::Duration) -> Self {
276 self.config.timeout = Some(duration);
277 self
278 }
279
280 /// Build the `FuzzyRegex`.
281 ///
282 /// # Errors
283 ///
284 /// Returns an error if the pattern is invalid or cannot be compiled.
285 pub fn build(self) -> Result<FuzzyRegex> {
286 FuzzyRegex::compile(self.pattern, self.config)
287 }
288}