Skip to main content

fuzzy_regex/api/
match_result.rs

1//! Match result types for the public API.
2
3#![allow(clippy::needless_range_loop)]
4
5use std::collections::HashMap;
6use std::ops::Range;
7
8use crate::engine::EditCounts;
9
/// A single match in the text.
///
/// Holds a borrow of the searched text together with the byte span of the
/// match and the fuzzy-matching metadata (similarity, edit counts, optional
/// edit positions, partial flag) that the accessors on this type expose.
#[derive(Debug, Clone)]
pub struct Match<'t> {
    /// Text that `start` and `end` index into; `as_str` slices it.
    text: &'t str,
    /// Start byte offset of the match within `text`.
    start: usize,
    /// End byte offset of the match within `text` (used as an exclusive slice bound).
    end: usize,
    /// Similarity score reported by `similarity()`.
    similarity: f32,
    /// Insertion / deletion / substitution counts for this match.
    edits: EditCounts,
    /// Pre-computed edit positions as `(insertions, deletions, substitutions)`;
    /// `None` when they were not supplied at construction time.
    fuzzy_changes: Option<(Vec<usize>, Vec<usize>, Vec<usize>)>,
    /// Flag returned by `partial()`.
    partial: bool,
}
21
22impl<'t> Match<'t> {
23    /// Create a new match.
24    pub(crate) fn new(
25        text: &'t str,
26        start: usize,
27        end: usize,
28        similarity: f32,
29        edits: EditCounts,
30    ) -> Self {
31        Match {
32            text,
33            start,
34            end,
35            similarity,
36            edits,
37            fuzzy_changes: None,
38            partial: false,
39        }
40    }
41
42    /// Create a new match with fuzzy changes (edit positions).
43    /// Note: This is not currently used because fuzzy changes are computed lazily
44    /// via `fuzzy_changes_with_pattern()` for better performance.
45    /// Kept for potential future optimization where fuzzy changes are computed during matching.
46    #[allow(dead_code)]
47    pub(crate) fn new_with_changes(
48        text: &'t str,
49        start: usize,
50        end: usize,
51        similarity: f32,
52        edits: EditCounts,
53        fuzzy_changes: (Vec<usize>, Vec<usize>, Vec<usize>),
54    ) -> Self {
55        Match {
56            text,
57            start,
58            end,
59            similarity,
60            edits,
61            fuzzy_changes: Some(fuzzy_changes),
62            partial: false,
63        }
64    }
65
66    /// Create a new match with all options specified.
67    pub(crate) fn new_full(
68        text: &'t str,
69        start: usize,
70        end: usize,
71        similarity: f32,
72        edits: EditCounts,
73        fuzzy_changes: Option<(Vec<usize>, Vec<usize>, Vec<usize>)>,
74        partial: bool,
75    ) -> Self {
76        Match {
77            text,
78            start,
79            end,
80            similarity,
81            edits,
82            fuzzy_changes,
83            partial,
84        }
85    }
86
87    /// Get the matched text.
88    #[must_use]
89    pub fn as_str(&self) -> &'t str {
90        &self.text[self.start..self.end]
91    }
92
93    /// Get the start byte offset.
94    #[must_use]
95    pub fn start(&self) -> usize {
96        self.start
97    }
98
99    /// Get the end byte offset.
100    #[must_use]
101    pub fn end(&self) -> usize {
102        self.end
103    }
104
105    /// Get the byte range.
106    #[must_use]
107    pub fn range(&self) -> Range<usize> {
108        self.start..self.end
109    }
110
111    /// Get the length in bytes.
112    #[must_use]
113    pub fn len(&self) -> usize {
114        self.end - self.start
115    }
116
117    /// Check if empty.
118    #[must_use]
119    pub fn is_empty(&self) -> bool {
120        self.start == self.end
121    }
122
123    /// Get the similarity score (0.0 - 1.0).
124    #[must_use]
125    pub fn similarity(&self) -> f32 {
126        self.similarity
127    }
128
129    /// Get the edit counts.
130    #[must_use]
131    pub fn edits(&self) -> &EditCounts {
132        &self.edits
133    }
134
135    /// Get the total number of edits.
136    #[must_use]
137    pub fn total_edits(&self) -> u8 {
138        self.edits.total()
139    }
140
141    /// Get fuzzy counts as (insertions, deletions, substitutions).
142    ///
143    /// This matches the API of mrab-regex's `fuzzy_counts` property.
144    /// - insertions: characters in the text not in the pattern
145    /// - deletions: characters in the pattern not in the text
146    /// - substitutions: characters that differ between pattern and text
147    #[must_use]
148    pub fn fuzzy_counts(&self) -> (u32, u32, u32) {
149        (
150            u32::from(self.edits.insertions),
151            u32::from(self.edits.deletions),
152            u32::from(self.edits.substitutions),
153        )
154    }
155
156    /// Get fuzzy changes as (insertions, deletions, substitutions).
157    ///
158    /// This matches the API of mrab-regex's `fuzzy_changes` property.
159    /// Returns positions within the matched text where edits occurred.
160    ///
161    /// Note: This requires the original pattern to compute accurately.
162    /// If the pattern was not provided during matching, returns empty vectors.
163    #[must_use]
164    pub fn fuzzy_changes(&self) -> (Vec<usize>, Vec<usize>, Vec<usize>) {
165        if let Some(changes) = &self.fuzzy_changes {
166            return changes.clone();
167        }
168
169        // If we don't have pre-computed changes, return empty
170        // The pattern is needed to compute these accurately
171        (Vec::new(), Vec::new(), Vec::new())
172    }
173
174    /// Get fuzzy changes given the original pattern.
175    ///
176    /// This computes the edit positions by comparing the matched text
177    /// against the original pattern.
178    #[must_use]
179    pub fn fuzzy_changes_with_pattern(
180        &self,
181        pattern: &str,
182    ) -> (Vec<usize>, Vec<usize>, Vec<usize>) {
183        let matched_text = self.as_str();
184        compute_fuzzy_changes(pattern, matched_text)
185    }
186
187    /// Check if this is a partial match.
188    ///
189    /// A partial match is one that reaches the end of the input text
190    /// without completing the match. This is useful when searching
191    /// through streamed or truncated text.
192    #[must_use]
193    pub fn partial(&self) -> bool {
194        self.partial
195    }
196}
197
/// Compute fuzzy changes: positions of insertions, deletions, and substitutions.
///
/// Aligns `pattern` against `text` with a byte-wise Levenshtein matrix and
/// walks one minimal edit script back from the end of both inputs.
///
/// Returns `(insertions, deletions, substitutions)` as position vectors:
/// - insertions: byte offsets in `text` of bytes that have no counterpart in `pattern`
/// - deletions: byte offsets in `pattern` of bytes that are missing from `text`
/// - substitutions: byte offsets in `text` of bytes that differ from `pattern`
///
/// Every vector is sorted in ascending order, for the general path as well as
/// for the empty-input shortcuts.
#[allow(clippy::cast_possible_truncation)]
fn compute_fuzzy_changes(pattern: &str, text: &str) -> (Vec<usize>, Vec<usize>, Vec<usize>) {
    let a = pattern.as_bytes();
    let b = text.as_bytes();
    let a_len = a.len();
    let b_len = b.len();

    // Empty pattern: every text byte is an insertion.
    if a_len == 0 {
        return ((0..b_len).collect(), Vec::new(), Vec::new());
    }
    // Empty text: every pattern byte is a deletion.
    if b_len == 0 {
        return (Vec::new(), (0..a_len).collect(), Vec::new());
    }

    // matrix[i][j] = edit distance between a[..i] and b[..j].
    let mut matrix = vec![vec![0u32; b_len + 1]; a_len + 1];
    for (i, row) in matrix.iter_mut().enumerate() {
        row[0] = i as u32;
    }
    for (j, cell) in matrix[0].iter_mut().enumerate() {
        *cell = j as u32;
    }

    for i in 1..=a_len {
        for j in 1..=b_len {
            let cost = u32::from(a[i - 1] != b[j - 1]);
            matrix[i][j] = (matrix[i - 1][j] + 1)
                .min(matrix[i][j - 1] + 1)
                .min(matrix[i - 1][j - 1] + cost);
        }
    }

    // Backtrack from the bottom-right corner. Positions are discovered from
    // the end of the inputs towards the start, i.e. in descending order.
    let mut insertions = Vec::new();
    let mut deletions = Vec::new();
    let mut substitutions = Vec::new();

    let mut i = a_len;
    let mut j = b_len;

    while i > 0 || j > 0 {
        if i > 0 && j > 0 && a[i - 1] == b[j - 1] {
            // Bytes agree: no edit at this step.
            i -= 1;
            j -= 1;
        } else if i > 0 && j > 0 && matrix[i][j] == matrix[i - 1][j - 1] + 1 {
            substitutions.push(j - 1);
            i -= 1;
            j -= 1;
        } else if j > 0 && matrix[i][j] == matrix[i][j - 1] + 1 {
            insertions.push(j - 1);
            j -= 1;
        } else if i > 0 && matrix[i][j] == matrix[i - 1][j] + 1 {
            deletions.push(i - 1);
            i -= 1;
        } else {
            // Defensive stop: no predecessor cell explains the current one.
            break;
        }
    }

    // Restore ascending order so the result agrees with the early returns above.
    insertions.reverse();
    deletions.reverse();
    substitutions.reverse();

    (insertions, deletions, substitutions)
}
264
/// Capture groups from a match.
///
/// Stores one optional byte span per group plus the similarity and edit
/// counts that are copied into every `Match` handed out by `get`.
#[derive(Debug, Clone)]
pub struct Captures<'t> {
    /// Text that the spans in `slots` index into.
    text: &'t str,
    /// One entry per group, looked up by group index; each entry is an
    /// optional `(start, end)` pair and `None` makes `get` return `None`.
    slots: Vec<Option<(usize, usize)>>,
    /// Maps a group name to its index in `slots`.
    names: HashMap<String, usize>,
    /// Similarity score reported by `similarity()`.
    similarity: f32,
    /// Edit counts reported by `edits()` and `fuzzy_counts()`.
    edits: EditCounts,
}
274
275impl<'t> Captures<'t> {
276    /// Create captures from match result.
277    pub(crate) fn new(
278        text: &'t str,
279        slots: Vec<Option<(usize, usize)>>,
280        names: HashMap<String, usize>,
281        similarity: f32,
282        edits: EditCounts,
283    ) -> Self {
284        Captures {
285            text,
286            slots,
287            names,
288            similarity,
289            edits,
290        }
291    }
292
293    /// Get the full match (group 0).
294    #[must_use]
295    pub fn get(&self, index: usize) -> Option<Match<'t>> {
296        self.slots
297            .get(index)
298            .copied()
299            .flatten()
300            .map(|(start, end)| {
301                Match::new(self.text, start, end, self.similarity, self.edits.clone())
302            })
303    }
304
305    /// Get a capture by name.
306    #[must_use]
307    pub fn name(&self, name: &str) -> Option<Match<'t>> {
308        self.names.get(name).and_then(|&idx| self.get(idx))
309    }
310
311    /// Get the number of capture groups (including group 0).
312    #[must_use]
313    pub fn len(&self) -> usize {
314        self.slots.len()
315    }
316
317    /// Check if there are no captures.
318    #[must_use]
319    pub fn is_empty(&self) -> bool {
320        self.slots.is_empty()
321    }
322
323    /// Iterate over all captures.
324    #[must_use]
325    pub fn iter(&self) -> CapturesIter<'_, 't> {
326        CapturesIter {
327            captures: self,
328            index: 0,
329        }
330    }
331
332    /// Get the similarity score.
333    #[must_use]
334    pub fn similarity(&self) -> f32 {
335        self.similarity
336    }
337
338    /// Get edit counts.
339    #[must_use]
340    pub fn edits(&self) -> &EditCounts {
341        &self.edits
342    }
343
344    /// Get fuzzy counts as (insertions, deletions, substitutions).
345    ///
346    /// This matches the API of mrab-regex's `fuzzy_counts` property.
347    #[must_use]
348    pub fn fuzzy_counts(&self) -> (u32, u32, u32) {
349        (
350            u32::from(self.edits.insertions),
351            u32::from(self.edits.deletions),
352            u32::from(self.edits.substitutions),
353        )
354    }
355
356    /// Get fuzzy changes as (insertions, deletions, substitutions).
357    ///
358    /// This matches the API of mrab-regex's `fuzzy_changes` property.
359    #[must_use]
360    pub fn fuzzy_changes(&self) -> (Vec<usize>, Vec<usize>, Vec<usize>) {
361        (Vec::new(), Vec::new(), Vec::new())
362    }
363
364    /// Expand a replacement string with capture references.
365    ///
366    /// Supports `$0`, `$1`, etc. for numbered groups and `$name` for named groups.
367    #[must_use]
368    pub fn expand(&self, replacement: &str) -> String {
369        let mut result = String::new();
370        let mut chars = replacement.chars().peekable();
371
372        while let Some(ch) = chars.next() {
373            if ch == '$' {
374                match chars.peek() {
375                    Some('$') => {
376                        chars.next();
377                        result.push('$');
378                    }
379                    Some(&c) if c.is_ascii_digit() => {
380                        let mut num = 0usize;
381                        while let Some(&c) = chars.peek() {
382                            if let Some(d) = c.to_digit(10) {
383                                num = num * 10 + d as usize;
384                                chars.next();
385                            } else {
386                                break;
387                            }
388                        }
389                        if let Some(m) = self.get(num) {
390                            result.push_str(m.as_str());
391                        }
392                    }
393                    Some(&c) if c.is_alphabetic() || c == '_' => {
394                        let mut name = String::new();
395                        while let Some(&c) = chars.peek() {
396                            if c.is_alphanumeric() || c == '_' {
397                                name.push(c);
398                                chars.next();
399                            } else {
400                                break;
401                            }
402                        }
403                        if let Some(m) = self.name(&name) {
404                            result.push_str(m.as_str());
405                        }
406                    }
407                    Some('{') => {
408                        chars.next(); // consume '{'
409                        let mut name = String::new();
410                        while let Some(&c) = chars.peek() {
411                            if c == '}' {
412                                chars.next();
413                                break;
414                            }
415                            name.push(c);
416                            chars.next();
417                        }
418                        // Try as number first
419                        if let Ok(num) = name.parse::<usize>() {
420                            if let Some(m) = self.get(num) {
421                                result.push_str(m.as_str());
422                            }
423                        } else if let Some(m) = self.name(&name) {
424                            result.push_str(m.as_str());
425                        }
426                    }
427                    _ => result.push('$'),
428                }
429            } else {
430                result.push(ch);
431            }
432        }
433
434        result
435    }
436}
437
/// Iterator over captures.
///
/// Yields one item per slot of the borrowed `Captures`, in index order.
pub struct CapturesIter<'c, 't> {
    /// The captures being iterated.
    captures: &'c Captures<'t>,
    /// Index of the next slot to yield.
    index: usize,
}
443
444impl<'t> Iterator for CapturesIter<'_, 't> {
445    type Item = Option<Match<'t>>;
446
447    fn next(&mut self) -> Option<Self::Item> {
448        if self.index < self.captures.slots.len() {
449            let result = self.captures.get(self.index);
450            self.index += 1;
451            Some(result)
452        } else {
453            None
454        }
455    }
456}
457
458impl<'c, 't> IntoIterator for &'c Captures<'t> {
459    type Item = Option<Match<'t>>;
460    type IntoIter = CapturesIter<'c, 't>;
461
462    fn into_iter(self) -> Self::IntoIter {
463        self.iter()
464    }
465}
466
/// Iterator over all matches.
///
/// Wraps an owned, already collected list of matches and hands them out in
/// order.
pub struct Matches<'t> {
    /// Consuming iterator over the pre-collected matches.
    matches: std::vec::IntoIter<Match<'t>>,
}
471
472impl<'t> Matches<'t> {
473    /// Create a Matches iterator from pre-collected matches.
474    pub(crate) fn new(matches: Vec<Match<'t>>) -> Self {
475        Matches {
476            matches: matches.into_iter(),
477        }
478    }
479}
480
481impl<'t> Iterator for Matches<'t> {
482    type Item = Match<'t>;
483
484    fn next(&mut self) -> Option<Self::Item> {
485        self.matches.next()
486    }
487}
488
/// Iterator over all capture groups.
///
/// Each step calls `captures_at` on the regex at the current position and
/// yields the resulting `Captures`.
pub struct CaptureMatches<'r, 't> {
    /// Regex used for each search.
    pub(crate) regex: &'r super::regex::FuzzyRegex,
    /// Text being searched.
    pub(crate) text: &'t str,
    /// Byte position passed to the next `captures_at` call; a value greater
    /// than `text.len()` marks the iterator as exhausted.
    pub(crate) pos: usize,
}
495
496impl<'t> Iterator for CaptureMatches<'_, 't> {
497    type Item = Captures<'t>;
498
499    fn next(&mut self) -> Option<Self::Item> {
500        if self.pos > self.text.len() {
501            return None;
502        }
503
504        let result = self.regex.captures_at(self.text, self.pos);
505
506        if let Some(caps) = result {
507            if let Some(m) = caps.get(0) {
508                self.pos = if m.end() > self.pos {
509                    m.end()
510                } else {
511                    self.text[self.pos..]
512                        .char_indices()
513                        .nth(1)
514                        .map_or(self.text.len() + 1, |(i, _)| self.pos + i)
515                };
516            } else {
517                self.pos = self.text.len() + 1;
518            }
519            Some(caps)
520        } else {
521            self.pos = self.text.len() + 1;
522            None
523        }
524    }
525}
526
/// Iterator over split segments.
///
/// Yields the pieces of the text that lie between matches found with
/// `find_from`.
pub struct Split<'r, 't> {
    /// Regex whose matches act as separators.
    pub(crate) regex: &'r super::regex::FuzzyRegex,
    /// Text being split.
    pub(crate) text: &'t str,
    /// Byte offset where the next segment starts; also the position from
    /// which the next search begins.
    pub(crate) pos: usize,
    /// Set once the final segment has been yielded (or `pos` ran past the text).
    pub(crate) done: bool,
}
534
535impl<'t> Iterator for Split<'_, 't> {
536    type Item = &'t str;
537
538    fn next(&mut self) -> Option<Self::Item> {
539        if self.done {
540            return None;
541        }
542
543        if self.pos > self.text.len() {
544            self.done = true;
545            return None;
546        }
547
548        // Use find_from to search from current position onwards
549        let result = self.regex.find_from(self.text, self.pos);
550
551        if let Some(m) = result {
552            let segment = &self.text[self.pos..m.start()];
553            self.pos = m.end();
554            Some(segment)
555        } else {
556            let segment = &self.text[self.pos..];
557            self.done = true;
558            Some(segment)
559        }
560    }
561}