Skip to main content

eregex/
match_obj.rs

//! The [`Match`] type, the match iterators, and the partial-match types.
2
3use std::collections::HashMap;
4
/// A successful match, carrying the full capture state.
///
/// Group indexing follows the usual convention: index `0` is the whole match,
/// indices `1..` are capturing groups in order of opening parenthesis, and
/// named groups may also be looked up by name.
///
/// Every offset returned by the public accessors is a **byte** offset into
/// the haystack. Internally, spans are stored as char indices and translated
/// through the `char_to_byte` table on demand.
///
/// Obtain a `Match` from [`Regex::find`](crate::Regex::find) and friends. For
/// repeated captures (the signature mrab-regex feature), use
/// [`captures`](Self::captures) to see every value a group took.
///
/// # Examples
///
/// ```
/// use eregex::Regex;
/// let re = Regex::new(r"(?P<host>\w+)=(?P<port>\d+)")?;
/// let m = re.find("srv=8080").unwrap();
/// assert_eq!(m.name("host"), Some("srv"));
/// assert_eq!(m.group(2), Some("8080"));
/// assert_eq!(m.span(), (0, 8));
/// # Ok::<(), eregex::Error>(())
/// ```
pub struct Match<'h> {
    /// The searched text; every `&str` handed out borrows from it.
    pub(crate) haystack: &'h str,
    /// Byte offset for each char index that appears in `caps` / `log`.
    /// Lookups index this table directly, so it must cover every stored
    /// start *and* end index.
    pub(crate) char_to_byte: Vec<usize>,
    /// Char-index spans per group; `[0]` is the whole match.
    pub(crate) caps: Vec<Option<(usize, usize)>>,
    /// Full capture history per group (for repeated captures).
    pub(crate) log: Vec<Vec<(usize, usize)>>,
    /// Group name → group index.
    pub(crate) names: HashMap<String, usize>,
}

impl<'h> Match<'h> {
    /// Translate a char-index span into the corresponding byte span.
    ///
    /// Indexes `char_to_byte` directly, so it panics if either index lies
    /// outside the table.
    fn char_span_to_bytes(&self, (s, e): (usize, usize)) -> (usize, usize) {
        (self.char_to_byte[s], self.char_to_byte[e])
    }

    /// The logged char-index spans of group `g`, or an empty slice when `g`
    /// has no entry in the capture log.
    fn logged_spans(&self, g: usize) -> &[(usize, usize)] {
        self.log.get(g).map(Vec::as_slice).unwrap_or_default()
    }

    /// Byte span of group `g`'s current capture; `None` when `g` is out of
    /// range or the group did not participate.
    fn byte_span(&self, g: usize) -> Option<(usize, usize)> {
        let char_span = self.caps.get(g).copied().flatten()?;
        Some(self.char_span_to_bytes(char_span))
    }

    /// The whole match, equivalent to [`group(0)`](Self::group).
    ///
    /// Falls back to the empty string if group 0 has no span.
    pub fn as_str(&self) -> &'h str {
        self.group(0).unwrap_or("")
    }

    /// Return the text of group `g`, or `None` if it didn't participate (or
    /// `g` is not a valid group index).
    pub fn group(&self, g: usize) -> Option<&'h str> {
        let (s, e) = self.byte_span(g)?;
        Some(&self.haystack[s..e])
    }

    /// Return the text of a named group, or `None` if the name is unknown or
    /// the group didn't participate.
    pub fn name(&self, name: &str) -> Option<&'h str> {
        let g = *self.names.get(name)?;
        self.group(g)
    }

    /// The byte offset where the whole match starts.
    ///
    /// Use [`start_of`](Self::start_of) for an individual group.
    pub fn start(&self) -> usize {
        self.start_of(0)
    }

    /// The byte offset where the whole match ends.
    ///
    /// Use [`end_of`](Self::end_of) for an individual group.
    pub fn end(&self) -> usize {
        self.end_of(0)
    }

    /// The `(start, end)` byte span of the whole match.
    pub fn span(&self) -> (usize, usize) {
        (self.start(), self.end())
    }

    /// Start byte offset of group `g`.
    ///
    /// If the group didn't participate (or `g` is out of range) this returns
    /// the haystack length as a sentinel. Note that this differs from
    /// Python / mrab-regex, which report `-1` in that case; the return type
    /// here is unsigned. Use [`group`](Self::group) to test participation.
    pub fn start_of(&self, g: usize) -> usize {
        match self.byte_span(g) {
            Some((s, _)) => s,
            None => self.haystack.len(),
        }
    }

    /// End byte offset of group `g`.
    ///
    /// Returns the haystack length when the group didn't participate (or `g`
    /// is out of range), mirroring [`start_of`](Self::start_of).
    pub fn end_of(&self, g: usize) -> usize {
        match self.byte_span(g) {
            Some((_, e)) => e,
            None => self.haystack.len(),
        }
    }

    /// `(start, end)` byte span of group `g`; `(len, len)` of the haystack if
    /// the group didn't participate.
    pub fn span_of(&self, g: usize) -> (usize, usize) {
        (self.start_of(g), self.end_of(g))
    }

    /// Number of capturing groups plus one (for group 0).
    pub fn len(&self) -> usize {
        self.caps.len()
    }

    /// Always `false`; groups are never empty container-wise.
    pub fn is_empty(&self) -> bool {
        false
    }

    /// The current text of every group, collected into a `Vec`.
    ///
    /// Index 0 (the whole match) **is included**, unlike Python's
    /// `Match.groups()`. This is the same list as
    /// [`all_groups`](Self::all_groups).
    pub fn groups(&self) -> Vec<Option<&'h str>> {
        self.all_groups()
    }

    /// All captures of group `g` (repeated-capture support, a signature
    /// mrab-regex feature). The last entry equals [`group(g)`](Self::group).
    ///
    /// Every element is `Some`; the list is empty when `g` has no entry in
    /// the capture log.
    pub fn captures(&self, g: usize) -> Vec<Option<&'h str>> {
        self.logged_spans(g)
            .iter()
            .map(|&span| {
                let (s, e) = self.char_span_to_bytes(span);
                Some(&self.haystack[s..e])
            })
            .collect()
    }

    /// All captures of a named group; empty if the name is unknown.
    pub fn captures_name(&self, name: &str) -> Vec<Option<&'h str>> {
        match self.names.get(name) {
            Some(&g) => self.captures(g),
            None => Vec::new(),
        }
    }

    /// All start byte offsets of group `g`'s repeated captures.
    ///
    /// Mirrors mrab-regex's `Match.starts(group)`.
    pub fn starts(&self, g: usize) -> Vec<usize> {
        self.logged_spans(g)
            .iter()
            .map(|&(s, _)| self.char_to_byte[s])
            .collect()
    }

    /// All end byte offsets of group `g`'s repeated captures.
    pub fn ends(&self, g: usize) -> Vec<usize> {
        self.logged_spans(g)
            .iter()
            .map(|&(_, e)| self.char_to_byte[e])
            .collect()
    }

    /// All byte spans of group `g`'s repeated captures.
    pub fn spans(&self, g: usize) -> Vec<(usize, usize)> {
        self.logged_spans(g)
            .iter()
            .map(|&span| self.char_span_to_bytes(span))
            .collect()
    }

    /// A map from group name to the group's **current** text (a.k.a.
    /// `groupdict` in Python / mrab-regex).
    ///
    /// Named groups that didn't participate are omitted from the map.
    pub fn named_groups(&self) -> HashMap<String, &'h str> {
        self.names
            .iter()
            .filter_map(|(name, &g)| self.group(g).map(|text| (name.clone(), text)))
            .collect()
    }

    /// A map from group name to **all** of that group's captures (mrab-regex's
    /// `capturesdict`).
    ///
    /// Every named group gets an entry, possibly with an empty list.
    pub fn captures_dict(&self) -> HashMap<String, Vec<&'h str>> {
        self.names
            .iter()
            .map(|(name, &g)| {
                let texts: Vec<&'h str> = self.captures(g).into_iter().flatten().collect();
                (name.clone(), texts)
            })
            .collect()
    }

    /// All captures of **all** groups (group 0 first), as a list per group.
    /// Mirrors mrab-regex's `allcaptures`.
    pub fn all_captures(&self) -> Vec<Vec<&'h str>> {
        (0..self.caps.len())
            .map(|g| self.captures(g).into_iter().flatten().collect())
            .collect()
    }

    /// All byte spans of all captures of all groups. Mirrors mrab-regex's
    /// `allspans`.
    pub fn all_spans(&self) -> Vec<Vec<(usize, usize)>> {
        (0..self.caps.len()).map(|g| self.spans(g)).collect()
    }

    /// The whole match text (alias of [`as_str`](Self::as_str)).
    pub fn group0(&self) -> &'h str {
        self.as_str()
    }

    /// A tuple-like view of **all** groups' current text — the Rust analogue
    /// of mrab-regex's `m[:]` (which returns a tuple in Python). Index 0 is
    /// the whole match.
    pub fn all_groups(&self) -> Vec<Option<&'h str>> {
        (0..self.caps.len()).map(|i| self.group(i)).collect()
    }
}
213
214impl<'h> std::ops::Index<usize> for Match<'h> {
215    type Output = str;
216    fn index(&self, i: usize) -> &str {
217        self.group(i).unwrap_or("")
218    }
219}
220
221impl<'h> std::ops::Index<&str> for Match<'h> {
222    type Output = str;
223    fn index(&self, name: &str) -> &str {
224        self.name(name).unwrap_or("")
225    }
226}
227
228impl<'h> std::fmt::Debug for Match<'h> {
229    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
230        let (s, e) = self.span();
231        write!(f, "Match {:?} span={}..{}", &self.haystack[s..e], s, e)
232    }
233}
234
235// --- Iterators -------------------------------------------------------------
236
237/// Iterator over non-overlapping matches of a [`Regex`](crate::Regex).
238pub struct FindIter<'r, 'h> {
239    pub(crate) re: &'r crate::Regex,
240    pub(crate) haystack: &'h str,
241    pub(crate) st: crate::state::State,
242    pub(crate) pos: usize,
243    pub(crate) last_end: Option<usize>,
244}
245
246impl<'r, 'h> Iterator for FindIter<'r, 'h> {
247    type Item = Match<'h>;
248    fn next(&mut self) -> Option<Match<'h>> {
249        if let Some((start, end)) = self.re.find_from(&mut self.st, self.pos) {
250            let m = Match {
251                haystack: self.haystack,
252                char_to_byte: self.st.char_to_byte.clone(),
253                caps: self.st.caps.clone(),
254                log: self.st.log.clone(),
255                names: self.re.names_clone(),
256            };
257            // Advance, guarding against zero-width match loops.
258            self.pos = if end == start { end + 1 } else { end };
259            self.last_end = Some(end);
260            Some(m)
261        } else {
262            None
263        }
264    }
265}
266
/// Iterator that yields [`Match`] objects with full capture state.
///
/// This is a plain type alias of [`FindIter`] in this implementation, since
/// matches always carry captures.
pub type CaptureMatches<'r, 'h> = FindIter<'r, 'h>;
270
271// ---------------------------------------------------------------------------
272// Partial matching
273// ---------------------------------------------------------------------------
274
/// The outcome kind of a [`Regex::find_partial`](crate::Regex::find_partial)
/// attempt.
///
/// There is no `NoMatch` variant: a failed attempt is represented by
/// `Option::<PartialMatch>::None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MatchStatus {
    /// The pattern matched and consumed the input all the way to its end.
    Full,
    /// The input is a prefix of some full match: the pattern consumed to the
    /// end of input but still wanted more. Equivalently, a consuming leaf was
    /// blocked solely by end-of-input.
    Partial,
}
286
/// The state of a single group within a [`PartialMatch`].
///
/// `Matched` and `Partial` carry the text the group covered; `None` carries
/// nothing.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum GroupMatch<'h> {
    /// The group fully matched (its body completed).
    Matched(&'h str),
    /// The group was entered but its body did not complete before input ended.
    Partial(&'h str),
    /// The group never participated. (This is `GroupMatch::None`, not
    /// `Option::None`.)
    None,
}
297
298/// A partial (or full) match produced by
299/// [`Regex::find_partial`](crate::Regex::find_partial).
300///
301/// The match is *end-anchored*: it always ends exactly at the end of the
302/// haystack. `status` distinguishes a fully-satisfied match from one that was
303/// cut short by end-of-input.
304pub struct PartialMatch<'h> {
305    /// Whether the match is fully satisfied (`Full`) or cut short (`Partial`).
306    pub status: MatchStatus,
307    /// The whole matched text.
308    pub matched: &'h str,
309    /// Byte offset where the match starts.
310    pub start: usize,
311    /// Byte offset where the match ends (always the haystack length).
312    pub end: usize,
313    /// Per-group state; index 0 is the whole match, 1.. are the capturing
314    /// groups in order.
315    pub groups: Vec<GroupMatch<'h>>,
316    /// Group-name → index map (for [`PartialMatch::name`]).
317    pub(crate) names: HashMap<String, usize>,
318}
319
320impl<'h> PartialMatch<'h> {
321    /// `true` if [`status`](Self::status) is [`MatchStatus::Full`].
322    pub fn is_full(&self) -> bool {
323        matches!(self.status, MatchStatus::Full)
324    }
325
326    /// `true` if [`status`](Self::status) is [`MatchStatus::Partial`].
327    pub fn is_partial(&self) -> bool {
328        matches!(self.status, MatchStatus::Partial)
329    }
330
331    /// The text of group `g` (1-based), whether matched or partial. `None` if
332    /// the group did not participate.
333    pub fn group(&self, g: usize) -> Option<&'h str> {
334        match self.groups.get(g)? {
335            GroupMatch::Matched(s) | GroupMatch::Partial(s) => Some(*s),
336            GroupMatch::None => None,
337        }
338    }
339
340    /// The text of a named group, whether matched or partial.
341    pub fn name(&self, name: &str) -> Option<&'h str> {
342        let g = *self.names.get(name)?;
343        self.group(g)
344    }
345
346    /// Whether group `g` (1-based) is fully matched.
347    pub fn group_matched(&self, g: usize) -> bool {
348        matches!(self.groups.get(g), Some(GroupMatch::Matched(_)))
349    }
350
351    /// Whether group `g` (1-based) is partial (entered but not completed).
352    pub fn group_partial(&self, g: usize) -> bool {
353        matches!(self.groups.get(g), Some(GroupMatch::Partial(_)))
354    }
355
356    /// Whether group `g` (1-based) never participated.
357    pub fn group_none(&self, g: usize) -> bool {
358        matches!(self.groups.get(g), Some(GroupMatch::None) | None)
359    }
360}
361
362impl<'h> std::fmt::Debug for PartialMatch<'h> {
363    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364        write!(
365            f,
366            "PartialMatch {:?} status={:?} span={}..{}",
367            self.matched, self.status, self.start, self.end
368        )
369    }
370}