eregex/match_obj.rs
//! The [`Match`] type, the match iterators, and the partial-match types.
2
3use std::collections::HashMap;
4
/// One successful regex match together with its complete capture state.
///
/// Groups are numbered conventionally: `0` is the entire match and `1..` are
/// the capturing groups, ordered by their opening parenthesis. Named groups
/// can additionally be addressed by name.
///
/// A `Match` is produced by [`Regex::find`](crate::Regex::find) and related
/// methods. When a group matched repeatedly (the signature mrab-regex
/// feature), [`captures`](Self::captures) exposes every value it took.
///
/// # Examples
///
/// ```
/// use eregex::Regex;
/// let re = Regex::new(r"(?P<host>\w+)=(?P<port>\d+)")?;
/// let m = re.find("srv=8080").unwrap();
/// assert_eq!(m.name("host"), Some("srv"));
/// assert_eq!(m.group(2), Some("8080"));
/// assert_eq!(m.span(), (0, 8));
/// # Ok::<(), eregex::Error>(())
/// ```
pub struct Match<'h> {
    /// The text that group slices are taken from.
    pub(crate) haystack: &'h str,
    /// Byte offset into `haystack` for each char index; it is indexed with
    /// both the start and the (exclusive) end of every span below.
    pub(crate) char_to_byte: Vec<usize>,
    /// Current span of each group in char indices. Entry `0` is the whole
    /// match; `None` marks a group that did not participate.
    pub(crate) caps: Vec<Option<(usize, usize)>>,
    /// Every span each group captured, in char indices (repeated captures).
    pub(crate) log: Vec<Vec<(usize, usize)>>,
    /// Group name to group index.
    pub(crate) names: HashMap<String, usize>,
}
35
36impl<'h> Match<'h> {
37 fn byte_span(&self, g: usize) -> Option<(usize, usize)> {
38 let (s, e) = self.caps.get(g).copied().flatten()?;
39 Some((self.char_to_byte[s], self.char_to_byte[e]))
40 }
41
42 /// The whole match, equivalent to [`group(0)`](Self::group).
43 pub fn as_str(&self) -> &'h str {
44 self.group(0).unwrap_or("")
45 }
46
47 /// Return the text of group `g`, or `None` if it didn't participate.
48 pub fn group(&self, g: usize) -> Option<&'h str> {
49 let (s, e) = self.byte_span(g)?;
50 Some(&self.haystack[s..e])
51 }
52
53 /// Return the text of a named group.
54 pub fn name(&self, name: &str) -> Option<&'h str> {
55 let g = *self.names.get(name)?;
56 self.group(g)
57 }
58
59 /// The byte offset where the whole match (or group `g`) starts.
60 pub fn start(&self) -> usize {
61 self.start_of(0)
62 }
63
64 /// The byte offset where the whole match (or group `g`) ends.
65 pub fn end(&self) -> usize {
66 self.end_of(0)
67 }
68
69 /// The `(start, end)` byte span of the whole match.
70 pub fn span(&self) -> (usize, usize) {
71 (self.start(), self.end())
72 }
73
74 /// Start byte offset of group `g` (the end of the string if the group
75 /// didn't participate, matching Python semantics).
76 pub fn start_of(&self, g: usize) -> usize {
77 match self.byte_span(g) {
78 Some((s, _)) => s,
79 None => self.haystack.len(),
80 }
81 }
82
83 /// End byte offset of group `g`.
84 pub fn end_of(&self, g: usize) -> usize {
85 match self.byte_span(g) {
86 Some((_, e)) => e,
87 None => self.haystack.len(),
88 }
89 }
90
91 /// Span of group `g`.
92 pub fn span_of(&self, g: usize) -> (usize, usize) {
93 (self.start_of(g), self.end_of(g))
94 }
95
96 /// Number of capturing groups plus one (for group 0).
97 pub fn len(&self) -> usize {
98 self.caps.len()
99 }
100
101 /// Always `false`; groups are never empty container-wise.
102 pub fn is_empty(&self) -> bool {
103 false
104 }
105
106 /// Iterator over all groups' current text.
107 pub fn groups(&self) -> Vec<Option<&'h str>> {
108 (0..self.caps.len()).map(|i| self.group(i)).collect()
109 }
110
111 /// All captures of group `g` (repeated-capture support, a signature
112 /// mrab-regex feature). The last entry equals [`group(g)`](Self::group).
113 pub fn captures(&self, g: usize) -> Vec<Option<&'h str>> {
114 self.log
115 .get(g)
116 .map(|v| {
117 v.iter()
118 .map(|(s, e)| {
119 Some(&self.haystack[self.char_to_byte[*s]..self.char_to_byte[*e]])
120 })
121 .collect()
122 })
123 .unwrap_or_default()
124 }
125
126 /// All captures of a named group.
127 pub fn captures_name(&self, name: &str) -> Vec<Option<&'h str>> {
128 match self.names.get(name) {
129 Some(&g) => self.captures(g),
130 None => Vec::new(),
131 }
132 }
133
134 /// All start byte offsets of group `g`'s repeated captures.
135 ///
136 /// Mirrors mrab-regex's `Match.starts(group)`.
137 pub fn starts(&self, g: usize) -> Vec<usize> {
138 self.log
139 .get(g)
140 .map(|v| v.iter().map(|(s, _)| self.char_to_byte[*s]).collect())
141 .unwrap_or_default()
142 }
143
144 /// All end byte offsets of group `g`'s repeated captures.
145 pub fn ends(&self, g: usize) -> Vec<usize> {
146 self.log
147 .get(g)
148 .map(|v| v.iter().map(|(_, e)| self.char_to_byte[*e]).collect())
149 .unwrap_or_default()
150 }
151
152 /// All byte spans of group `g`'s repeated captures.
153 pub fn spans(&self, g: usize) -> Vec<(usize, usize)> {
154 self.log
155 .get(g)
156 .map(|v| {
157 v.iter()
158 .map(|(s, e)| (self.char_to_byte[*s], self.char_to_byte[*e]))
159 .collect()
160 })
161 .unwrap_or_default()
162 }
163
164 /// A map from group name to the group's **current** text (a.k.a.
165 /// `groupdict` in Python / mrab-regex).
166 pub fn named_groups(&self) -> HashMap<String, &'h str> {
167 let mut out = HashMap::new();
168 for (name, &g) in &self.names {
169 if let Some(s) = self.group(g) {
170 out.insert(name.clone(), s);
171 }
172 }
173 out
174 }
175
176 /// A map from group name to **all** of that group's captures (mrab-regex's
177 /// `capturesdict`).
178 pub fn captures_dict(&self) -> HashMap<String, Vec<&'h str>> {
179 let mut out = HashMap::new();
180 for (name, &g) in &self.names {
181 let v: Vec<&'h str> = self.captures(g).into_iter().flatten().collect();
182 out.insert(name.clone(), v);
183 }
184 out
185 }
186
187 /// All captures of **all** groups (group 0 first), as a list per group.
188 /// Mirrors mrab-regex's `allcaptures`.
189 pub fn all_captures(&self) -> Vec<Vec<&'h str>> {
190 (0..self.caps.len())
191 .map(|g| self.captures(g).into_iter().flatten().collect())
192 .collect()
193 }
194
195 /// All byte spans of all captures of all groups. Mirrors mrab-regex's
196 /// `allspans`.
197 pub fn all_spans(&self) -> Vec<Vec<(usize, usize)>> {
198 (0..self.caps.len()).map(|g| self.spans(g)).collect()
199 }
200
201 /// The whole match text (alias of [`as_str`](Self::as_str)).
202 pub fn group0(&self) -> &'h str {
203 self.as_str()
204 }
205
206 /// A tuple-like view of **all** groups' current text — the Rust analogue
207 /// of mrab-regex's `m[:]` (which returns a tuple in Python). Index 0 is
208 /// the whole match.
209 pub fn all_groups(&self) -> Vec<Option<&'h str>> {
210 (0..self.caps.len()).map(|i| self.group(i)).collect()
211 }
212}
213
214impl<'h> std::ops::Index<usize> for Match<'h> {
215 type Output = str;
216 fn index(&self, i: usize) -> &str {
217 self.group(i).unwrap_or("")
218 }
219}
220
221impl<'h> std::ops::Index<&str> for Match<'h> {
222 type Output = str;
223 fn index(&self, name: &str) -> &str {
224 self.name(name).unwrap_or("")
225 }
226}
227
228impl<'h> std::fmt::Debug for Match<'h> {
229 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
230 let (s, e) = self.span();
231 write!(f, "Match {:?} span={}..{}", &self.haystack[s..e], s, e)
232 }
233}
234
235// --- Iterators -------------------------------------------------------------
236
237/// Iterator over non-overlapping matches of a [`Regex`](crate::Regex).
238pub struct FindIter<'r, 'h> {
239 pub(crate) re: &'r crate::Regex,
240 pub(crate) haystack: &'h str,
241 pub(crate) st: crate::state::State,
242 pub(crate) pos: usize,
243 pub(crate) last_end: Option<usize>,
244}
245
246impl<'r, 'h> Iterator for FindIter<'r, 'h> {
247 type Item = Match<'h>;
248 fn next(&mut self) -> Option<Match<'h>> {
249 if let Some((start, end)) = self.re.find_from(&mut self.st, self.pos) {
250 let m = Match {
251 haystack: self.haystack,
252 char_to_byte: self.st.char_to_byte.clone(),
253 caps: self.st.caps.clone(),
254 log: self.st.log.clone(),
255 names: self.re.names_clone(),
256 };
257 // Advance, guarding against zero-width match loops.
258 self.pos = if end == start { end + 1 } else { end };
259 self.last_end = Some(end);
260 Some(m)
261 } else {
262 None
263 }
264 }
265}
266
267/// Iterator that yields [`Match`] objects with full capture state (an alias of
268/// [`FindIter`] in this implementation, since matches always carry captures).
269pub type CaptureMatches<'r, 'h> = FindIter<'r, 'h>;
270
271// ---------------------------------------------------------------------------
272// Partial matching
273// ---------------------------------------------------------------------------
274
/// How a [`Regex::find_partial`](crate::Regex::find_partial) attempt ended
/// when it produced a result. A failed attempt has no status of its own: it
/// is reported as `Option::<PartialMatch>::None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchStatus {
    /// The pattern matched, consuming the input right up to its end.
    Full,
    /// The input is a prefix of some full match: the pattern reached the end
    /// of input while still wanting more. Put differently, a consuming leaf
    /// was blocked by end-of-input and nothing else.
    Partial,
}
286
/// What happened to one group inside a [`PartialMatch`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GroupMatch<'h> {
    /// The group's body ran to completion; holds the matched text.
    Matched(&'h str),
    /// The group was entered, but input ended before its body completed;
    /// holds the text seen so far.
    Partial(&'h str),
    /// The group never took part in the match.
    None,
}
297
298/// A partial (or full) match produced by
299/// [`Regex::find_partial`](crate::Regex::find_partial).
300///
301/// The match is *end-anchored*: it always ends exactly at the end of the
302/// haystack. `status` distinguishes a fully-satisfied match from one that was
303/// cut short by end-of-input.
304pub struct PartialMatch<'h> {
305 /// Whether the match is fully satisfied (`Full`) or cut short (`Partial`).
306 pub status: MatchStatus,
307 /// The whole matched text.
308 pub matched: &'h str,
309 /// Byte offset where the match starts.
310 pub start: usize,
311 /// Byte offset where the match ends (always the haystack length).
312 pub end: usize,
313 /// Per-group state; index 0 is the whole match, 1.. are the capturing
314 /// groups in order.
315 pub groups: Vec<GroupMatch<'h>>,
316 /// Group-name → index map (for [`PartialMatch::name`]).
317 pub(crate) names: HashMap<String, usize>,
318}
319
320impl<'h> PartialMatch<'h> {
321 /// `true` if [`status`](Self::status) is [`MatchStatus::Full`].
322 pub fn is_full(&self) -> bool {
323 matches!(self.status, MatchStatus::Full)
324 }
325
326 /// `true` if [`status`](Self::status) is [`MatchStatus::Partial`].
327 pub fn is_partial(&self) -> bool {
328 matches!(self.status, MatchStatus::Partial)
329 }
330
331 /// The text of group `g` (1-based), whether matched or partial. `None` if
332 /// the group did not participate.
333 pub fn group(&self, g: usize) -> Option<&'h str> {
334 match self.groups.get(g)? {
335 GroupMatch::Matched(s) | GroupMatch::Partial(s) => Some(*s),
336 GroupMatch::None => None,
337 }
338 }
339
340 /// The text of a named group, whether matched or partial.
341 pub fn name(&self, name: &str) -> Option<&'h str> {
342 let g = *self.names.get(name)?;
343 self.group(g)
344 }
345
346 /// Whether group `g` (1-based) is fully matched.
347 pub fn group_matched(&self, g: usize) -> bool {
348 matches!(self.groups.get(g), Some(GroupMatch::Matched(_)))
349 }
350
351 /// Whether group `g` (1-based) is partial (entered but not completed).
352 pub fn group_partial(&self, g: usize) -> bool {
353 matches!(self.groups.get(g), Some(GroupMatch::Partial(_)))
354 }
355
356 /// Whether group `g` (1-based) never participated.
357 pub fn group_none(&self, g: usize) -> bool {
358 matches!(self.groups.get(g), Some(GroupMatch::None) | None)
359 }
360}
361
362impl<'h> std::fmt::Debug for PartialMatch<'h> {
363 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
364 write!(
365 f,
366 "PartialMatch {:?} status={:?} span={}..{}",
367 self.matched, self.status, self.start, self.end
368 )
369 }
370}