regex_automata/util/
captures.rs

1/*!
2Provides types for dealing with capturing groups.
3
4Capturing groups refer to sub-patterns of regexes that some regex engines can
5report matching offsets for. For example, matching `[a-z]([0-9]+)` against
6`a789` would give `a789` as the overall match (for the implicit capturing group
7at index `0`) and `789` as the match for the capturing group `([0-9]+)` (an
8explicit capturing group at index `1`).
9
10Not all regex engines can report match offsets for capturing groups. Indeed,
11to a first approximation, regex engines that can report capturing group offsets
12tend to be quite a bit slower than regex engines that can't. This is because
13tracking capturing groups at search time usually requires more "power" that
14in turn adds overhead.
15
16Other regex implementations might call capturing groups "submatches."
17
18# Overview
19
20The main types in this module are:
21
22* [`Captures`] records the capturing group offsets found during a search. It
23provides convenience routines for looking up capturing group offsets by either
24index or name.
25* [`GroupInfo`] records the mapping between capturing groups and "slots,"
26where the latter are how capturing groups are recorded during a regex search.
27This also keeps a mapping from capturing group name to index, and capture
28group index to name. A `GroupInfo` is used by `Captures` internally to
29provide a convenient API. It is unlikely that you'll use a `GroupInfo`
30directly, but for example, if you've compiled an Thompson NFA, then you can use
31[`thompson::NFA::group_info`](crate::nfa::thompson::NFA::group_info) to get its
32underlying `GroupInfo`.
33*/
34
35use alloc::{string::String, sync::Arc, vec, vec::Vec};
36
37use crate::util::{
38    interpolate,
39    primitives::{
40        NonMaxUsize, PatternID, PatternIDError, PatternIDIter, SmallIndex,
41    },
42    search::{Match, Span},
43};
44
45/// The span offsets of capturing groups after a match has been found.
46///
47/// This type represents the output of regex engines that can report the
48/// offsets at which capturing groups matches or "submatches" occur. For
49/// example, the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). When a match
50/// occurs, it will at minimum contain the [`PatternID`] of the pattern that
51/// matched. Depending upon how it was constructed, it may also contain the
52/// start/end offsets of the entire match of the pattern and the start/end
53/// offsets of each capturing group that participated in the match.
54///
55/// Values of this type are always created for a specific [`GroupInfo`]. It is
56/// unspecified behavior to use a `Captures` value in a search with any regex
57/// engine that has a different `GroupInfo` than the one the `Captures` were
58/// created with.
59///
60/// # Constructors
61///
62/// There are three constructors for this type that control what kind of
63/// information is available upon a match:
64///
65/// * [`Captures::all`]: Will store overall pattern match offsets in addition
66/// to the offsets of capturing groups that participated in the match.
67/// * [`Captures::matches`]: Will store only the overall pattern
68/// match offsets. The offsets of capturing groups (even ones that participated
69/// in the match) are not available.
70/// * [`Captures::empty`]: Will only store the pattern ID that matched. No
71/// match offsets are available at all.
72///
73/// If you aren't sure which to choose, then pick the first one. The first one
74/// is what convenience routines like,
75/// [`PikeVM::create_captures`](crate::nfa::thompson::pikevm::PikeVM::create_captures),
76/// will use automatically.
77///
78/// The main difference between these choices is performance. Namely, if you
79/// ask for _less_ information, then the execution of regex search may be able
80/// to run more quickly.
81///
82/// # Notes
83///
84/// It is worth pointing out that this type is not coupled to any one specific
85/// regex engine. Instead, its coupling is with [`GroupInfo`], which is the
86/// thing that is responsible for mapping capturing groups to "slot" offsets.
87/// Slot offsets are indices into a single sequence of memory at which matching
88/// haystack offsets for the corresponding group are written by regex engines.
89///
90/// # Example
91///
92/// This example shows how to parse a simple date and extract the components of
93/// the date via capturing groups:
94///
95/// ```
96/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
97///
98/// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
99/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
100///
101/// re.captures(&mut cache, "2010-03-14", &mut caps);
102/// assert!(caps.is_match());
103/// assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
104/// assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
105/// assert_eq!(Some(Span::from(8..10)), caps.get_group(3));
106///
107/// # Ok::<(), Box<dyn std::error::Error>>(())
108/// ```
109///
110/// # Example: named capturing groups
111///
112/// This example is like the one above, but leverages the ability to name
113/// capturing groups in order to make the code a bit clearer:
114///
115/// ```
116/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
117///
118/// let re = PikeVM::new(r"^(?P<y>[0-9]{4})-(?P<m>[0-9]{2})-(?P<d>[0-9]{2})$")?;
119/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
120///
121/// re.captures(&mut cache, "2010-03-14", &mut caps);
122/// assert!(caps.is_match());
123/// assert_eq!(Some(Span::from(0..4)), caps.get_group_by_name("y"));
124/// assert_eq!(Some(Span::from(5..7)), caps.get_group_by_name("m"));
125/// assert_eq!(Some(Span::from(8..10)), caps.get_group_by_name("d"));
126///
127/// # Ok::<(), Box<dyn std::error::Error>>(())
128/// ```
129#[derive(Clone)]
130pub struct Captures {
131    /// The group info that these capture groups are coupled to. This is what
132    /// gives the "convenience" of the `Captures` API. Namely, it provides the
133    /// slot mapping and the name|-->index mapping for capture lookups by name.
134    group_info: GroupInfo,
135    /// The ID of the pattern that matched. Regex engines must set this to
136    /// None when no match occurs.
137    pid: Option<PatternID>,
138    /// The slot values, i.e., submatch offsets.
139    ///
140    /// In theory, the smallest sequence of slots would be something like
141    /// `max(groups(pattern) for pattern in regex) * 2`, but instead, we use
142    /// `sum(groups(pattern) for pattern in regex) * 2`. Why?
143    ///
144    /// Well, the former could be used in theory, because we don't generally
145    /// have any overlapping APIs that involve capturing groups. Therefore,
146    /// there's technically never any need to have slots set for multiple
147    /// patterns. However, this might change some day, in which case, we would
148    /// need to have slots available.
149    ///
150    /// The other reason is that during the execution of some regex engines,
151    /// there exists a point in time where multiple slots for different
152    /// patterns may be written to before knowing which pattern has matched.
153    /// Therefore, the regex engines themselves, in order to support multiple
154    /// patterns correctly, must have all slots available. If `Captures`
155    /// doesn't have all slots available, then regex engines can't write
156    /// directly into the caller provided `Captures` and must instead write
157    /// into some other storage and then copy the slots involved in the match
158    /// at the end of the search.
159    ///
160    /// So overall, at least as of the time of writing, it seems like the path
161    /// of least resistance is to just require allocating all possible slots
162    /// instead of the conceptual minimum. Another way to justify this is that
163    /// the most common case is a single pattern, in which case, there is no
164    /// inefficiency here since the 'max' and 'sum' calculations above are
165    /// equivalent in that case.
166    ///
167    /// N.B. The mapping from group index to slot is maintained by `GroupInfo`
168    /// and is considered an API guarantee. See `GroupInfo` for more details on
169    /// that mapping.
170    ///
171    /// N.B. `Option<NonMaxUsize>` has the same size as a `usize`.
172    slots: Vec<Option<NonMaxUsize>>,
173}
174
175impl Captures {
176    /// Create new storage for the offsets of all matching capturing groups.
177    ///
178    /// This routine provides the most information for matches---namely, the
179    /// spans of matching capturing groups---but also requires the regex search
180    /// routines to do the most work.
181    ///
182    /// It is unspecified behavior to use the returned `Captures` value in a
183    /// search with a `GroupInfo` other than the one that is provided to this
184    /// constructor.
185    ///
186    /// # Example
187    ///
188    /// This example shows that all capturing groups---but only ones that
189    /// participated in a match---are available to query after a match has
190    /// been found:
191    ///
192    /// ```
193    /// use regex_automata::{
194    ///     nfa::thompson::pikevm::PikeVM,
195    ///     util::captures::Captures,
196    ///     Span, Match,
197    /// };
198    ///
199    /// let re = PikeVM::new(
200    ///     r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
201    /// )?;
202    /// let mut cache = re.create_cache();
203    /// let mut caps = Captures::all(re.get_nfa().group_info().clone());
204    ///
205    /// re.captures(&mut cache, "ABC123", &mut caps);
206    /// assert!(caps.is_match());
207    /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match());
208    /// // The 'lower' group didn't match, so it won't have any offsets.
209    /// assert_eq!(None, caps.get_group_by_name("lower"));
210    /// assert_eq!(Some(Span::from(0..3)), caps.get_group_by_name("upper"));
211    /// assert_eq!(Some(Span::from(3..6)), caps.get_group_by_name("digits"));
212    ///
213    /// # Ok::<(), Box<dyn std::error::Error>>(())
214    /// ```
215    pub fn all(group_info: GroupInfo) -> Captures {
216        let slots = group_info.slot_len();
217        Captures { group_info, pid: None, slots: vec![None; slots] }
218    }
219
220    /// Create new storage for only the full match spans of a pattern. This
221    /// does not include any capturing group offsets.
222    ///
223    /// It is unspecified behavior to use the returned `Captures` value in a
224    /// search with a `GroupInfo` other than the one that is provided to this
225    /// constructor.
226    ///
227    /// # Example
228    ///
229    /// This example shows that only overall match offsets are reported when
230    /// this constructor is used. Accessing any capturing groups other than
231    /// the 0th will always return `None`.
232    ///
233    /// ```
234    /// use regex_automata::{
235    ///     nfa::thompson::pikevm::PikeVM,
236    ///     util::captures::Captures,
237    ///     Match,
238    /// };
239    ///
240    /// let re = PikeVM::new(
241    ///     r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$",
242    /// )?;
243    /// let mut cache = re.create_cache();
244    /// let mut caps = Captures::matches(re.get_nfa().group_info().clone());
245    ///
246    /// re.captures(&mut cache, "ABC123", &mut caps);
247    /// assert!(caps.is_match());
248    /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match());
249    /// // We didn't ask for capturing group offsets, so they aren't available.
250    /// assert_eq!(None, caps.get_group_by_name("lower"));
251    /// assert_eq!(None, caps.get_group_by_name("upper"));
252    /// assert_eq!(None, caps.get_group_by_name("digits"));
253    ///
254    /// # Ok::<(), Box<dyn std::error::Error>>(())
255    /// ```
256    pub fn matches(group_info: GroupInfo) -> Captures {
257        // This is OK because we know there are at least this many slots,
258        // and GroupInfo construction guarantees that the number of slots fits
259        // into a usize.
260        let slots = group_info.pattern_len().checked_mul(2).unwrap();
261        Captures { group_info, pid: None, slots: vec![None; slots] }
262    }
263
264    /// Create new storage for only tracking which pattern matched. No offsets
265    /// are stored at all.
266    ///
267    /// It is unspecified behavior to use the returned `Captures` value in a
268    /// search with a `GroupInfo` other than the one that is provided to this
269    /// constructor.
270    ///
271    /// # Example
272    ///
273    /// This example shows that only the pattern that matched can be accessed
274    /// from a `Captures` value created via this constructor.
275    ///
276    /// ```
277    /// use regex_automata::{
278    ///     nfa::thompson::pikevm::PikeVM,
279    ///     util::captures::Captures,
280    ///     PatternID,
281    /// };
282    ///
283    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
284    /// let mut cache = re.create_cache();
285    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
286    ///
287    /// re.captures(&mut cache, "aABCz", &mut caps);
288    /// assert!(caps.is_match());
289    /// assert_eq!(Some(PatternID::must(0)), caps.pattern());
290    /// // We didn't ask for any offsets, so they aren't available.
291    /// assert_eq!(None, caps.get_match());
292    ///
293    /// re.captures(&mut cache, &"aABCz"[1..], &mut caps);
294    /// assert!(caps.is_match());
295    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
296    /// // We didn't ask for any offsets, so they aren't available.
297    /// assert_eq!(None, caps.get_match());
298    ///
299    /// # Ok::<(), Box<dyn std::error::Error>>(())
300    /// ```
301    pub fn empty(group_info: GroupInfo) -> Captures {
302        Captures { group_info, pid: None, slots: vec![] }
303    }
304
305    /// Returns true if and only if this capturing group represents a match.
306    ///
307    /// This is a convenience routine for `caps.pattern().is_some()`.
308    ///
309    /// # Example
310    ///
311    /// When using the PikeVM (for example), the lightest weight way of
312    /// detecting whether a match exists is to create capturing groups that
313    /// only track the ID of the pattern that match (if any):
314    ///
315    /// ```
316    /// use regex_automata::{
317    ///     nfa::thompson::pikevm::PikeVM,
318    ///     util::captures::Captures,
319    /// };
320    ///
321    /// let re = PikeVM::new(r"[a-z]+")?;
322    /// let mut cache = re.create_cache();
323    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
324    ///
325    /// re.captures(&mut cache, "aABCz", &mut caps);
326    /// assert!(caps.is_match());
327    ///
328    /// # Ok::<(), Box<dyn std::error::Error>>(())
329    /// ```
330    #[inline]
331    pub fn is_match(&self) -> bool {
332        self.pid.is_some()
333    }
334
335    /// Returns the identifier of the pattern that matched when this
336    /// capturing group represents a match. If no match was found, then this
337    /// always returns `None`.
338    ///
339    /// This returns a pattern ID in precisely the cases in which `is_match`
340    /// returns `true`. Similarly, the pattern ID returned is always the
341    /// same pattern ID found in the `Match` returned by `get_match`.
342    ///
343    /// # Example
344    ///
345    /// When using the PikeVM (for example), the lightest weight way of
346    /// detecting which pattern matched is to create capturing groups that only
347    /// track the ID of the pattern that match (if any):
348    ///
349    /// ```
350    /// use regex_automata::{
351    ///     nfa::thompson::pikevm::PikeVM,
352    ///     util::captures::Captures,
353    ///     PatternID,
354    /// };
355    ///
356    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
357    /// let mut cache = re.create_cache();
358    /// let mut caps = Captures::empty(re.get_nfa().group_info().clone());
359    ///
360    /// re.captures(&mut cache, "ABC", &mut caps);
361    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
362    /// // Recall that offsets are only available when using a non-empty
363    /// // Captures value. So even though a match occurred, this returns None!
364    /// assert_eq!(None, caps.get_match());
365    ///
366    /// # Ok::<(), Box<dyn std::error::Error>>(())
367    /// ```
368    #[inline]
369    pub fn pattern(&self) -> Option<PatternID> {
370        self.pid
371    }
372
373    /// Returns the pattern ID and the span of the match, if one occurred.
374    ///
375    /// This always returns `None` when `Captures` was created with
376    /// [`Captures::empty`], even if a match was found.
377    ///
378    /// If this routine returns a non-`None` value, then `is_match` is
379    /// guaranteed to return `true` and `pattern` is also guaranteed to return
380    /// a non-`None` value.
381    ///
382    /// # Example
383    ///
384    /// This example shows how to get the full match from a search:
385    ///
386    /// ```
387    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
388    ///
389    /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?;
390    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
391    ///
392    /// re.captures(&mut cache, "ABC", &mut caps);
393    /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match());
394    ///
395    /// # Ok::<(), Box<dyn std::error::Error>>(())
396    /// ```
397    #[inline]
398    pub fn get_match(&self) -> Option<Match> {
399        Some(Match::new(self.pattern()?, self.get_group(0)?))
400    }
401
402    /// Returns the span of a capturing group match corresponding to the group
403    /// index given, only if both the overall pattern matched and the capturing
404    /// group participated in that match.
405    ///
406    /// This returns `None` if `index` is invalid. `index` is valid if and only
407    /// if it's less than [`Captures::group_len`] for the matching pattern.
408    ///
409    /// This always returns `None` when `Captures` was created with
410    /// [`Captures::empty`], even if a match was found. This also always
411    /// returns `None` for any `index > 0` when `Captures` was created with
412    /// [`Captures::matches`].
413    ///
414    /// If this routine returns a non-`None` value, then `is_match` is
415    /// guaranteed to return `true`, `pattern` is guaranteed to return a
416    /// non-`None` value and `get_match` is guaranteed to return a non-`None`
417    /// value.
418    ///
419    /// By convention, the 0th capture group will always return the same
420    /// span as the span returned by `get_match`. This is because the 0th
421    /// capture group always corresponds to the entirety of the pattern's
422    /// match. (It is similarly always unnamed because it is implicit.) This
423    /// isn't necessarily true of all regex engines. For example, one can
424    /// hand-compile a [`thompson::NFA`](crate::nfa::thompson::NFA) via a
425    /// [`thompson::Builder`](crate::nfa::thompson::Builder), which isn't
426    /// technically forced to make the 0th capturing group always correspond to
427    /// the entire match.
428    ///
429    /// # Example
430    ///
431    /// This example shows how to get the capturing groups, by index, from a
432    /// match:
433    ///
434    /// ```
435    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
436    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
437    ///
438    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
439    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
440    ///
441    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
442    /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match());
443    /// assert_eq!(Some(Span::from(0..5)), caps.get_group(1));
444    /// assert_eq!(Some(Span::from(6..17)), caps.get_group(2));
445    /// // Looking for a non-existent capturing group will return None:
446    /// assert_eq!(None, caps.get_group(3));
447    /// # // literals are too big for 32-bit usize: #1039
448    /// # #[cfg(target_pointer_width = "64")]
449    /// assert_eq!(None, caps.get_group(9944060567225171988));
450    ///
451    /// # Ok::<(), Box<dyn std::error::Error>>(())
452    /// ```
453    #[inline]
454    pub fn get_group(&self, index: usize) -> Option<Span> {
455        let pid = self.pattern()?;
456        // There's a little bit of work needed to map captures to slots in the
457        // fully general case. But in the overwhelming common case of a single
458        // pattern, we can just do some simple arithmetic.
459        let (slot_start, slot_end) = if self.group_info().pattern_len() == 1 {
460            (index.checked_mul(2)?, index.checked_mul(2)?.checked_add(1)?)
461        } else {
462            self.group_info().slots(pid, index)?
463        };
464        let start = self.slots.get(slot_start).copied()??;
465        let end = self.slots.get(slot_end).copied()??;
466        Some(Span { start: start.get(), end: end.get() })
467    }
468
469    /// Returns the span of a capturing group match corresponding to the group
470    /// name given, only if both the overall pattern matched and the capturing
471    /// group participated in that match.
472    ///
473    /// This returns `None` if `name` does not correspond to a valid capturing
474    /// group for the pattern that matched.
475    ///
476    /// This always returns `None` when `Captures` was created with
477    /// [`Captures::empty`], even if a match was found. This also always
478    /// returns `None` for any `index > 0` when `Captures` was created with
479    /// [`Captures::matches`].
480    ///
481    /// If this routine returns a non-`None` value, then `is_match` is
482    /// guaranteed to return `true`, `pattern` is guaranteed to return a
483    /// non-`None` value and `get_match` is guaranteed to return a non-`None`
484    /// value.
485    ///
486    /// # Example
487    ///
488    /// This example shows how to get the capturing groups, by name, from a
489    /// match:
490    ///
491    /// ```
492    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
493    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match};
494    ///
495    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
496    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
497    ///
498    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
499    /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match());
500    /// assert_eq!(Some(Span::from(0..5)), caps.get_group_by_name("first"));
501    /// assert_eq!(Some(Span::from(6..17)), caps.get_group_by_name("last"));
502    /// // Looking for a non-existent capturing group will return None:
503    /// assert_eq!(None, caps.get_group_by_name("middle"));
504    ///
505    /// # Ok::<(), Box<dyn std::error::Error>>(())
506    /// ```
507    pub fn get_group_by_name(&self, name: &str) -> Option<Span> {
508        let index = self.group_info().to_index(self.pattern()?, name)?;
509        self.get_group(index)
510    }
511
512    /// Returns an iterator of possible spans for every capturing group in the
513    /// matching pattern.
514    ///
515    /// If this `Captures` value does not correspond to a match, then the
516    /// iterator returned yields no elements.
517    ///
518    /// Note that the iterator returned yields elements of type `Option<Span>`.
519    /// A span is present if and only if it corresponds to a capturing group
520    /// that participated in a match.
521    ///
522    /// # Example
523    ///
524    /// This example shows how to collect all capturing groups:
525    ///
526    /// ```
527    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
528    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
529    ///
530    /// let re = PikeVM::new(
531    ///     // Matches first/last names, with an optional middle name.
532    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
533    /// )?;
534    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
535    ///
536    /// re.captures(&mut cache, "Harry James Potter", &mut caps);
537    /// assert!(caps.is_match());
538    /// let groups: Vec<Option<Span>> = caps.iter().collect();
539    /// assert_eq!(groups, vec![
540    ///     Some(Span::from(0..18)),
541    ///     Some(Span::from(0..5)),
542    ///     Some(Span::from(6..11)),
543    ///     Some(Span::from(12..18)),
544    /// ]);
545    ///
546    /// # Ok::<(), Box<dyn std::error::Error>>(())
547    /// ```
548    ///
549    /// This example uses the same regex as the previous example, but with a
550    /// haystack that omits the middle name. This results in a capturing group
551    /// that is present in the elements yielded by the iterator but without a
552    /// match:
553    ///
554    /// ```
555    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
556    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
557    ///
558    /// let re = PikeVM::new(
559    ///     // Matches first/last names, with an optional middle name.
560    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
561    /// )?;
562    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
563    ///
564    /// re.captures(&mut cache, "Harry Potter", &mut caps);
565    /// assert!(caps.is_match());
566    /// let groups: Vec<Option<Span>> = caps.iter().collect();
567    /// assert_eq!(groups, vec![
568    ///     Some(Span::from(0..12)),
569    ///     Some(Span::from(0..5)),
570    ///     None,
571    ///     Some(Span::from(6..12)),
572    /// ]);
573    ///
574    /// # Ok::<(), Box<dyn std::error::Error>>(())
575    /// ```
576    pub fn iter(&self) -> CapturesPatternIter<'_> {
577        let names = self
578            .pattern()
579            .map_or(GroupInfoPatternNames::empty().enumerate(), |pid| {
580                self.group_info().pattern_names(pid).enumerate()
581            });
582        CapturesPatternIter { caps: self, names }
583    }
584
585    /// Return the total number of capturing groups for the matching pattern.
586    ///
587    /// If this `Captures` value does not correspond to a match, then this
588    /// always returns `0`.
589    ///
590    /// This always returns the same number of elements yielded by
591    /// [`Captures::iter`]. That is, the number includes capturing groups even
592    /// if they don't participate in the match.
593    ///
594    /// # Example
595    ///
596    /// This example shows how to count the total number of capturing groups
597    /// associated with a pattern. Notice that it includes groups that did not
598    /// participate in a match (just like `Captures::iter` does).
599    ///
600    /// ```
601    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
602    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
603    ///
604    /// let re = PikeVM::new(
605    ///     // Matches first/last names, with an optional middle name.
606    ///     r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$",
607    /// )?;
608    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
609    ///
610    /// re.captures(&mut cache, "Harry Potter", &mut caps);
611    /// assert_eq!(4, caps.group_len());
612    ///
613    /// # Ok::<(), Box<dyn std::error::Error>>(())
614    /// ```
615    pub fn group_len(&self) -> usize {
616        let pid = match self.pattern() {
617            None => return 0,
618            Some(pid) => pid,
619        };
620        self.group_info().group_len(pid)
621    }
622
623    /// Returns a reference to the underlying group info on which these
624    /// captures are based.
625    ///
626    /// The difference between `GroupInfo` and `Captures` is that the former
627    /// defines the structure of capturing groups where as the latter is what
628    /// stores the actual match information. So where as `Captures` only gives
629    /// you access to the current match, `GroupInfo` lets you query any
630    /// information about all capturing groups, even ones for patterns that
631    /// weren't involved in a match.
632    ///
633    /// Note that a `GroupInfo` uses reference counting internally, so it may
634    /// be cloned cheaply.
635    ///
636    /// # Example
637    ///
638    /// This example shows how to get all capturing group names from the
639    /// underlying `GroupInfo`. Notice that we don't even need to run a
640    /// search.
641    ///
642    /// ```
643    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
644    ///
645    /// let re = PikeVM::new_many(&[
646    ///     r"(?P<foo>a)",
647    ///     r"(a)(b)",
648    ///     r"ab",
649    ///     r"(?P<bar>a)(?P<quux>a)",
650    ///     r"(?P<foo>z)",
651    /// ])?;
652    /// let caps = re.create_captures();
653    ///
654    /// let expected = vec![
655    ///     (PatternID::must(0), 0, None),
656    ///     (PatternID::must(0), 1, Some("foo")),
657    ///     (PatternID::must(1), 0, None),
658    ///     (PatternID::must(1), 1, None),
659    ///     (PatternID::must(1), 2, None),
660    ///     (PatternID::must(2), 0, None),
661    ///     (PatternID::must(3), 0, None),
662    ///     (PatternID::must(3), 1, Some("bar")),
663    ///     (PatternID::must(3), 2, Some("quux")),
664    ///     (PatternID::must(4), 0, None),
665    ///     (PatternID::must(4), 1, Some("foo")),
666    /// ];
667    /// // We could also just use 're.get_nfa().group_info()'.
668    /// let got: Vec<(PatternID, usize, Option<&str>)> =
669    ///     caps.group_info().all_names().collect();
670    /// assert_eq!(expected, got);
671    ///
672    /// # Ok::<(), Box<dyn std::error::Error>>(())
673    /// ```
674    pub fn group_info(&self) -> &GroupInfo {
675        &self.group_info
676    }
677
678    /// Interpolates the capture references in `replacement` with the
679    /// corresponding substrings in `haystack` matched by each reference. The
680    /// interpolated string is returned.
681    ///
682    /// See the [`interpolate` module](interpolate) for documentation on the
683    /// format of the replacement string.
684    ///
685    /// # Example
686    ///
687    /// This example shows how to use interpolation, and also shows how it
688    /// can work with multi-pattern regexes.
689    ///
690    /// ```
691    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
692    ///
693    /// let re = PikeVM::new_many(&[
694    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
695    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
696    /// ])?;
697    /// let mut cache = re.create_cache();
698    /// let mut caps = re.create_captures();
699    ///
700    /// let replacement = "year=$year, month=$month, day=$day";
701    ///
702    /// // This matches the first pattern.
703    /// let hay = "On 14-03-2010, I became a Tennessee lamb.";
704    /// re.captures(&mut cache, hay, &mut caps);
705    /// let result = caps.interpolate_string(hay, replacement);
706    /// assert_eq!("year=2010, month=03, day=14", result);
707    ///
708    /// // And this matches the second pattern.
709    /// let hay = "On 2010-03-14, I became a Tennessee lamb.";
710    /// re.captures(&mut cache, hay, &mut caps);
711    /// let result = caps.interpolate_string(hay, replacement);
712    /// assert_eq!("year=2010, month=03, day=14", result);
713    ///
714    /// # Ok::<(), Box<dyn std::error::Error>>(())
715    /// ```
716    pub fn interpolate_string(
717        &self,
718        haystack: &str,
719        replacement: &str,
720    ) -> String {
721        let mut dst = String::new();
722        self.interpolate_string_into(haystack, replacement, &mut dst);
723        dst
724    }
725
726    /// Interpolates the capture references in `replacement` with the
727    /// corresponding substrings in `haystack` matched by each reference. The
728    /// interpolated string is written to `dst`.
729    ///
730    /// See the [`interpolate` module](interpolate) for documentation on the
731    /// format of the replacement string.
732    ///
733    /// # Example
734    ///
735    /// This example shows how to use interpolation, and also shows how it
736    /// can work with multi-pattern regexes.
737    ///
738    /// ```
739    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
740    ///
741    /// let re = PikeVM::new_many(&[
742    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
743    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
744    /// ])?;
745    /// let mut cache = re.create_cache();
746    /// let mut caps = re.create_captures();
747    ///
748    /// let replacement = "year=$year, month=$month, day=$day";
749    ///
750    /// // This matches the first pattern.
751    /// let hay = "On 14-03-2010, I became a Tennessee lamb.";
752    /// re.captures(&mut cache, hay, &mut caps);
753    /// let mut dst = String::new();
754    /// caps.interpolate_string_into(hay, replacement, &mut dst);
755    /// assert_eq!("year=2010, month=03, day=14", dst);
756    ///
757    /// // And this matches the second pattern.
758    /// let hay = "On 2010-03-14, I became a Tennessee lamb.";
759    /// re.captures(&mut cache, hay, &mut caps);
760    /// let mut dst = String::new();
761    /// caps.interpolate_string_into(hay, replacement, &mut dst);
762    /// assert_eq!("year=2010, month=03, day=14", dst);
763    ///
764    /// # Ok::<(), Box<dyn std::error::Error>>(())
765    /// ```
766    pub fn interpolate_string_into(
767        &self,
768        haystack: &str,
769        replacement: &str,
770        dst: &mut String,
771    ) {
772        interpolate::string(
773            replacement,
774            |index, dst| {
775                let span = match self.get_group(index) {
776                    None => return,
777                    Some(span) => span,
778                };
779                dst.push_str(&haystack[span]);
780            },
781            |name| self.group_info().to_index(self.pattern()?, name),
782            dst,
783        );
784    }
785
786    /// Interpolates the capture references in `replacement` with the
787    /// corresponding substrings in `haystack` matched by each reference. The
788    /// interpolated byte string is returned.
789    ///
790    /// See the [`interpolate` module](interpolate) for documentation on the
791    /// format of the replacement string.
792    ///
793    /// # Example
794    ///
795    /// This example shows how to use interpolation, and also shows how it
796    /// can work with multi-pattern regexes.
797    ///
798    /// ```
799    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
800    ///
801    /// let re = PikeVM::new_many(&[
802    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
803    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
804    /// ])?;
805    /// let mut cache = re.create_cache();
806    /// let mut caps = re.create_captures();
807    ///
808    /// let replacement = b"year=$year, month=$month, day=$day";
809    ///
810    /// // This matches the first pattern.
811    /// let hay = b"On 14-03-2010, I became a Tennessee lamb.";
812    /// re.captures(&mut cache, hay, &mut caps);
813    /// let result = caps.interpolate_bytes(hay, replacement);
814    /// assert_eq!(&b"year=2010, month=03, day=14"[..], result);
815    ///
816    /// // And this matches the second pattern.
817    /// let hay = b"On 2010-03-14, I became a Tennessee lamb.";
818    /// re.captures(&mut cache, hay, &mut caps);
819    /// let result = caps.interpolate_bytes(hay, replacement);
820    /// assert_eq!(&b"year=2010, month=03, day=14"[..], result);
821    ///
822    /// # Ok::<(), Box<dyn std::error::Error>>(())
823    /// ```
824    pub fn interpolate_bytes(
825        &self,
826        haystack: &[u8],
827        replacement: &[u8],
828    ) -> Vec<u8> {
829        let mut dst = vec![];
830        self.interpolate_bytes_into(haystack, replacement, &mut dst);
831        dst
832    }
833
834    /// Interpolates the capture references in `replacement` with the
835    /// corresponding substrings in `haystack` matched by each reference. The
836    /// interpolated byte string is written to `dst`.
837    ///
838    /// See the [`interpolate` module](interpolate) for documentation on the
839    /// format of the replacement string.
840    ///
841    /// # Example
842    ///
843    /// This example shows how to use interpolation, and also shows how it
844    /// can work with multi-pattern regexes.
845    ///
846    /// ```
847    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID};
848    ///
849    /// let re = PikeVM::new_many(&[
850    ///     r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})",
851    ///     r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})",
852    /// ])?;
853    /// let mut cache = re.create_cache();
854    /// let mut caps = re.create_captures();
855    ///
856    /// let replacement = b"year=$year, month=$month, day=$day";
857    ///
858    /// // This matches the first pattern.
859    /// let hay = b"On 14-03-2010, I became a Tennessee lamb.";
860    /// re.captures(&mut cache, hay, &mut caps);
861    /// let mut dst = vec![];
862    /// caps.interpolate_bytes_into(hay, replacement, &mut dst);
863    /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst);
864    ///
865    /// // And this matches the second pattern.
866    /// let hay = b"On 2010-03-14, I became a Tennessee lamb.";
867    /// re.captures(&mut cache, hay, &mut caps);
868    /// let mut dst = vec![];
869    /// caps.interpolate_bytes_into(hay, replacement, &mut dst);
870    /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst);
871    ///
872    /// # Ok::<(), Box<dyn std::error::Error>>(())
873    /// ```
874    pub fn interpolate_bytes_into(
875        &self,
876        haystack: &[u8],
877        replacement: &[u8],
878        dst: &mut Vec<u8>,
879    ) {
880        interpolate::bytes(
881            replacement,
882            |index, dst| {
883                let span = match self.get_group(index) {
884                    None => return,
885                    Some(span) => span,
886                };
887                dst.extend_from_slice(&haystack[span]);
888            },
889            |name| self.group_info().to_index(self.pattern()?, name),
890            dst,
891        );
892    }
893
894    /// This is a convenience routine for extracting the substrings
895    /// corresponding to matching capture groups in the given `haystack`. The
896    /// `haystack` should be the same substring used to find the match spans in
897    /// this `Captures` value.
898    ///
899    /// This is identical to [`Captures::extract_bytes`], except it works with
900    /// `&str` instead of `&[u8]`.
901    ///
902    /// # Panics
903    ///
904    /// This panics if the number of explicit matching groups in this
905    /// `Captures` value is less than `N`. This also panics if this `Captures`
906    /// value does not correspond to a match.
907    ///
908    /// Note that this does *not* panic if the number of explicit matching
909    /// groups is bigger than `N`. In that case, only the first `N` matching
910    /// groups are extracted.
911    ///
912    /// # Example
913    ///
914    /// ```
915    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
916    ///
917    /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
918    /// let mut cache = re.create_cache();
919    /// let mut caps = re.create_captures();
920    ///
921    /// let hay = "On 2010-03-14, I became a Tennessee lamb.";
922    /// re.captures(&mut cache, hay, &mut caps);
923    /// assert!(caps.is_match());
924    /// let (full, [year, month, day]) = caps.extract(hay);
925    /// assert_eq!("2010-03-14", full);
926    /// assert_eq!("2010", year);
927    /// assert_eq!("03", month);
928    /// assert_eq!("14", day);
929    ///
930    /// // We can also ask for fewer than all capture groups.
931    /// let (full, [year]) = caps.extract(hay);
932    /// assert_eq!("2010-03-14", full);
933    /// assert_eq!("2010", year);
934    ///
935    /// # Ok::<(), Box<dyn std::error::Error>>(())
936    /// ```
937    pub fn extract<'h, const N: usize>(
938        &self,
939        haystack: &'h str,
940    ) -> (&'h str, [&'h str; N]) {
941        let mut matched = self.iter().flatten();
942        let whole_match = &haystack[matched.next().expect("a match")];
943        let group_matches = [0; N].map(|_| {
944            let sp = matched.next().expect("too few matching groups");
945            &haystack[sp]
946        });
947        (whole_match, group_matches)
948    }
949
950    /// This is a convenience routine for extracting the substrings
951    /// corresponding to matching capture groups in the given `haystack`. The
952    /// `haystack` should be the same substring used to find the match spans in
953    /// this `Captures` value.
954    ///
955    /// This is identical to [`Captures::extract`], except it works with
956    /// `&[u8]` instead of `&str`.
957    ///
958    /// # Panics
959    ///
960    /// This panics if the number of explicit matching groups in this
961    /// `Captures` value is less than `N`. This also panics if this `Captures`
962    /// value does not correspond to a match.
963    ///
964    /// Note that this does *not* panic if the number of explicit matching
965    /// groups is bigger than `N`. In that case, only the first `N` matching
966    /// groups are extracted.
967    ///
968    /// # Example
969    ///
970    /// ```
971    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
972    ///
973    /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?;
974    /// let mut cache = re.create_cache();
975    /// let mut caps = re.create_captures();
976    ///
977    /// let hay = b"On 2010-03-14, I became a Tennessee lamb.";
978    /// re.captures(&mut cache, hay, &mut caps);
979    /// assert!(caps.is_match());
980    /// let (full, [year, month, day]) = caps.extract_bytes(hay);
981    /// assert_eq!(b"2010-03-14", full);
982    /// assert_eq!(b"2010", year);
983    /// assert_eq!(b"03", month);
984    /// assert_eq!(b"14", day);
985    ///
986    /// // We can also ask for fewer than all capture groups.
987    /// let (full, [year]) = caps.extract_bytes(hay);
988    /// assert_eq!(b"2010-03-14", full);
989    /// assert_eq!(b"2010", year);
990    ///
991    /// # Ok::<(), Box<dyn std::error::Error>>(())
992    /// ```
993    pub fn extract_bytes<'h, const N: usize>(
994        &self,
995        haystack: &'h [u8],
996    ) -> (&'h [u8], [&'h [u8]; N]) {
997        let mut matched = self.iter().flatten();
998        let whole_match = &haystack[matched.next().expect("a match")];
999        let group_matches = [0; N].map(|_| {
1000            let sp = matched.next().expect("too few matching groups");
1001            &haystack[sp]
1002        });
1003        (whole_match, group_matches)
1004    }
1005}
1006
1007/// Lower level "slot" oriented APIs. One does not typically need to use these
1008/// when executing a search. They are instead mostly intended for folks that
1009/// are writing their own regex engine while reusing this `Captures` type.
1010impl Captures {
1011    /// Clear this `Captures` value.
1012    ///
1013    /// After clearing, all slots inside this `Captures` value will be set to
1014    /// `None`. Similarly, any pattern ID that it was previously associated
1015    /// with (for a match) is erased.
1016    ///
1017    /// It is not usually necessary to call this routine. Namely, a `Captures`
1018    /// value only provides high level access to the capturing groups of the
1019    /// pattern that matched, and only low level access to individual slots.
1020    /// Thus, even if slots corresponding to groups that aren't associated
1021    /// with the matching pattern are set, then it won't impact the higher
1022    /// level APIs. Namely, higher level APIs like [`Captures::get_group`] will
1023    /// return `None` if no pattern ID is present, even if there are spans set
1024    /// in the underlying slots.
1025    ///
1026    /// Thus, to "clear" a `Captures` value of a match, it is usually only
1027    /// necessary to call [`Captures::set_pattern`] with `None`.
1028    ///
1029    /// # Example
1030    ///
1031    /// This example shows what happens when a `Captures` value is cleared.
1032    ///
1033    /// ```
1034    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1035    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
1036    ///
1037    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
1038    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1039    ///
1040    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
1041    /// assert!(caps.is_match());
1042    /// let slots: Vec<Option<usize>> =
1043    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1044    /// // Note that the following ordering is considered an API guarantee.
1045    /// assert_eq!(slots, vec![
1046    ///     Some(0),
1047    ///     Some(17),
1048    ///     Some(0),
1049    ///     Some(5),
1050    ///     Some(6),
1051    ///     Some(17),
1052    /// ]);
1053    ///
1054    /// // Now clear the slots. Everything is gone and it is no longer a match.
1055    /// caps.clear();
1056    /// assert!(!caps.is_match());
1057    /// let slots: Vec<Option<usize>> =
1058    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1059    /// assert_eq!(slots, vec![
1060    ///     None,
1061    ///     None,
1062    ///     None,
1063    ///     None,
1064    ///     None,
1065    ///     None,
1066    /// ]);
1067    ///
1068    /// # Ok::<(), Box<dyn std::error::Error>>(())
1069    /// ```
1070    #[inline]
1071    pub fn clear(&mut self) {
1072        self.pid = None;
1073        for slot in self.slots.iter_mut() {
1074            *slot = None;
1075        }
1076    }
1077
1078    /// Set the pattern on this `Captures` value.
1079    ///
1080    /// When the pattern ID is `None`, then this `Captures` value does not
1081    /// correspond to a match (`is_match` will return `false`). Otherwise, it
1082    /// corresponds to a match.
1083    ///
1084    /// This is useful in search implementations where you might want to
1085    /// initially call `set_pattern(None)` in order to avoid the cost of
1086    /// calling `clear()` if it turns out to not be necessary.
1087    ///
1088    /// # Example
1089    ///
1090    /// This example shows that `set_pattern` merely overwrites the pattern ID.
1091    /// It does not actually change the underlying slot values.
1092    ///
1093    /// ```
1094    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1095    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
1096    ///
1097    /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?;
1098    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1099    ///
1100    /// re.captures(&mut cache, "Bruce Springsteen", &mut caps);
1101    /// assert!(caps.is_match());
1102    /// assert!(caps.pattern().is_some());
1103    /// let slots: Vec<Option<usize>> =
1104    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1105    /// // Note that the following ordering is considered an API guarantee.
1106    /// assert_eq!(slots, vec![
1107    ///     Some(0),
1108    ///     Some(17),
1109    ///     Some(0),
1110    ///     Some(5),
1111    ///     Some(6),
1112    ///     Some(17),
1113    /// ]);
1114    ///
1115    /// // Now set the pattern to None. Note that the slot values remain.
1116    /// caps.set_pattern(None);
1117    /// assert!(!caps.is_match());
1118    /// assert!(!caps.pattern().is_some());
1119    /// let slots: Vec<Option<usize>> =
1120    ///     caps.slots().iter().map(|s| s.map(|x| x.get())).collect();
1121    /// // Note that the following ordering is considered an API guarantee.
1122    /// assert_eq!(slots, vec![
1123    ///     Some(0),
1124    ///     Some(17),
1125    ///     Some(0),
1126    ///     Some(5),
1127    ///     Some(6),
1128    ///     Some(17),
1129    /// ]);
1130    ///
1131    /// # Ok::<(), Box<dyn std::error::Error>>(())
1132    /// ```
1133    #[inline]
1134    pub fn set_pattern(&mut self, pid: Option<PatternID>) {
1135        self.pid = pid;
1136    }
1137
1138    /// Returns the underlying slots, where each slot stores a single offset.
1139    ///
1140    /// Every matching capturing group generally corresponds to two slots: one
1141    /// slot for the starting position and another for the ending position.
1142    /// Typically, either both are present or neither are. (The weasel word
1143    /// "typically" is used here because it really depends on the regex engine
1144    /// implementation. Every sensible regex engine likely adheres to this
1145    /// invariant, and every regex engine in this crate is sensible.)
1146    ///
1147    /// Generally speaking, callers should prefer to use higher level routines
1148    /// like [`Captures::get_match`] or [`Captures::get_group`].
1149    ///
1150    /// An important note here is that a regex engine may not reset all of the
1151    /// slots to `None` values when no match occurs, or even when a match of
1152    /// a different pattern occurs. But this depends on how the regex engine
1153    /// implementation deals with slots.
1154    ///
1155    /// # Example
1156    ///
1157    /// This example shows how to get the underlying slots from a regex match.
1158    ///
1159    /// ```
1160    /// use regex_automata::{
1161    ///     nfa::thompson::pikevm::PikeVM,
1162    ///     util::primitives::{PatternID, NonMaxUsize},
1163    /// };
1164    ///
1165    /// let re = PikeVM::new_many(&[
1166    ///     r"[a-z]+",
1167    ///     r"[0-9]+",
1168    /// ])?;
1169    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
1170    ///
1171    /// re.captures(&mut cache, "123", &mut caps);
1172    /// assert_eq!(Some(PatternID::must(1)), caps.pattern());
1173    /// // Note that the only guarantee we have here is that slots 2 and 3
1174    /// // are set to correct values. The contents of the first two slots are
1175    /// // unspecified since the 0th pattern did not match.
1176    /// let expected = &[
1177    ///     None,
1178    ///     None,
1179    ///     NonMaxUsize::new(0),
1180    ///     NonMaxUsize::new(3),
1181    /// ];
1182    /// assert_eq!(expected, caps.slots());
1183    ///
1184    /// # Ok::<(), Box<dyn std::error::Error>>(())
1185    /// ```
1186    #[inline]
1187    pub fn slots(&self) -> &[Option<NonMaxUsize>] {
1188        &self.slots
1189    }
1190
1191    /// Returns the underlying slots as a mutable slice, where each slot stores
1192    /// a single offset.
1193    ///
1194    /// This tends to be most useful for regex engine implementations for
1195    /// writing offsets for matching capturing groups to slots.
1196    ///
1197    /// See [`Captures::slots`] for more information about slots.
1198    #[inline]
1199    pub fn slots_mut(&mut self) -> &mut [Option<NonMaxUsize>] {
1200        &mut self.slots
1201    }
1202}
1203
1204impl core::fmt::Debug for Captures {
1205    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1206        let mut dstruct = f.debug_struct("Captures");
1207        dstruct.field("pid", &self.pid);
1208        if let Some(pid) = self.pid {
1209            dstruct.field("spans", &CapturesDebugMap { pid, caps: self });
1210        }
1211        dstruct.finish()
1212    }
1213}
1214
1215/// A little helper type to provide a nice map-like debug representation for
1216/// our capturing group spans.
1217struct CapturesDebugMap<'a> {
1218    pid: PatternID,
1219    caps: &'a Captures,
1220}
1221
1222impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
1223    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1224        struct Key<'a>(usize, Option<&'a str>);
1225
1226        impl<'a> core::fmt::Debug for Key<'a> {
1227            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
1228                write!(f, "{}", self.0)?;
1229                if let Some(name) = self.1 {
1230                    write!(f, "/{name:?}")?;
1231                }
1232                Ok(())
1233            }
1234        }
1235
1236        let mut map = f.debug_map();
1237        let names = self.caps.group_info().pattern_names(self.pid);
1238        for (group_index, maybe_name) in names.enumerate() {
1239            let key = Key(group_index, maybe_name);
1240            match self.caps.get_group(group_index) {
1241                None => map.entry(&key, &None::<()>),
1242                Some(span) => map.entry(&key, &span),
1243            };
1244        }
1245        map.finish()
1246    }
1247}
1248
1249/// An iterator over all capturing groups in a `Captures` value.
1250///
1251/// This iterator includes capturing groups that did not participate in a
1252/// match. See the [`Captures::iter`] method documentation for more details
1253/// and examples.
1254///
1255/// The lifetime parameter `'a` refers to the lifetime of the underlying
1256/// `Captures` value.
1257#[derive(Clone, Debug)]
1258pub struct CapturesPatternIter<'a> {
1259    caps: &'a Captures,
1260    names: core::iter::Enumerate<GroupInfoPatternNames<'a>>,
1261}
1262
1263impl<'a> Iterator for CapturesPatternIter<'a> {
1264    type Item = Option<Span>;
1265
1266    fn next(&mut self) -> Option<Option<Span>> {
1267        let (group_index, _) = self.names.next()?;
1268        Some(self.caps.get_group(group_index))
1269    }
1270
1271    fn size_hint(&self) -> (usize, Option<usize>) {
1272        self.names.size_hint()
1273    }
1274
1275    fn count(self) -> usize {
1276        self.names.count()
1277    }
1278}
1279
1280impl<'a> ExactSizeIterator for CapturesPatternIter<'a> {}
1281impl<'a> core::iter::FusedIterator for CapturesPatternIter<'a> {}
1282
1283/// Represents information about capturing groups in a compiled regex.
1284///
1285/// The information encapsulated by this type consists of the following. For
1286/// each pattern:
1287///
1288/// * A map from every capture group name to its corresponding capture group
1289/// index.
1290/// * A map from every capture group index to its corresponding capture group
1291/// name.
1292/// * A map from capture group index to its corresponding slot index. A slot
1293/// refers to one half of a capturing group. That is, a capture slot is either
1294/// the start or end of a capturing group. A slot is usually the mechanism
1295/// by which a regex engine records offsets for each capturing group during a
1296/// search.
1297///
1298/// A `GroupInfo` uses reference counting internally and is thus cheap to
1299/// clone.
1300///
1301/// # Mapping from capture groups to slots
1302///
1303/// One of the main responsibilities of a `GroupInfo` is to build a mapping
1304/// from `(PatternID, u32)` (where the `u32` is a capture index) to something
1305/// called a "slot." As mentioned above, a slot refers to one half of a
1306/// capturing group. Both combined provide the start and end offsets of
1307/// a capturing group that participated in a match.
1308///
1309/// **The mapping between group indices and slots is an API guarantee.** That
1310/// is, the mapping won't change within a semver compatible release.
1311///
1312/// Slots exist primarily because this is a convenient mechanism by which
1313/// regex engines report group offsets at search time. For example, the
1314/// [`nfa::thompson::State::Capture`](crate::nfa::thompson::State::Capture)
1315/// NFA state includes the slot index. When a regex engine transitions through
1316/// this state, it will likely use the slot index to write the current haystack
1317/// offset to some region of memory. When a match is found, those slots are
1318/// then reported to the caller, typically via a convenient abstraction like a
1319/// [`Captures`] value.
1320///
1321/// Because this crate provides first class support for multi-pattern regexes,
1322/// and because of some performance related reasons, the mapping between
1323/// capturing groups and slots is a little complex. However, in the case of a
1324/// single pattern, the mapping can be described very simply: for all capture
1325/// group indices `i`, its corresponding slots are at `i * 2` and `i * 2 + 1`.
1326/// Notice that the pattern ID isn't involved at all here, because it only
1327/// applies to a single-pattern regex, it is therefore always `0`.
1328///
1329/// In the multi-pattern case, the mapping is a bit more complicated. To talk
1330/// about it, we must define what we mean by "implicit" vs "explicit"
1331/// capturing groups:
1332///
1333/// * An **implicit** capturing group refers to the capturing group that is
1334/// present for every pattern automatically, and corresponds to the overall
1335/// match of a pattern. Every pattern has precisely one implicit capturing
1336/// group. It is always unnamed and it always corresponds to the capture group
1337/// index `0`.
1338/// * An **explicit** capturing group refers to any capturing group that
1339/// appears in the concrete syntax of the pattern. (Or, if an NFA was hand
1340/// built without any concrete syntax, it refers to any capturing group with an
1341/// index greater than `0`.)
1342///
1343/// Some examples:
1344///
1345/// * `\w+` has one implicit capturing group and zero explicit capturing
1346/// groups.
1347/// * `(\w+)` has one implicit group and one explicit group.
1348/// * `foo(\d+)(?:\pL+)(\d+)` has one implicit group and two explicit groups.
1349///
1350/// Turning back to the slot mapping, we can now state it as follows:
1351///
1352/// * Given a pattern ID `pid`, the slots for its implicit group are always
1353/// at `pid * 2` and `pid * 2 + 1`.
1354/// * Given a pattern ID `0`, the slots for its explicit groups start
1355/// at `group_info.pattern_len() * 2`.
1356/// * Given a pattern ID `pid > 0`, the slots for its explicit groups start
1357/// immediately following where the slots for the explicit groups of `pid - 1`
1358/// end.
1359///
1360/// In particular, while there is a concrete formula one can use to determine
1361/// where the slots for the implicit group of any pattern are, there is no
1362/// general formula for determining where the slots for explicit capturing
1363/// groups are. This is because each pattern can contain a different number
1364/// of groups.
1365///
1366/// The intended way of getting the slots for a particular capturing group
1367/// (whether implicit or explicit) is via the [`GroupInfo::slot`] or
1368/// [`GroupInfo::slots`] method.
1369///
1370/// See below for a concrete example of how capturing groups get mapped to
1371/// slots.
1372///
1373/// # Example
1374///
1375/// This example shows how to build a new `GroupInfo` and query it for
1376/// information.
1377///
1378/// ```
1379/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
1380///
1381/// let info = GroupInfo::new(vec![
1382///     vec![None, Some("foo")],
1383///     vec![None],
1384///     vec![None, None, None, Some("bar"), None],
1385///     vec![None, None, Some("foo")],
1386/// ])?;
1387/// // The number of patterns being tracked.
1388/// assert_eq!(4, info.pattern_len());
1389/// // We can query the number of groups for any pattern.
1390/// assert_eq!(2, info.group_len(PatternID::must(0)));
1391/// assert_eq!(1, info.group_len(PatternID::must(1)));
1392/// assert_eq!(5, info.group_len(PatternID::must(2)));
1393/// assert_eq!(3, info.group_len(PatternID::must(3)));
1394/// // An invalid pattern always has zero groups.
1395/// assert_eq!(0, info.group_len(PatternID::must(999)));
1396/// // 2 slots per group
1397/// assert_eq!(22, info.slot_len());
1398///
1399/// // We can map a group index for a particular pattern to its name, if
1400/// // one exists.
1401/// assert_eq!(Some("foo"), info.to_name(PatternID::must(3), 2));
1402/// assert_eq!(None, info.to_name(PatternID::must(2), 4));
1403/// // Or map a name to its group index.
1404/// assert_eq!(Some(1), info.to_index(PatternID::must(0), "foo"));
1405/// assert_eq!(Some(2), info.to_index(PatternID::must(3), "foo"));
1406///
1407/// # Ok::<(), Box<dyn std::error::Error>>(())
1408/// ```
1409///
1410/// # Example: mapping from capture groups to slots
1411///
1412/// This example shows the specific mapping from capture group indices for
1413/// each pattern to their corresponding slots. The slot values shown in this
1414/// example are considered an API guarantee.
1415///
1416/// ```
1417/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
1418///
1419/// let info = GroupInfo::new(vec![
1420///     vec![None, Some("foo")],
1421///     vec![None],
1422///     vec![None, None, None, Some("bar"), None],
1423///     vec![None, None, Some("foo")],
1424/// ])?;
1425///
1426/// // We first show the slots for each pattern's implicit group.
1427/// assert_eq!(Some((0, 1)), info.slots(PatternID::must(0), 0));
1428/// assert_eq!(Some((2, 3)), info.slots(PatternID::must(1), 0));
1429/// assert_eq!(Some((4, 5)), info.slots(PatternID::must(2), 0));
1430/// assert_eq!(Some((6, 7)), info.slots(PatternID::must(3), 0));
1431///
1432/// // And now we show the slots for each pattern's explicit group.
1433/// assert_eq!(Some((8, 9)), info.slots(PatternID::must(0), 1));
1434/// assert_eq!(Some((10, 11)), info.slots(PatternID::must(2), 1));
1435/// assert_eq!(Some((12, 13)), info.slots(PatternID::must(2), 2));
1436/// assert_eq!(Some((14, 15)), info.slots(PatternID::must(2), 3));
1437/// assert_eq!(Some((16, 17)), info.slots(PatternID::must(2), 4));
1438/// assert_eq!(Some((18, 19)), info.slots(PatternID::must(3), 1));
1439/// assert_eq!(Some((20, 21)), info.slots(PatternID::must(3), 2));
1440///
1441/// // Asking for the slots for an invalid pattern ID or even for an invalid
1442/// // group index for a specific pattern will return None. So for example,
1443/// // you're guaranteed to not get the slots for a different pattern than the
1444/// // one requested.
1445/// assert_eq!(None, info.slots(PatternID::must(5), 0));
1446/// assert_eq!(None, info.slots(PatternID::must(1), 1));
1447///
1448/// # Ok::<(), Box<dyn std::error::Error>>(())
1449/// ```
1450#[derive(Clone, Debug, Default)]
1451pub struct GroupInfo(Arc<GroupInfoInner>);
1452
1453impl GroupInfo {
1454    /// Creates a new group info from a sequence of patterns, where each
1455    /// sequence of patterns yields a sequence of possible group names. The
1456    /// index of each pattern in the sequence corresponds to its `PatternID`,
1457    /// and the index of each group in each pattern's sequence corresponds to
1458    /// its corresponding group index.
1459    ///
1460    /// While this constructor is very generic and therefore perhaps hard to
1461    /// chew on, an example of a valid concrete type that can be passed to
1462    /// this constructor is `Vec<Vec<Option<String>>>`. The outer `Vec`
1463    /// corresponds to the patterns, i.e., one `Vec<Option<String>>` per
1464    /// pattern. The inner `Vec` corresponds to the capturing groups for
1465    /// each pattern. The `Option<String>` corresponds to the name of the
1466    /// capturing group, if present.
1467    ///
1468    /// It is legal to pass an empty iterator to this constructor. It will
1469    /// return an empty group info with zero slots. An empty group info is
1470    /// useful for cases where you have no patterns or for cases where slots
1471    /// aren't being used at all (e.g., for most DFAs in this crate).
1472    ///
1473    /// # Errors
1474    ///
1475    /// This constructor returns an error if the given capturing groups are
1476    /// invalid in some way. Those reasons include, but are not necessarily
1477    /// limited to:
1478    ///
1479    /// * Too many patterns (i.e., `PatternID` would overflow).
1480    /// * Too many capturing groups (e.g., `u32` would overflow).
1481    /// * A pattern is given that has no capturing groups. (All patterns must
1482    /// have at least an implicit capturing group at index `0`.)
1483    /// * The capturing group at index `0` has a name. It must be unnamed.
1484    /// * There are duplicate capturing group names within the same pattern.
1485    /// (Multiple capturing groups with the same name may exist, but they
1486    /// must be in different patterns.)
1487    ///
1488    /// An example below shows how to trigger some of the above error
1489    /// conditions.
1490    ///
1491    /// # Example
1492    ///
1493    /// This example shows how to build a new `GroupInfo` and query it for
1494    /// information.
1495    ///
1496    /// ```
1497    /// use regex_automata::util::captures::GroupInfo;
1498    ///
1499    /// let info = GroupInfo::new(vec![
1500    ///     vec![None, Some("foo")],
1501    ///     vec![None],
1502    ///     vec![None, None, None, Some("bar"), None],
1503    ///     vec![None, None, Some("foo")],
1504    /// ])?;
1505    /// // The number of patterns being tracked.
1506    /// assert_eq!(4, info.pattern_len());
1507    /// // 2 slots per group
1508    /// assert_eq!(22, info.slot_len());
1509    ///
1510    /// # Ok::<(), Box<dyn std::error::Error>>(())
1511    /// ```
1512    ///
1513    /// # Example: empty `GroupInfo`
1514    ///
1515    /// This example shows how to build a new `GroupInfo` and query it for
1516    /// information.
1517    ///
1518    /// ```
1519    /// use regex_automata::util::captures::GroupInfo;
1520    ///
1521    /// let info = GroupInfo::empty();
1522    /// // Everything is zero.
1523    /// assert_eq!(0, info.pattern_len());
1524    /// assert_eq!(0, info.slot_len());
1525    ///
1526    /// # Ok::<(), Box<dyn std::error::Error>>(())
1527    /// ```
1528    ///
1529    /// # Example: error conditions
1530    ///
1531    /// This example shows how to provoke some of the ways in which building
1532    /// a `GroupInfo` can fail.
1533    ///
1534    /// ```
1535    /// use regex_automata::util::captures::GroupInfo;
1536    ///
1537    /// // Either the group info is empty, or all patterns must have at least
1538    /// // one capturing group.
1539    /// assert!(GroupInfo::new(vec![
1540    ///     vec![None, Some("a")], // ok
1541    ///     vec![None], // ok
1542    ///     vec![], // not ok
1543    /// ]).is_err());
1544    /// // Note that building an empty group info is OK.
1545    /// assert!(GroupInfo::new(Vec::<Vec<Option<String>>>::new()).is_ok());
1546    ///
1547    /// // The first group in each pattern must correspond to an implicit
1548    /// // anonymous group. i.e., One that is not named. By convention, this
1549    /// // group corresponds to the overall match of a regex. Every other group
1550    /// // in a pattern is explicit and optional.
1551    /// assert!(GroupInfo::new(vec![vec![Some("foo")]]).is_err());
1552    ///
1553    /// // There must not be duplicate group names within the same pattern.
1554    /// assert!(GroupInfo::new(vec![
1555    ///     vec![None, Some("foo"), Some("foo")],
1556    /// ]).is_err());
1557    /// // But duplicate names across distinct patterns is OK.
1558    /// assert!(GroupInfo::new(vec![
1559    ///     vec![None, Some("foo")],
1560    ///     vec![None, Some("foo")],
1561    /// ]).is_ok());
1562    ///
1563    /// # Ok::<(), Box<dyn std::error::Error>>(())
1564    /// ```
1565    ///
1566    /// There are other ways for building a `GroupInfo` to fail but are
1567    /// difficult to show. For example, if the number of patterns given would
1568    /// overflow `PatternID`.
1569    pub fn new<P, G, N>(pattern_groups: P) -> Result<GroupInfo, GroupInfoError>
1570    where
1571        P: IntoIterator<Item = G>,
1572        G: IntoIterator<Item = Option<N>>,
1573        N: AsRef<str>,
1574    {
1575        let mut group_info = GroupInfoInner {
1576            slot_ranges: vec![],
1577            name_to_index: vec![],
1578            index_to_name: vec![],
1579            memory_extra: 0,
1580        };
1581        for (pattern_index, groups) in pattern_groups.into_iter().enumerate() {
1582            // If we can't convert the pattern index to an ID, then the caller
1583            // tried to build capture info for too many patterns.
1584            let pid = PatternID::new(pattern_index)
1585                .map_err(GroupInfoError::too_many_patterns)?;
1586
1587            let mut groups_iter = groups.into_iter().enumerate();
1588            match groups_iter.next() {
1589                None => return Err(GroupInfoError::missing_groups(pid)),
1590                Some((_, Some(_))) => {
1591                    return Err(GroupInfoError::first_must_be_unnamed(pid))
1592                }
1593                Some((_, None)) => {}
1594            }
1595            group_info.add_first_group(pid);
1596            // Now iterate over the rest, which correspond to all of the
1597            // (conventionally) explicit capture groups in a regex pattern.
1598            for (group_index, maybe_name) in groups_iter {
1599                // Just like for patterns, if the group index can't be
1600                // converted to a "small" index, then the caller has given too
1601                // many groups for a particular pattern.
1602                let group = SmallIndex::new(group_index).map_err(|_| {
1603                    GroupInfoError::too_many_groups(pid, group_index)
1604                })?;
1605                group_info.add_explicit_group(pid, group, maybe_name)?;
1606            }
1607        }
1608        group_info.fixup_slot_ranges()?;
1609        group_info.slot_ranges.shrink_to_fit();
1610        group_info.name_to_index.shrink_to_fit();
1611        group_info.index_to_name.shrink_to_fit();
1612        Ok(GroupInfo(Arc::new(group_info)))
1613    }
1614
1615    /// This creates an empty `GroupInfo`.
1616    ///
1617    /// This is a convenience routine for calling `GroupInfo::new` with an
1618    /// iterator that yields no elements.
1619    ///
1620    /// # Example
1621    ///
1622    /// This example shows how to build a new empty `GroupInfo` and query it
1623    /// for information.
1624    ///
1625    /// ```
1626    /// use regex_automata::util::captures::GroupInfo;
1627    ///
1628    /// let info = GroupInfo::empty();
1629    /// // Everything is zero.
1630    /// assert_eq!(0, info.pattern_len());
1631    /// assert_eq!(0, info.all_group_len());
1632    /// assert_eq!(0, info.slot_len());
1633    ///
1634    /// # Ok::<(), Box<dyn std::error::Error>>(())
1635    /// ```
1636    pub fn empty() -> GroupInfo {
1637        GroupInfo::new(core::iter::empty::<[Option<&str>; 0]>())
1638            .expect("empty group info is always valid")
1639    }
1640
1641    /// Return the capture group index corresponding to the given name in the
1642    /// given pattern. If no such capture group name exists in the given
1643    /// pattern, then this returns `None`.
1644    ///
1645    /// If the given pattern ID is invalid, then this returns `None`.
1646    ///
1647    /// This also returns `None` for all inputs if these captures are empty
1648    /// (e.g., built from an empty [`GroupInfo`]). To check whether captures
1649    /// are present for a specific pattern, use [`GroupInfo::group_len`].
1650    ///
1651    /// # Example
1652    ///
1653    /// This example shows how to find the capture index for the given pattern
1654    /// and group name.
1655    ///
1656    /// Remember that capture indices are relative to the pattern, such that
1657    /// the same capture index value may refer to different capturing groups
1658    /// for distinct patterns.
1659    ///
1660    /// ```
1661    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1662    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1663    ///
1664    /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1));
1665    ///
1666    /// let nfa = NFA::new_many(&[
1667    ///     r"a(?P<quux>\w+)z(?P<foo>\s+)",
1668    ///     r"a(?P<foo>\d+)z",
1669    /// ])?;
1670    /// let groups = nfa.group_info();
1671    /// assert_eq!(Some(2), groups.to_index(pid0, "foo"));
1672    /// // Recall that capture index 0 is always unnamed and refers to the
1673    /// // entire pattern. So the first capturing group present in the pattern
1674    /// // itself always starts at index 1.
1675    /// assert_eq!(Some(1), groups.to_index(pid1, "foo"));
1676    ///
1677    /// // And if a name does not exist for a particular pattern, None is
1678    /// // returned.
1679    /// assert!(groups.to_index(pid0, "quux").is_some());
1680    /// assert!(groups.to_index(pid1, "quux").is_none());
1681    ///
1682    /// # Ok::<(), Box<dyn std::error::Error>>(())
1683    /// ```
1684    #[inline]
1685    pub fn to_index(&self, pid: PatternID, name: &str) -> Option<usize> {
1686        let indices = self.0.name_to_index.get(pid.as_usize())?;
1687        indices.get(name).cloned().map(|i| i.as_usize())
1688    }
1689
1690    /// Return the capture name for the given index and given pattern. If the
1691    /// corresponding group does not have a name, then this returns `None`.
1692    ///
1693    /// If the pattern ID is invalid, then this returns `None`.
1694    ///
1695    /// If the group index is invalid for the given pattern, then this returns
1696    /// `None`. A group `index` is valid for a pattern `pid` in an `nfa` if and
1697    /// only if `index < nfa.pattern_capture_len(pid)`.
1698    ///
1699    /// This also returns `None` for all inputs if these captures are empty
1700    /// (e.g., built from an empty [`GroupInfo`]). To check whether captures
1701    /// are present for a specific pattern, use [`GroupInfo::group_len`].
1702    ///
1703    /// # Example
1704    ///
1705    /// This example shows how to find the capture group name for the given
1706    /// pattern and group index.
1707    ///
1708    /// ```
1709    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
1710    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1711    ///
1712    /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1));
1713    ///
1714    /// let nfa = NFA::new_many(&[
1715    ///     r"a(?P<foo>\w+)z(\s+)x(\d+)",
1716    ///     r"a(\d+)z(?P<foo>\s+)",
1717    /// ])?;
1718    /// let groups = nfa.group_info();
1719    /// assert_eq!(None, groups.to_name(pid0, 0));
1720    /// assert_eq!(Some("foo"), groups.to_name(pid0, 1));
1721    /// assert_eq!(None, groups.to_name(pid0, 2));
1722    /// assert_eq!(None, groups.to_name(pid0, 3));
1723    ///
1724    /// assert_eq!(None, groups.to_name(pid1, 0));
1725    /// assert_eq!(None, groups.to_name(pid1, 1));
1726    /// assert_eq!(Some("foo"), groups.to_name(pid1, 2));
1727    /// // '3' is not a valid capture index for the second pattern.
1728    /// assert_eq!(None, groups.to_name(pid1, 3));
1729    ///
1730    /// # Ok::<(), Box<dyn std::error::Error>>(())
1731    /// ```
1732    #[inline]
1733    pub fn to_name(&self, pid: PatternID, group_index: usize) -> Option<&str> {
1734        let pattern_names = self.0.index_to_name.get(pid.as_usize())?;
1735        pattern_names.get(group_index)?.as_deref()
1736    }
1737
1738    /// Return an iterator of all capture groups and their names (if present)
1739    /// for a particular pattern.
1740    ///
1741    /// If the given pattern ID is invalid or if this `GroupInfo` is empty,
1742    /// then the iterator yields no elements.
1743    ///
1744    /// The number of elements yielded by this iterator is always equal to
1745    /// the result of calling [`GroupInfo::group_len`] with the same
1746    /// `PatternID`.
1747    ///
1748    /// # Example
1749    ///
1750    /// This example shows how to get a list of all capture group names for
1751    /// a particular pattern.
1752    ///
1753    /// ```
1754    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1755    ///
1756    /// let nfa = NFA::new(r"(a)(?P<foo>b)(c)(d)(?P<bar>e)")?;
1757    /// // The first is the implicit group that is always unnamed. The next
1758    /// // 5 groups are the explicit groups found in the concrete syntax above.
1759    /// let expected = vec![None, None, Some("foo"), None, None, Some("bar")];
1760    /// let got: Vec<Option<&str>> =
1761    ///     nfa.group_info().pattern_names(PatternID::ZERO).collect();
1762    /// assert_eq!(expected, got);
1763    ///
1764    /// // Using an invalid pattern ID will result in nothing yielded.
1765    /// let got = nfa.group_info().pattern_names(PatternID::must(999)).count();
1766    /// assert_eq!(0, got);
1767    ///
1768    /// # Ok::<(), Box<dyn std::error::Error>>(())
1769    /// ```
1770    #[inline]
1771    pub fn pattern_names(&self, pid: PatternID) -> GroupInfoPatternNames<'_> {
1772        GroupInfoPatternNames {
1773            it: self
1774                .0
1775                .index_to_name
1776                .get(pid.as_usize())
1777                .map(|indices| indices.iter())
1778                .unwrap_or([].iter()),
1779        }
1780    }
1781
1782    /// Return an iterator of all capture groups for all patterns supported by
1783    /// this `GroupInfo`. Each item yielded is a triple of the group's pattern
1784    /// ID, index in the pattern and the group's name, if present.
1785    ///
1786    /// # Example
1787    ///
1788    /// This example shows how to get a list of all capture groups found in
1789    /// one NFA, potentially spanning multiple patterns.
1790    ///
1791    /// ```
1792    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1793    ///
1794    /// let nfa = NFA::new_many(&[
1795    ///     r"(?P<foo>a)",
1796    ///     r"a",
1797    ///     r"(a)",
1798    /// ])?;
1799    /// let expected = vec![
1800    ///     (PatternID::must(0), 0, None),
1801    ///     (PatternID::must(0), 1, Some("foo")),
1802    ///     (PatternID::must(1), 0, None),
1803    ///     (PatternID::must(2), 0, None),
1804    ///     (PatternID::must(2), 1, None),
1805    /// ];
1806    /// let got: Vec<(PatternID, usize, Option<&str>)> =
1807    ///     nfa.group_info().all_names().collect();
1808    /// assert_eq!(expected, got);
1809    ///
1810    /// # Ok::<(), Box<dyn std::error::Error>>(())
1811    /// ```
1812    ///
1813    /// Unlike other capturing group related routines, this routine doesn't
1814    /// panic even if captures aren't enabled on this NFA:
1815    ///
1816    /// ```
1817    /// use regex_automata::nfa::thompson::{NFA, WhichCaptures};
1818    ///
1819    /// let nfa = NFA::compiler()
1820    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
1821    ///     .build_many(&[
1822    ///         r"(?P<foo>a)",
1823    ///         r"a",
1824    ///         r"(a)",
1825    ///     ])?;
1826    /// // When captures aren't enabled, there's nothing to return.
1827    /// assert_eq!(0, nfa.group_info().all_names().count());
1828    ///
1829    /// # Ok::<(), Box<dyn std::error::Error>>(())
1830    /// ```
1831    #[inline]
1832    pub fn all_names(&self) -> GroupInfoAllNames<'_> {
1833        GroupInfoAllNames {
1834            group_info: self,
1835            pids: PatternID::iter(self.pattern_len()),
1836            current_pid: None,
1837            names: None,
1838        }
1839    }
1840
1841    /// Returns the starting and ending slot corresponding to the given
1842    /// capturing group for the given pattern. The ending slot is always one
1843    /// more than the starting slot returned.
1844    ///
1845    /// Note that this is like [`GroupInfo::slot`], except that it also returns
1846    /// the ending slot value for convenience.
1847    ///
1848    /// If either the pattern ID or the capture index is invalid, then this
1849    /// returns None.
1850    ///
1851    /// # Example
1852    ///
1853    /// This example shows that the starting slots for the first capturing
1854    /// group of each pattern are distinct.
1855    ///
1856    /// ```
1857    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1858    ///
1859    /// let nfa = NFA::new_many(&["a", "b"])?;
1860    /// assert_ne!(
1861    ///     nfa.group_info().slots(PatternID::must(0), 0),
1862    ///     nfa.group_info().slots(PatternID::must(1), 0),
1863    /// );
1864    ///
1865    /// // Also, the start and end slot values are never equivalent.
1866    /// let (start, end) = nfa.group_info().slots(PatternID::ZERO, 0).unwrap();
1867    /// assert_ne!(start, end);
1868    ///
1869    /// # Ok::<(), Box<dyn std::error::Error>>(())
1870    /// ```
1871    #[inline]
1872    pub fn slots(
1873        &self,
1874        pid: PatternID,
1875        group_index: usize,
1876    ) -> Option<(usize, usize)> {
1877        // Since 'slot' only even returns valid starting slots, we know that
1878        // there must also be an end slot and that end slot is always one more
1879        // than the start slot.
1880        self.slot(pid, group_index).map(|start| (start, start + 1))
1881    }
1882
1883    /// Returns the starting slot corresponding to the given capturing group
1884    /// for the given pattern. The ending slot is always one more than the
1885    /// value returned.
1886    ///
1887    /// If either the pattern ID or the capture index is invalid, then this
1888    /// returns None.
1889    ///
1890    /// # Example
1891    ///
1892    /// This example shows that the starting slots for the first capturing
1893    /// group of each pattern are distinct.
1894    ///
1895    /// ```
1896    /// use regex_automata::{nfa::thompson::NFA, PatternID};
1897    ///
1898    /// let nfa = NFA::new_many(&["a", "b"])?;
1899    /// assert_ne!(
1900    ///     nfa.group_info().slot(PatternID::must(0), 0),
1901    ///     nfa.group_info().slot(PatternID::must(1), 0),
1902    /// );
1903    ///
1904    /// # Ok::<(), Box<dyn std::error::Error>>(())
1905    /// ```
1906    #[inline]
1907    pub fn slot(&self, pid: PatternID, group_index: usize) -> Option<usize> {
1908        if group_index >= self.group_len(pid) {
1909            return None;
1910        }
1911        // At this point, we know that 'pid' refers to a real pattern and that
1912        // 'group_index' refers to a real group. We therefore also know that
1913        // the pattern and group can be combined to return a correct slot.
1914        // That's why we don't need to use checked arithmetic below.
1915        if group_index == 0 {
1916            Some(pid.as_usize() * 2)
1917        } else {
1918            // As above, we don't need to check that our slot is less than the
1919            // end of our range since we already know the group index is a
1920            // valid index for the given pattern.
1921            let (start, _) = self.0.slot_ranges[pid];
1922            Some(start.as_usize() + ((group_index - 1) * 2))
1923        }
1924    }
1925
1926    /// Returns the total number of patterns in this `GroupInfo`.
1927    ///
1928    /// This may return zero if the `GroupInfo` was constructed with no
1929    /// patterns.
1930    ///
1931    /// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because
1932    /// `GroupInfo` construction will fail if too many patterns are added.
1933    ///
1934    /// # Example
1935    ///
1936    /// ```
1937    /// use regex_automata::nfa::thompson::NFA;
1938    ///
1939    /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
1940    /// assert_eq!(3, nfa.group_info().pattern_len());
1941    ///
1942    /// let nfa = NFA::never_match();
1943    /// assert_eq!(0, nfa.group_info().pattern_len());
1944    ///
1945    /// let nfa = NFA::always_match();
1946    /// assert_eq!(1, nfa.group_info().pattern_len());
1947    ///
1948    /// # Ok::<(), Box<dyn std::error::Error>>(())
1949    /// ```
1950    #[inline]
1951    pub fn pattern_len(&self) -> usize {
1952        self.0.pattern_len()
1953    }
1954
1955    /// Return the number of capture groups in a pattern.
1956    ///
1957    /// If the pattern ID is invalid, then this returns `0`.
1958    ///
1959    /// # Example
1960    ///
1961    /// This example shows how the values returned by this routine may vary
1962    /// for different patterns and NFA configurations.
1963    ///
1964    /// ```
1965    /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
1966    ///
1967    /// let nfa = NFA::new(r"(a)(b)(c)")?;
1968    /// // There are 3 explicit groups in the pattern's concrete syntax and
1969    /// // 1 unnamed and implicit group spanning the entire pattern.
1970    /// assert_eq!(4, nfa.group_info().group_len(PatternID::ZERO));
1971    ///
1972    /// let nfa = NFA::new(r"abc")?;
1973    /// // There is just the unnamed implicit group.
1974    /// assert_eq!(1, nfa.group_info().group_len(PatternID::ZERO));
1975    ///
1976    /// let nfa = NFA::compiler()
1977    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
1978    ///     .build(r"abc")?;
1979    /// // We disabled capturing groups, so there are none.
1980    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
1981    ///
1982    /// let nfa = NFA::compiler()
1983    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
1984    ///     .build(r"(a)(b)(c)")?;
1985    /// // We disabled capturing groups, so there are none, even if there are
1986    /// // explicit groups in the concrete syntax.
1987    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
1988    ///
1989    /// # Ok::<(), Box<dyn std::error::Error>>(())
1990    /// ```
1991    #[inline]
1992    pub fn group_len(&self, pid: PatternID) -> usize {
1993        self.0.group_len(pid)
1994    }
1995
1996    /// Return the total number of capture groups across all patterns.
1997    ///
1998    /// This includes implicit groups that represent the entire match of a
1999    /// pattern.
2000    ///
2001    /// # Example
2002    ///
2003    /// This example shows how the values returned by this routine may vary
2004    /// for different patterns and NFA configurations.
2005    ///
2006    /// ```
2007    /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID};
2008    ///
2009    /// let nfa = NFA::new(r"(a)(b)(c)")?;
2010    /// // There are 3 explicit groups in the pattern's concrete syntax and
2011    /// // 1 unnamed and implicit group spanning the entire pattern.
2012    /// assert_eq!(4, nfa.group_info().all_group_len());
2013    ///
2014    /// let nfa = NFA::new(r"abc")?;
2015    /// // There is just the unnamed implicit group.
2016    /// assert_eq!(1, nfa.group_info().all_group_len());
2017    ///
2018    /// let nfa = NFA::new_many(&["(a)", "b", "(c)"])?;
2019    /// // Each pattern has one implicit groups, and two
2020    /// // patterns have one explicit group each.
2021    /// assert_eq!(5, nfa.group_info().all_group_len());
2022    ///
2023    /// let nfa = NFA::compiler()
2024    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
2025    ///     .build(r"abc")?;
2026    /// // We disabled capturing groups, so there are none.
2027    /// assert_eq!(0, nfa.group_info().all_group_len());
2028    ///
2029    /// let nfa = NFA::compiler()
2030    ///     .configure(NFA::config().which_captures(WhichCaptures::None))
2031    ///     .build(r"(a)(b)(c)")?;
2032    /// // We disabled capturing groups, so there are none, even if there are
2033    /// // explicit groups in the concrete syntax.
2034    /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO));
2035    ///
2036    /// # Ok::<(), Box<dyn std::error::Error>>(())
2037    /// ```
2038    #[inline]
2039    pub fn all_group_len(&self) -> usize {
2040        self.slot_len() / 2
2041    }
2042
2043    /// Returns the total number of slots in this `GroupInfo` across all
2044    /// patterns.
2045    ///
2046    /// The total number of slots is always twice the total number of capturing
2047    /// groups, including both implicit and explicit groups.
2048    ///
2049    /// # Example
2050    ///
2051    /// This example shows the relationship between the number of capturing
2052    /// groups and slots.
2053    ///
2054    /// ```
2055    /// use regex_automata::util::captures::GroupInfo;
2056    ///
2057    /// // There are 11 total groups here.
2058    /// let info = GroupInfo::new(vec![
2059    ///     vec![None, Some("foo")],
2060    ///     vec![None],
2061    ///     vec![None, None, None, Some("bar"), None],
2062    ///     vec![None, None, Some("foo")],
2063    /// ])?;
2064    /// // 2 slots per group gives us 11*2=22 slots.
2065    /// assert_eq!(22, info.slot_len());
2066    ///
2067    /// # Ok::<(), Box<dyn std::error::Error>>(())
2068    /// ```
2069    #[inline]
2070    pub fn slot_len(&self) -> usize {
2071        self.0.small_slot_len().as_usize()
2072    }
2073
2074    /// Returns the total number of slots for implicit capturing groups.
2075    ///
2076    /// This is like [`GroupInfo::slot_len`], except it doesn't include the
2077    /// explicit slots for each pattern. Since there are always exactly 2
2078    /// implicit slots for each pattern, the number of implicit slots is always
2079    /// equal to twice the number of patterns.
2080    ///
2081    /// # Example
2082    ///
2083    /// This example shows the relationship between the number of capturing
2084    /// groups, implicit slots and explicit slots.
2085    ///
2086    /// ```
2087    /// use regex_automata::util::captures::GroupInfo;
2088    ///
2089    /// // There are 11 total groups here.
2090    /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
2091    /// // 2 slots per group gives us 11*2=22 slots.
2092    /// assert_eq!(6, info.slot_len());
2093    /// // 2 implicit slots per pattern gives us 2 implicit slots since there
2094    /// // is 1 pattern.
2095    /// assert_eq!(2, info.implicit_slot_len());
2096    /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
2097    /// assert_eq!(4, info.explicit_slot_len());
2098    ///
2099    /// # Ok::<(), Box<dyn std::error::Error>>(())
2100    /// ```
2101    #[inline]
2102    pub fn implicit_slot_len(&self) -> usize {
2103        self.pattern_len() * 2
2104    }
2105
2106    /// Returns the total number of slots for explicit capturing groups.
2107    ///
2108    /// This is like [`GroupInfo::slot_len`], except it doesn't include the
2109    /// implicit slots for each pattern. (There are always 2 implicit slots for
2110    /// each pattern.)
2111    ///
2112    /// For a non-empty `GroupInfo`, it is always the case that `slot_len` is
2113    /// strictly greater than `explicit_slot_len`. For an empty `GroupInfo`,
2114    /// both the total number of slots and the number of explicit slots is
2115    /// `0`.
2116    ///
2117    /// # Example
2118    ///
2119    /// This example shows the relationship between the number of capturing
2120    /// groups, implicit slots and explicit slots.
2121    ///
2122    /// ```
2123    /// use regex_automata::util::captures::GroupInfo;
2124    ///
2125    /// // There are 11 total groups here.
2126    /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
2127    /// // 2 slots per group gives us 11*2=22 slots.
2128    /// assert_eq!(6, info.slot_len());
2129    /// // 2 implicit slots per pattern gives us 2 implicit slots since there
2130    /// // is 1 pattern.
2131    /// assert_eq!(2, info.implicit_slot_len());
2132    /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
2133    /// assert_eq!(4, info.explicit_slot_len());
2134    ///
2135    /// # Ok::<(), Box<dyn std::error::Error>>(())
2136    /// ```
2137    #[inline]
2138    pub fn explicit_slot_len(&self) -> usize {
2139        self.slot_len().saturating_sub(self.implicit_slot_len())
2140    }
2141
2142    /// Returns the memory usage, in bytes, of this `GroupInfo`.
2143    ///
2144    /// This does **not** include the stack size used up by this `GroupInfo`.
2145    /// To compute that, use `std::mem::size_of::<GroupInfo>()`.
2146    #[inline]
2147    pub fn memory_usage(&self) -> usize {
2148        use core::mem::size_of as s;
2149
2150        s::<GroupInfoInner>()
2151            + self.0.slot_ranges.len() * s::<(SmallIndex, SmallIndex)>()
2152            + self.0.name_to_index.len() * s::<CaptureNameMap>()
2153            + self.0.index_to_name.len() * s::<Vec<Option<Arc<str>>>>()
2154            + self.0.memory_extra
2155    }
2156}
2157
2158/// A map from capture group name to its corresponding capture group index.
2159///
2160/// This type is actually wrapped inside a Vec indexed by pattern ID on a
2161/// `GroupInfo`, since multiple patterns may have the same capture group name.
2162/// That is, each pattern gets its own namespace of capture group names.
2163///
2164/// Perhaps a more memory efficient representation would be
2165/// HashMap<(PatternID, Arc<str>), usize>, but this makes it difficult to look
2166/// up a capture index by name without producing a `Arc<str>`, which requires
2167/// an allocation. To fix this, I think we'd need to define our own unsized
2168/// type or something? Anyway, I didn't give this much thought since it
2169/// probably doesn't matter much in the grand scheme of things. But it did
2170/// stand out to me as mildly wasteful.
2171#[cfg(feature = "std")]
2172type CaptureNameMap = std::collections::HashMap<Arc<str>, SmallIndex>;
2173#[cfg(not(feature = "std"))]
2174type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, SmallIndex>;
2175
2176/// The inner guts of `GroupInfo`. This type only exists so that it can
2177/// be wrapped in an `Arc` to make `GroupInfo` reference counted.
2178#[derive(Debug, Default)]
2179struct GroupInfoInner {
2180    slot_ranges: Vec<(SmallIndex, SmallIndex)>,
2181    name_to_index: Vec<CaptureNameMap>,
2182    index_to_name: Vec<Vec<Option<Arc<str>>>>,
2183    memory_extra: usize,
2184}
2185
2186impl GroupInfoInner {
2187    /// This adds the first unnamed group for the given pattern ID. The given
2188    /// pattern ID must be zero if this is the first time this method is
2189    /// called, or must be exactly one more than the pattern ID supplied to the
2190    /// previous call to this method. (This method panics if this rule is
2191    /// violated.)
2192    ///
2193    /// This can be thought of as initializing the GroupInfo state for the
2194    /// given pattern and closing off the state for any previous pattern.
2195    fn add_first_group(&mut self, pid: PatternID) {
2196        assert_eq!(pid.as_usize(), self.slot_ranges.len());
2197        assert_eq!(pid.as_usize(), self.name_to_index.len());
2198        assert_eq!(pid.as_usize(), self.index_to_name.len());
2199        // This is the start of our slots for the explicit capturing groups.
2200        // Note that since the slots for the 0th group for every pattern appear
2201        // before any slots for the nth group (where n > 0) in any pattern, we
2202        // will have to fix up the slot ranges once we know how many patterns
2203        // we've added capture groups for.
2204        let slot_start = self.small_slot_len();
2205        self.slot_ranges.push((slot_start, slot_start));
2206        self.name_to_index.push(CaptureNameMap::new());
2207        self.index_to_name.push(vec![None]);
2208        self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
2209    }
2210
2211    /// Add an explicit capturing group for the given pattern with the given
2212    /// index. If the group has a name, then that must be given as well.
2213    ///
2214    /// Note that every capturing group except for the first or zeroth group is
2215    /// explicit.
2216    ///
2217    /// This returns an error if adding this group would result in overflowing
2218    /// slot indices or if a capturing group with the same name for this
2219    /// pattern has already been added.
2220    fn add_explicit_group<N: AsRef<str>>(
2221        &mut self,
2222        pid: PatternID,
2223        group: SmallIndex,
2224        maybe_name: Option<N>,
2225    ) -> Result<(), GroupInfoError> {
2226        // We also need to check that the slot index generated for
2227        // this group is also valid. Although, this is a little weird
2228        // because we offset these indices below, at which point, we'll
2229        // have to recheck them. Gosh this is annoying. Note that
2230        // the '+2' below is OK because 'end' is guaranteed to be less
2231        // than isize::MAX.
2232        let end = &mut self.slot_ranges[pid].1;
2233        *end = SmallIndex::new(end.as_usize() + 2).map_err(|_| {
2234            GroupInfoError::too_many_groups(pid, group.as_usize())
2235        })?;
2236        if let Some(name) = maybe_name {
2237            let name = Arc::<str>::from(name.as_ref());
2238            if self.name_to_index[pid].contains_key(&*name) {
2239                return Err(GroupInfoError::duplicate(pid, &name));
2240            }
2241            let len = name.len();
2242            self.name_to_index[pid].insert(Arc::clone(&name), group);
2243            self.index_to_name[pid].push(Some(name));
2244            // Adds the memory used by the Arc<str> in both maps.
2245            self.memory_extra +=
2246                2 * (len + core::mem::size_of::<Option<Arc<str>>>());
2247            // And also the value entry for the 'name_to_index' map.
2248            // This is probably an underestimate for 'name_to_index' since
2249            // hashmaps/btrees likely have some non-zero overhead, but we
2250            // assume here that they have zero overhead.
2251            self.memory_extra += core::mem::size_of::<SmallIndex>();
2252        } else {
2253            self.index_to_name[pid].push(None);
2254            self.memory_extra += core::mem::size_of::<Option<Arc<str>>>();
2255        }
2256        // This is a sanity assert that checks that our group index
2257        // is in line with the number of groups added so far for this
2258        // pattern.
2259        assert_eq!(group.one_more(), self.group_len(pid));
2260        // And is also in line with the 'index_to_name' map.
2261        assert_eq!(group.one_more(), self.index_to_name[pid].len());
2262        Ok(())
2263    }
2264
2265    /// This corrects the slot ranges to account for the slots corresponding
2266    /// to the zeroth group of each pattern. That is, every slot range is
2267    /// offset by 'pattern_len() * 2', since each pattern uses two slots to
2268    /// represent the zeroth group.
2269    fn fixup_slot_ranges(&mut self) -> Result<(), GroupInfoError> {
2270        use crate::util::primitives::IteratorIndexExt;
2271        // Since we know number of patterns fits in PatternID and
2272        // PatternID::MAX < isize::MAX, it follows that multiplying by 2 will
2273        // never overflow usize.
2274        let offset = self.pattern_len().checked_mul(2).unwrap();
2275        for (pid, &mut (ref mut start, ref mut end)) in
2276            self.slot_ranges.iter_mut().with_pattern_ids()
2277        {
2278            let group_len = 1 + ((end.as_usize() - start.as_usize()) / 2);
2279            let new_end = match end.as_usize().checked_add(offset) {
2280                Some(new_end) => new_end,
2281                None => {
2282                    return Err(GroupInfoError::too_many_groups(
2283                        pid, group_len,
2284                    ))
2285                }
2286            };
2287            *end = SmallIndex::new(new_end).map_err(|_| {
2288                GroupInfoError::too_many_groups(pid, group_len)
2289            })?;
2290            // Since start <= end, if end is valid then start must be too.
2291            *start = SmallIndex::new(start.as_usize() + offset).unwrap();
2292        }
2293        Ok(())
2294    }
2295
2296    /// Return the total number of patterns represented by this capture slot
2297    /// info.
2298    fn pattern_len(&self) -> usize {
2299        self.slot_ranges.len()
2300    }
2301
2302    /// Return the total number of capturing groups for the given pattern. If
2303    /// the given pattern isn't valid for this capture slot info, then 0 is
2304    /// returned.
2305    fn group_len(&self, pid: PatternID) -> usize {
2306        let (start, end) = match self.slot_ranges.get(pid.as_usize()) {
2307            None => return 0,
2308            Some(range) => range,
2309        };
2310        // The difference between any two SmallIndex values always fits in a
2311        // usize since we know that SmallIndex::MAX <= isize::MAX-1. We also
2312        // know that start<=end by construction and that the number of groups
2313        // never exceeds SmallIndex and thus never overflows usize.
2314        1 + ((end.as_usize() - start.as_usize()) / 2)
2315    }
2316
2317    /// Return the total number of slots in this capture slot info as a
2318    /// "small index."
2319    fn small_slot_len(&self) -> SmallIndex {
2320        // Since slots are allocated in order of pattern (starting at 0) and
2321        // then in order of capture group, it follows that the number of slots
2322        // is the end of the range of slots for the last pattern. This is
2323        // true even when the last pattern has no capturing groups, since
2324        // 'slot_ranges' will still represent it explicitly with an empty
2325        // range.
2326        self.slot_ranges.last().map_or(SmallIndex::ZERO, |&(_, end)| end)
2327    }
2328}
2329
2330/// An error that may occur when building a `GroupInfo`.
2331///
2332/// Building a `GroupInfo` does a variety of checks to make sure the
2333/// capturing groups satisfy a number of invariants. This includes, but is not
2334/// limited to, ensuring that the first capturing group is unnamed and that
2335/// there are no duplicate capture groups for a specific pattern.
2336#[derive(Clone, Debug)]
2337pub struct GroupInfoError {
2338    kind: GroupInfoErrorKind,
2339}
2340
2341/// The kind of error that occurs when building a `GroupInfo` fails.
2342///
2343/// We keep this un-exported because it's not clear how useful it is to
2344/// export it.
2345#[derive(Clone, Debug)]
2346enum GroupInfoErrorKind {
2347    /// This occurs when too many patterns have been added. i.e., It would
2348    /// otherwise overflow a `PatternID`.
2349    TooManyPatterns { err: PatternIDError },
2350    /// This occurs when too many capturing groups have been added for a
2351    /// particular pattern.
2352    TooManyGroups {
2353        /// The ID of the pattern that had too many groups.
2354        pattern: PatternID,
2355        /// The minimum number of groups that the caller has tried to add for
2356        /// a pattern.
2357        minimum: usize,
2358    },
2359    /// An error that occurs when a pattern has no capture groups. Either the
2360    /// group info must be empty, or all patterns must have at least one group
2361    /// (corresponding to the unnamed group for the entire pattern).
2362    MissingGroups {
2363        /// The ID of the pattern that had no capturing groups.
2364        pattern: PatternID,
2365    },
2366    /// An error that occurs when one tries to provide a name for the capture
2367    /// group at index 0. This capturing group must currently always be
2368    /// unnamed.
2369    FirstMustBeUnnamed {
2370        /// The ID of the pattern that was found to have a named first
2371        /// capturing group.
2372        pattern: PatternID,
2373    },
2374    /// An error that occurs when duplicate capture group names for the same
2375    /// pattern are added.
2376    ///
2377    /// NOTE: At time of writing, this error can never occur if you're using
2378    /// regex-syntax, since the parser itself will reject patterns with
2379    /// duplicate capture group names. This error can only occur when the
2380    /// builder is used to hand construct NFAs.
2381    Duplicate {
2382        /// The pattern in which the duplicate capture group name was found.
2383        pattern: PatternID,
2384        /// The duplicate name.
2385        name: String,
2386    },
2387}
2388
2389impl GroupInfoError {
2390    fn too_many_patterns(err: PatternIDError) -> GroupInfoError {
2391        GroupInfoError { kind: GroupInfoErrorKind::TooManyPatterns { err } }
2392    }
2393
2394    fn too_many_groups(pattern: PatternID, minimum: usize) -> GroupInfoError {
2395        GroupInfoError {
2396            kind: GroupInfoErrorKind::TooManyGroups { pattern, minimum },
2397        }
2398    }
2399
2400    fn missing_groups(pattern: PatternID) -> GroupInfoError {
2401        GroupInfoError { kind: GroupInfoErrorKind::MissingGroups { pattern } }
2402    }
2403
2404    fn first_must_be_unnamed(pattern: PatternID) -> GroupInfoError {
2405        GroupInfoError {
2406            kind: GroupInfoErrorKind::FirstMustBeUnnamed { pattern },
2407        }
2408    }
2409
2410    fn duplicate(pattern: PatternID, name: &str) -> GroupInfoError {
2411        GroupInfoError {
2412            kind: GroupInfoErrorKind::Duplicate {
2413                pattern,
2414                name: String::from(name),
2415            },
2416        }
2417    }
2418}
2419
2420#[cfg(feature = "std")]
2421impl std::error::Error for GroupInfoError {
2422    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
2423        match self.kind {
2424            GroupInfoErrorKind::TooManyPatterns { .. }
2425            | GroupInfoErrorKind::TooManyGroups { .. }
2426            | GroupInfoErrorKind::MissingGroups { .. }
2427            | GroupInfoErrorKind::FirstMustBeUnnamed { .. }
2428            | GroupInfoErrorKind::Duplicate { .. } => None,
2429        }
2430    }
2431}
2432
2433impl core::fmt::Display for GroupInfoError {
2434    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2435        use self::GroupInfoErrorKind::*;
2436
2437        match self.kind {
2438            TooManyPatterns { ref err } => {
2439                write!(f, "too many patterns to build capture info: {err}")
2440            }
2441            TooManyGroups { pattern, minimum } => {
2442                write!(
2443                    f,
2444                    "too many capture groups (at least {}) were \
2445                     found for pattern {}",
2446                    minimum,
2447                    pattern.as_usize()
2448                )
2449            }
2450            MissingGroups { pattern } => write!(
2451                f,
2452                "no capturing groups found for pattern {} \
2453                 (either all patterns have zero groups or all patterns have \
2454                  at least one group)",
2455                pattern.as_usize(),
2456            ),
2457            FirstMustBeUnnamed { pattern } => write!(
2458                f,
2459                "first capture group (at index 0) for pattern {} has a name \
2460                 (it must be unnamed)",
2461                pattern.as_usize(),
2462            ),
2463            Duplicate { pattern, ref name } => write!(
2464                f,
2465                "duplicate capture group name '{}' found for pattern {}",
2466                name,
2467                pattern.as_usize(),
2468            ),
2469        }
2470    }
2471}
2472
2473/// An iterator over capturing groups and their names for a specific pattern.
2474///
2475/// This iterator is created by [`GroupInfo::pattern_names`].
2476///
2477/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
2478/// from which this iterator was created.
2479#[derive(Clone, Debug)]
2480pub struct GroupInfoPatternNames<'a> {
2481    it: core::slice::Iter<'a, Option<Arc<str>>>,
2482}
2483
2484impl GroupInfoPatternNames<'static> {
2485    fn empty() -> GroupInfoPatternNames<'static> {
2486        GroupInfoPatternNames { it: [].iter() }
2487    }
2488}
2489
2490impl<'a> Iterator for GroupInfoPatternNames<'a> {
2491    type Item = Option<&'a str>;
2492
2493    fn next(&mut self) -> Option<Option<&'a str>> {
2494        self.it.next().map(|x| x.as_deref())
2495    }
2496
2497    fn size_hint(&self) -> (usize, Option<usize>) {
2498        self.it.size_hint()
2499    }
2500
2501    fn count(self) -> usize {
2502        self.it.count()
2503    }
2504}
2505
2506impl<'a> ExactSizeIterator for GroupInfoPatternNames<'a> {}
2507impl<'a> core::iter::FusedIterator for GroupInfoPatternNames<'a> {}
2508
2509/// An iterator over capturing groups and their names for a `GroupInfo`.
2510///
2511/// This iterator is created by [`GroupInfo::all_names`].
2512///
2513/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo`
2514/// from which this iterator was created.
2515#[derive(Debug)]
2516pub struct GroupInfoAllNames<'a> {
2517    group_info: &'a GroupInfo,
2518    pids: PatternIDIter,
2519    current_pid: Option<PatternID>,
2520    names: Option<core::iter::Enumerate<GroupInfoPatternNames<'a>>>,
2521}
2522
2523impl<'a> Iterator for GroupInfoAllNames<'a> {
2524    type Item = (PatternID, usize, Option<&'a str>);
2525
2526    fn next(&mut self) -> Option<(PatternID, usize, Option<&'a str>)> {
2527        // If the group info has no captures, then we never have anything
2528        // to yield. We need to consider this case explicitly (at time of
2529        // writing) because 'pattern_capture_names' will panic if captures
2530        // aren't enabled.
2531        if self.group_info.0.index_to_name.is_empty() {
2532            return None;
2533        }
2534        if self.current_pid.is_none() {
2535            self.current_pid = Some(self.pids.next()?);
2536        }
2537        let pid = self.current_pid.unwrap();
2538        if self.names.is_none() {
2539            self.names = Some(self.group_info.pattern_names(pid).enumerate());
2540        }
2541        let (group_index, name) = match self.names.as_mut().unwrap().next() {
2542            Some((group_index, name)) => (group_index, name),
2543            None => {
2544                self.current_pid = None;
2545                self.names = None;
2546                return self.next();
2547            }
2548        };
2549        Some((pid, group_index, name))
2550    }
2551}