Skip to main content

rexile/advanced/
captures.rs

1//! Capture groups for extracting matched substrings
2//!
3//! Capture groups allow you to extract parts of a matched string for later use.
4//! They are defined using parentheses in the pattern.
5//!
6//! # Types of Groups
7//! - `(pattern)` - Capturing group: captures the matched substring
8//! - `(?:pattern)` - Non-capturing group: groups pattern without capturing
9//!
10//! # Backreferences
11//! - `\1`, `\2`, etc. - Reference to previously captured group
12//!
13//! # Examples
14//! ```
15//! use rexile::Pattern;
16//!
17//! // Extract date components
18//! let pattern = Pattern::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
19//! let text = "Date: 2026-01-22";
20//!
21//! if let Some(caps) = pattern.captures(text) {
22//!     println!("Year: {}", &caps[1]);  // 2026
23//!     println!("Month: {}", &caps[2]); // 01
24//!     println!("Day: {}", &caps[3]);   // 22
25//! }
26//! ```
27
28use std::ops::Index;
29
/// Metadata describing one parenthesised group of a pattern.
#[derive(Debug, Clone, PartialEq)]
pub struct Group {
    /// Slot of the group among the captures (0 = full match, 1+ = capture groups).
    pub index: usize,
    /// `true` for capturing `(...)` groups, `false` for non-capturing `(?:...)` groups.
    pub is_capturing: bool,
    /// Group name, present for named captures such as `(?P<name>...)`.
    pub name: Option<String>,
}

impl Group {
    /// Builds an unnamed capturing group that occupies slot `index`.
    pub fn new(index: usize) -> Self {
        Group {
            name: None,
            is_capturing: true,
            index,
        }
    }

    /// Builds a non-capturing group; it has no name and its index is fixed at 0.
    pub fn non_capturing() -> Self {
        Group {
            is_capturing: false,
            ..Group::new(0)
        }
    }

    /// Builds a capturing group at slot `index` that also carries `name`.
    pub fn named(index: usize, name: String) -> Self {
        let mut group = Group::new(index);
        group.name = Some(name);
        group
    }
}
69
/// A set of captured substrings from a single match.
///
/// Every capture is stored as a `(start, end)` pair of byte offsets into the
/// searched text. Slot 0 is the full match and slots 1+ are the capture
/// groups; a slot stays `None` until a range has been recorded for it.
#[derive(Debug, Clone)]
pub struct Captures<'t> {
    /// The original text that was matched against
    text: &'t str,
    /// Vector of captured substring positions (start, end)
    /// Index 0 is always the full match
    /// Indices 1+ are the capture groups
    positions: Vec<Option<(usize, usize)>>,
}

impl<'t> Captures<'t> {
    /// Create a new `Captures` holding only the full match.
    ///
    /// Allocates `num_groups + 1` slots: slot 0 is set to `full_match` and the
    /// remaining slots start out as `None`.
    pub fn new(text: &'t str, full_match: (usize, usize), num_groups: usize) -> Self {
        let mut positions = vec![None; num_groups + 1];
        positions[0] = Some(full_match);
        Self { text, positions }
    }

    /// Get the matched substring for a capture group.
    ///
    /// Index 0 returns the full match, indices 1+ return capture groups.
    ///
    /// Returns `None` when `index` is out of range, when the group has no
    /// recorded position, or when the recorded position cannot be sliced out
    /// of the text (end before start, past the end of the text, or not on a
    /// UTF-8 character boundary).
    pub fn get(&self, index: usize) -> Option<&'t str> {
        let (start, end) = self.pos(index)?;
        // Checked slicing: an `Option`-returning accessor must not panic on a
        // bad range, so let `str::get` turn it into `None` instead.
        self.text.get(start..end)
    }

    /// Get the position `(start, end)` of a capture group.
    ///
    /// Returns `None` when `index` is out of range or the group has no
    /// recorded position. The pair is returned as stored, without checking it
    /// against the text.
    pub fn pos(&self, index: usize) -> Option<(usize, usize)> {
        self.positions.get(index).copied().flatten()
    }

    /// Get the full matched text (equivalent to `get(0)`).
    ///
    /// Falls back to the empty string when `get(0)` yields `None`.
    pub fn as_str(&self) -> &'t str {
        self.get(0).unwrap_or("")
    }

    /// Set a capture group position.
    ///
    /// An `index` beyond the allocated slots is silently ignored.
    pub(crate) fn set(&mut self, index: usize, start: usize, end: usize) {
        if let Some(slot) = self.positions.get_mut(index) {
            *slot = Some((start, end));
        }
    }

    /// Number of capture slots (including the full match at index 0).
    pub fn len(&self) -> usize {
        self.positions.len()
    }

    /// Check whether there are no capture slots at all.
    ///
    /// `new` always allocates slot 0 for the full match, so this is `false`
    /// for every value built through `new`.
    pub fn is_empty(&self) -> bool {
        self.positions.is_empty()
    }

    /// Iterate over all capture slots in index order, starting with the full
    /// match at index 0.
    pub fn iter(&self) -> CapturesIter<'_, 't> {
        CapturesIter {
            captures: self,
            index: 0,
        }
    }
}

/// Allow indexing `Captures` by group number.
///
/// # Panics
///
/// Panics when [`Captures::get`] returns `None` for `index`.
impl<'t> Index<usize> for Captures<'t> {
    type Output = str;

    fn index(&self, index: usize) -> &Self::Output {
        self.get(index)
            .unwrap_or_else(|| panic!("no capture group at index {}", index))
    }
}

/// Iterator over captured substrings.
///
/// Yields one item per capture slot; the item is whatever [`Captures::get`]
/// returns for that slot.
#[derive(Debug, Clone)]
pub struct CapturesIter<'c, 't> {
    /// The captures being walked.
    captures: &'c Captures<'t>,
    /// Index of the next slot to yield.
    index: usize,
}

impl<'c, 't> Iterator for CapturesIter<'c, 't> {
    type Item = Option<&'t str>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.captures.len() {
            return None;
        }
        let result = self.captures.get(self.index);
        self.index += 1;
        Some(result)
    }

    /// The number of remaining slots is known exactly.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.captures.len().saturating_sub(self.index);
        (remaining, Some(remaining))
    }
}

impl<'c, 't> ExactSizeIterator for CapturesIter<'c, 't> {}
162
163/// Iterator that yields Captures for each match in a text
164pub struct CapturesMatches<'r, 't> {
165    text: &'t str,
166    last_end: usize,
167    num_groups: usize,
168    // This would hold a reference to the compiled pattern
169    // For now, we'll keep it simple
170    _phantom: std::marker::PhantomData<&'r ()>,
171}
172
173impl<'r, 't> CapturesMatches<'r, 't> {
174    /// Create a new captures iterator
175    pub fn new(text: &'t str, num_groups: usize) -> Self {
176        Self {
177            text,
178            last_end: 0,
179            num_groups,
180            _phantom: std::marker::PhantomData,
181        }
182    }
183}
184
185impl<'r, 't> Iterator for CapturesMatches<'r, 't> {
186    type Item = Captures<'t>;
187
188    fn next(&mut self) -> Option<Self::Item> {
189        // This is a placeholder - actual implementation would use the pattern
190        // to find the next match starting from last_end
191        None
192    }
193}
194
#[cfg(test)]
mod tests {
    use super::*;

    /// A match without groups exposes only the full match in slot 0.
    #[test]
    fn test_captures_basic() {
        let caps = Captures::new("Hello, world!", (0, 5), 0);

        assert_eq!(caps.len(), 1);
        assert_eq!(caps.as_str(), "Hello");
        assert_eq!(caps.get(0), Some("Hello"));
    }

    /// Each recorded group range resolves to the matching substring.
    #[test]
    fn test_captures_groups() {
        let mut caps = Captures::new("2026-01-22", (0, 10), 3);
        // Year, month and day, stored in slots 1..=3.
        let ranges: [(usize, usize); 3] = [(0, 4), (5, 7), (8, 10)];
        for (offset, &(start, end)) in ranges.iter().enumerate() {
            caps.set(offset + 1, start, end);
        }

        let expected = ["2026-01-22", "2026", "01", "22"];
        for (slot, want) in expected.iter().enumerate() {
            assert_eq!(caps.get(slot), Some(*want));
        }
        assert_eq!(caps.len(), 4);
    }

    /// `caps[i]` resolves to the same text as the recorded ranges.
    #[test]
    fn test_captures_indexing() {
        let mut caps = Captures::new("foo=123", (0, 7), 2);
        caps.set(1, 0, 3); // "foo"
        caps.set(2, 4, 7); // "123"

        let by_index = [&caps[0], &caps[1], &caps[2]];
        assert_eq!(by_index, ["foo=123", "foo", "123"]);
    }

    /// `pos` reports stored ranges and `None` past the last slot.
    #[test]
    fn test_captures_pos() {
        let mut caps = Captures::new("abc123", (0, 6), 2);
        caps.set(1, 0, 3);
        caps.set(2, 3, 6);

        let expected: [Option<(usize, usize)>; 4] =
            [Some((0, 6)), Some((0, 3)), Some((3, 6)), None];
        for (slot, want) in expected.iter().enumerate() {
            assert_eq!(caps.pos(slot), *want);
        }
    }

    /// Each `Group` constructor sets the capturing flag and name as documented.
    #[test]
    fn test_group_types() {
        let capturing = Group::new(1);
        assert_eq!(capturing.index, 1);
        assert!(capturing.is_capturing);

        let non_capturing = Group::non_capturing();
        assert!(!non_capturing.is_capturing);

        let named = Group::named(2, String::from("year"));
        assert!(named.is_capturing);
        assert_eq!(named.name.as_deref(), Some("year"));
    }
}