Skip to main content

fuzzy_regex/engine/
captures.rs

1//! Capture group handling during matching.
2
3use smallvec::SmallVec;
4use std::collections::HashMap;
5use std::sync::{Arc, OnceLock};
6
/// Backing storage for capture slots: one optional `(start, end)` pair per group.
///
/// Most patterns have few capture groups, so inline storage for 8 slots avoids heap allocation.
type SlotVec = SmallVec<[Option<(usize, usize)>; 8]>;
9
/// Lazily created, shared empty name map.
///
/// Handing out clones of this one `Arc` means a state without named groups
/// does not need a name map of its own.
static EMPTY_NAMES: OnceLock<Arc<HashMap<String, usize>>> = OnceLock::new();

/// Returns a handle to the shared empty name map, creating the map on first use.
fn empty_names() -> Arc<HashMap<String, usize>> {
    let shared = EMPTY_NAMES.get_or_init(|| Arc::new(HashMap::new()));
    // Only the reference count is bumped; the map itself is not copied.
    Arc::clone(shared)
}
16
17/// Capture group state during matching.
18#[derive(Debug, Clone)]
19pub struct CaptureState {
20    /// Capture slots: (start, end) for each group (0 = full match).
21    slots: SlotVec,
22    /// Named group mapping (shared across clones - never mutated after setup).
23    names: Arc<HashMap<String, usize>>,
24}
25
26impl CaptureState {
27    /// Create a new capture state for n groups.
28    #[must_use]
29    pub fn new(group_count: usize) -> Self {
30        let mut slots = SlotVec::new();
31        slots.resize(group_count + 1, None); // +1 for group 0 (full match)
32        CaptureState {
33            slots,
34            names: empty_names(),
35        }
36    }
37
38    /// Register a named group.
39    pub fn register_name(&mut self, name: String, index: usize) {
40        Arc::make_mut(&mut self.names).insert(name, index);
41    }
42
43    /// Start a capture at a position.
44    pub fn start_capture(&mut self, group: usize, pos: usize) {
45        if group < self.slots.len() {
46            self.slots[group] = Some((pos, pos));
47        }
48    }
49
50    /// End a capture at a position.
51    pub fn end_capture(&mut self, group: usize, pos: usize) {
52        if group < self.slots.len()
53            && let Some((start, _)) = self.slots[group]
54        {
55            self.slots[group] = Some((start, pos));
56        }
57    }
58
59    /// Get a capture by index.
60    #[must_use]
61    pub fn get(&self, index: usize) -> Option<(usize, usize)> {
62        self.slots.get(index).copied().flatten()
63    }
64
65    /// Get a capture by name.
66    #[must_use]
67    pub fn get_named(&self, name: &str) -> Option<(usize, usize)> {
68        self.names.get(name).and_then(|&idx| self.get(idx))
69    }
70
71    /// Get the captured text for a group.
72    #[must_use]
73    pub fn get_text<'a>(&self, index: usize, text: &'a str) -> Option<&'a str> {
74        self.get(index).map(|(start, end)| &text[start..end])
75    }
76
77    /// Set the full match (group 0).
78    pub fn set_full_match(&mut self, start: usize, end: usize) {
79        self.slots[0] = Some((start, end));
80    }
81
82    /// Get all slots.
83    #[must_use]
84    pub fn slots(&self) -> &[Option<(usize, usize)>] {
85        &self.slots
86    }
87
88    /// Get the name mapping.
89    #[must_use]
90    pub fn names(&self) -> &HashMap<String, usize> {
91        &self.names
92    }
93
94    /// Clear all captures (but keep structure).
95    pub fn clear(&mut self) {
96        for slot in &mut self.slots {
97            *slot = None;
98        }
99    }
100
101    /// Clone with a new position offset.
102    #[must_use]
103    pub fn with_offset(&self, offset: usize) -> Self {
104        CaptureState {
105            slots: self
106                .slots
107                .iter()
108                .map(|s| s.map(|(start, end)| (start + offset, end + offset)))
109                .collect(),
110            names: Arc::clone(&self.names),
111        }
112    }
113}
114
115/// Builder for constructing capture state with names.
116pub struct CaptureStateBuilder {
117    group_count: usize,
118    names: HashMap<String, usize>,
119}
120
121impl CaptureStateBuilder {
122    /// Create a new builder.
123    #[must_use]
124    pub fn new(group_count: usize) -> Self {
125        CaptureStateBuilder {
126            group_count,
127            names: HashMap::new(),
128        }
129    }
130
131    /// Add a named group.
132    #[must_use]
133    pub fn with_name(mut self, name: String, index: usize) -> Self {
134        self.names.insert(name, index);
135        self
136    }
137
138    /// Build the capture state.
139    #[must_use]
140    pub fn build(self) -> CaptureState {
141        let mut slots = SlotVec::new();
142        slots.resize(self.group_count + 1, None);
143        CaptureState {
144            slots,
145            names: Arc::new(self.names),
146        }
147    }
148}