Skip to main content

ix/
regex_pool.rs

1//! Compiled regex pool — caches `Regex` objects to avoid recompilation.
2//!
3//! Thread-safe, size-bounded FIFO cache keyed by pattern string.
4
5use regex::Regex;
6use std::collections::HashMap;
7use std::sync::RwLock;
8
/// Usage counters reported by a [`RegexPool`].
///
/// Both counters start at zero (see the `Default` derive) and the struct is
/// handed out by value as a snapshot, so reading it never holds a lock.
#[derive(Clone, Debug, Default)]
pub struct PoolStats {
    /// How many lookups were answered from the cache.
    pub hits: u64,
    /// How many lookups had to compile the pattern.
    pub misses: u64,
}
17
18/// Thread-safe, size-bounded FIFO cache for compiled [`Regex`] objects.
19///
20/// Most useful in daemon mode where the same patterns are queried repeatedly.
21/// Set `max_entries` to 0 for unlimited capacity.
22pub struct RegexPool {
23    pool: RwLock<HashMap<String, Regex>>,
24    order: RwLock<Vec<String>>,
25    max_entries: usize,
26    stats: RwLock<PoolStats>,
27}
28
29impl std::fmt::Debug for RegexPool {
30    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31        f.debug_struct("RegexPool")
32            .field("max_entries", &self.max_entries)
33            .field("stats", &self.stats)
34            .finish_non_exhaustive()
35    }
36}
37
38impl RegexPool {
39    /// Create a pool with the given maximum entry count (0 = unlimited).
40    #[must_use]
41    pub fn new(max_entries: usize) -> Self {
42        Self {
43            pool: RwLock::new(HashMap::new()),
44            order: RwLock::new(Vec::new()),
45            max_entries,
46            stats: RwLock::new(PoolStats::default()),
47        }
48    }
49
50    /// Returns a compiled `Regex` for the given pattern, using the cached copy if available.
51    ///
52    /// # Errors
53    ///
54    /// Returns `regex::Error` if the pattern fails to compile.
55    pub fn get_or_compile(&self, pattern: &str) -> Result<Regex, regex::Error> {
56        {
57            let pool = self
58                .pool
59                .read()
60                .unwrap_or_else(std::sync::PoisonError::into_inner);
61            if let Some(re) = pool.get(pattern) {
62                let mut stats = self
63                    .stats
64                    .write()
65                    .unwrap_or_else(std::sync::PoisonError::into_inner);
66                stats.hits += 1;
67                return Ok(re.clone());
68            }
69        }
70
71        let re = Regex::new(pattern)?;
72
73        {
74            let mut pool = self
75                .pool
76                .write()
77                .unwrap_or_else(std::sync::PoisonError::into_inner);
78            let mut order = self
79                .order
80                .write()
81                .unwrap_or_else(std::sync::PoisonError::into_inner);
82            let mut stats = self
83                .stats
84                .write()
85                .unwrap_or_else(std::sync::PoisonError::into_inner);
86
87            if self.max_entries > 0 {
88                while pool.len() >= self.max_entries {
89                    if let Some(evict_key) = order.first().cloned() {
90                        pool.remove(&evict_key);
91                        order.remove(0);
92                    } else {
93                        break;
94                    }
95                }
96            }
97
98            pool.insert(pattern.to_owned(), re.clone());
99            order.push(pattern.to_owned());
100            stats.misses += 1;
101        }
102
103        Ok(re)
104    }
105
106    /// Removes a specific pattern from the pool.
107    pub fn invalidate(&self, pattern: &str) {
108        let mut pool = self
109            .pool
110            .write()
111            .unwrap_or_else(std::sync::PoisonError::into_inner);
112        let mut order = self
113            .order
114            .write()
115            .unwrap_or_else(std::sync::PoisonError::into_inner);
116        pool.remove(pattern);
117        order.retain(|k| k != pattern);
118    }
119
120    /// Clears all entries from the pool.
121    pub fn clear(&self) {
122        let mut pool = self
123            .pool
124            .write()
125            .unwrap_or_else(std::sync::PoisonError::into_inner);
126        let mut order = self
127            .order
128            .write()
129            .unwrap_or_else(std::sync::PoisonError::into_inner);
130        pool.clear();
131        order.clear();
132    }
133
134    /// Returns a snapshot of the pool hit/miss statistics.
135    #[must_use]
136    pub fn stats(&self) -> PoolStats {
137        self.stats
138            .read()
139            .unwrap_or_else(std::sync::PoisonError::into_inner)
140            .clone()
141    }
142
143    /// Returns the number of cached patterns.
144    #[must_use]
145    pub fn len(&self) -> usize {
146        self.pool
147            .read()
148            .unwrap_or_else(std::sync::PoisonError::into_inner)
149            .len()
150    }
151
152    /// Returns `true` if the pool contains no entries.
153    #[must_use]
154    pub fn is_empty(&self) -> bool {
155        self.len() == 0
156    }
157}
158
#[cfg(test)]
#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
mod tests {
    use super::RegexPool;

    /// Fallible return type shared by the tests below.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Asking for the same pattern twice yields one miss followed by one hit.
    #[test]
    fn get_or_compile_hit_and_miss() -> TestResult {
        let cache = RegexPool::new(10);

        let first = cache.get_or_compile(r"\d+")?;
        assert!(first.is_match("123"));

        let second = cache.get_or_compile(r"\d+")?;
        assert!(second.is_match("456"));

        let counters = cache.stats();
        assert_eq!(counters.hits, 1);
        assert_eq!(counters.misses, 1);
        Ok(())
    }

    /// A pool capped at two entries drops the oldest pattern on the third insert.
    #[test]
    fn eviction_at_max_entries() -> TestResult {
        let cache = RegexPool::new(2);

        // The size grows up to the cap and then stays there.
        for (pattern, expected_len) in [("a", 1), ("b", 2), ("c", 2)] {
            cache.get_or_compile(pattern)?;
            assert_eq!(cache.len(), expected_len);
        }

        let counters = cache.stats();
        assert_eq!(counters.misses, 3);
        assert_eq!(counters.hits, 0);

        let entries = cache
            .pool
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        assert!(entries.contains_key("b"));
        assert!(entries.contains_key("c"));
        assert!(!entries.contains_key("a"));
        Ok(())
    }

    /// `clear` removes every entry but leaves the counters alone.
    #[test]
    fn clear_empties_pool() -> TestResult {
        let cache = RegexPool::new(10);
        for pattern in ["x", "y"] {
            cache.get_or_compile(pattern)?;
        }
        assert_eq!(cache.len(), 2);

        cache.clear();
        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());

        let counters = cache.stats();
        assert_eq!(counters.misses, 2);
        Ok(())
    }

    /// `invalidate` drops only the named pattern; asking for it again is a miss.
    #[test]
    fn invalidate_removes_specific_entry() -> TestResult {
        let cache = RegexPool::new(10);
        for pattern in ["alpha", "beta"] {
            cache.get_or_compile(pattern)?;
        }
        assert_eq!(cache.len(), 2);

        cache.invalidate("alpha");
        assert_eq!(cache.len(), 1);

        cache.get_or_compile("alpha")?;
        let counters = cache.stats();
        assert_eq!(counters.misses, 3);
        Ok(())
    }

    /// A cap of zero means nothing is ever evicted.
    #[test]
    fn unlimited_capacity() -> TestResult {
        let cache = RegexPool::new(0);
        for index in 0_u32..50_u32 {
            let pattern = format!("p{index}");
            cache.get_or_compile(&pattern)?;
        }
        assert_eq!(cache.len(), 50);
        Ok(())
    }

    /// A pattern that does not compile is reported as an error.
    #[test]
    fn invalid_pattern_returns_error() {
        let cache = RegexPool::new(10);
        let outcome = cache.get_or_compile(r"[invalid");
        assert!(outcome.is_err());
    }
}
253}