1use crate::cache::CacheConfig;
11use crate::cache::policy::{
12 CacheAdmission, CachePolicy, CachePolicyConfig, CachePolicyKind, build_cache_policy,
13};
14use log::debug;
15use lru::LruCache;
16use regex::Regex;
17use std::hash::{Hash, Hasher};
18use std::num::NonZeroUsize;
19use std::sync::atomic::{AtomicU64, Ordering};
20use std::sync::{Arc, Mutex, OnceLock};
21
22const FANCY_REGEX_BACKTRACK_LIMIT: usize = 100_000;
29
30#[derive(Clone)]
32pub enum CompiledRegex {
33 Standard(Arc<Regex>),
35 Fancy(Arc<fancy_regex::Regex>),
37}
38
39impl CompiledRegex {
40 pub fn is_match(&self, text: &str) -> Result<bool, RegexMatchError> {
46 match self {
47 CompiledRegex::Standard(re) => Ok(re.is_match(text)),
48 CompiledRegex::Fancy(re) => re
49 .is_match(text)
50 .map_err(|e| RegexMatchError::Fancy(Box::new(e))),
51 }
52 }
53}
54
55#[derive(Debug)]
59pub enum RegexMatchError {
60 Fancy(Box<fancy_regex::Error>),
63}
64
65impl std::fmt::Display for RegexMatchError {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 match self {
68 RegexMatchError::Fancy(e) => write!(f, "regex match failed: {e}"),
69 }
70 }
71}
72
73impl std::error::Error for RegexMatchError {}
74
75fn has_lookaround(pattern: &str) -> bool {
77 pattern.contains("(?=")
78 || pattern.contains("(?!")
79 || pattern.contains("(?<=")
80 || pattern.contains("(?<!")
81}
82
83#[derive(Debug)]
85pub enum RegexCompileError {
86 Standard(regex::Error),
88 Fancy(Box<fancy_regex::Error>),
91}
92
93impl std::fmt::Display for RegexCompileError {
94 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95 match self {
96 RegexCompileError::Standard(e) => write!(f, "{e}"),
97 RegexCompileError::Fancy(e) => write!(f, "{e}"),
98 }
99 }
100}
101
102impl std::error::Error for RegexCompileError {
103 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
104 match self {
105 RegexCompileError::Standard(e) => Some(e),
106 RegexCompileError::Fancy(e) => Some(e.as_ref()),
107 }
108 }
109}
110
111impl From<regex::Error> for RegexCompileError {
112 fn from(err: regex::Error) -> Self {
113 RegexCompileError::Standard(err)
114 }
115}
116
117impl From<fancy_regex::Error> for RegexCompileError {
118 fn from(err: fancy_regex::Error) -> Self {
119 RegexCompileError::Fancy(Box::new(err))
120 }
121}
122
123#[derive(Clone, Eq, PartialEq)]
125struct RegexCacheKey {
126 pattern: String,
127 case_insensitive: bool,
128 multiline: bool,
129 dot_all: bool,
130}
131
132impl Hash for RegexCacheKey {
133 fn hash<H: Hasher>(&self, state: &mut H) {
134 self.pattern.hash(state);
135 self.case_insensitive.hash(state);
136 self.multiline.hash(state);
137 self.dot_all.hash(state);
138 }
139}
140
141pub struct RegexCache {
143 cache: Arc<Mutex<LruCache<RegexCacheKey, CompiledRegex>>>,
144 capacity: usize,
145 hits: AtomicU64,
146 misses: AtomicU64,
147 evictions: AtomicU64,
148 policy: Arc<dyn CachePolicy<RegexCacheKey>>,
149 fancy_backtrack_limit: usize,
152}
153
154impl RegexCache {
155 #[must_use]
157 pub fn new(capacity: usize) -> Self {
158 let (kind, window_ratio) = Self::policy_params_from_env();
159 Self::with_policy(capacity, kind, window_ratio)
160 }
161
162 pub fn get_or_compile(
175 &self,
176 pattern: &str,
177 case_insensitive: bool,
178 multiline: bool,
179 dot_all: bool,
180 ) -> Result<CompiledRegex, RegexCompileError> {
181 let key = RegexCacheKey {
182 pattern: pattern.to_string(),
183 case_insensitive,
184 multiline,
185 dot_all,
186 };
187
188 self.handle_policy_evictions();
189
190 {
191 let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
192 if let Some(regex) = cache.get(&key) {
193 self.hits.fetch_add(1, Ordering::Relaxed);
194 let _ = self.policy.record_hit(&key);
195 return Ok(regex.clone());
196 }
197 }
198
199 self.misses.fetch_add(1, Ordering::Relaxed);
200
201 let compiled = if has_lookaround(pattern) {
204 let mut flag_prefix = String::new();
207 if case_insensitive {
208 flag_prefix.push_str("(?i)");
209 }
210 if multiline {
211 flag_prefix.push_str("(?m)");
212 }
213 if dot_all {
214 flag_prefix.push_str("(?s)");
215 }
216 let full_pattern = format!("{flag_prefix}{pattern}");
217 let fancy_re = fancy_regex::RegexBuilder::new(&full_pattern)
218 .backtrack_limit(self.fancy_backtrack_limit)
219 .build()?;
220 CompiledRegex::Fancy(Arc::new(fancy_re))
221 } else {
222 let mut builder = regex::RegexBuilder::new(pattern);
224 builder
225 .case_insensitive(case_insensitive)
226 .multi_line(multiline)
227 .dot_matches_new_line(dot_all);
228 let re = builder.build()?;
229 CompiledRegex::Standard(Arc::new(re))
230 };
231
232 if matches!(self.policy.admit(&key, 1), CacheAdmission::Rejected) {
233 debug!(
234 "regex cache policy {:?} rejected pattern {:?}",
235 self.policy.kind(),
236 key.pattern
237 );
238 return Ok(compiled);
239 }
240
241 {
243 let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
244 if cache.len() == self.capacity
245 && let Some((evicted_key, _)) = cache.pop_lru()
246 {
247 self.policy.invalidate(&evicted_key);
248 self.evictions.fetch_add(1, Ordering::Relaxed);
249 }
250 cache.put(key, compiled.clone());
251 }
252
253 self.handle_policy_evictions();
254
255 Ok(compiled)
256 }
257
258 #[cfg(test)]
264 pub fn len(&self) -> usize {
265 self.cache.lock().expect("regex cache mutex poisoned").len()
266 }
267
268 #[cfg(test)]
270 pub fn is_empty(&self) -> bool {
271 self.len() == 0
272 }
273
274 fn handle_policy_evictions(&self) {
275 let evicted = self.policy.drain_evictions();
276 if evicted.is_empty() {
277 return;
278 }
279 let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
280 for eviction in evicted {
281 if cache.pop(&eviction.key).is_some() {
282 self.evictions.fetch_add(1, Ordering::Relaxed);
283 }
284 }
285 }
286
287 fn with_policy(capacity: usize, kind: CachePolicyKind, window_ratio: f32) -> Self {
288 let normalized_capacity = capacity.max(1);
289 let config = CachePolicyConfig::new(kind, normalized_capacity as u64, window_ratio);
290 Self {
291 cache: Arc::new(Mutex::new(LruCache::new(
292 NonZeroUsize::new(normalized_capacity).expect("capacity must be > 0"),
293 ))),
294 capacity: normalized_capacity,
295 hits: AtomicU64::new(0),
296 misses: AtomicU64::new(0),
297 evictions: AtomicU64::new(0),
298 policy: build_cache_policy(&config),
299 fancy_backtrack_limit: FANCY_REGEX_BACKTRACK_LIMIT,
300 }
301 }
302
303 #[cfg(test)]
305 fn with_backtrack_limit(capacity: usize, limit: usize) -> Self {
306 let mut cache = Self::new(capacity);
307 cache.fancy_backtrack_limit = limit;
308 cache
309 }
310
311 fn policy_params_from_env() -> (CachePolicyKind, f32) {
312 let cfg = CacheConfig::from_env();
313 (cfg.policy_kind(), cfg.policy_window_ratio())
314 }
315
316 #[cfg(test)]
317 fn with_policy_kind(capacity: usize, kind: CachePolicyKind) -> Self {
318 Self::with_policy(capacity, kind, CacheConfig::DEFAULT_POLICY_WINDOW_RATIO)
319 }
320
321 #[cfg(test)]
322 fn policy_metrics(&self) -> crate::cache::policy::CachePolicyMetrics {
323 self.policy.stats()
324 }
325}
326
327static REGEX_CACHE: OnceLock<RegexCache> = OnceLock::new();
329
330fn get_global_cache() -> &'static RegexCache {
331 REGEX_CACHE.get_or_init(|| {
332 let size = std::env::var("SQRY_REGEX_CACHE_SIZE")
333 .ok()
334 .and_then(|s| s.parse::<usize>().ok())
335 .filter(|&s| (1..=10_000).contains(&s))
336 .unwrap_or(100);
337
338 RegexCache::new(size)
339 })
340}
341
342pub fn get_or_compile_regex(
351 pattern: &str,
352 case_insensitive: bool,
353 multiline: bool,
354 dot_all: bool,
355) -> Result<CompiledRegex, RegexCompileError> {
356 get_global_cache().get_or_compile(pattern, case_insensitive, multiline, dot_all)
359}
360
361#[cfg(test)]
362mod tests {
363 use super::*;
364 use crate::cache::policy::CachePolicyKind;
365
366 #[test]
367 fn test_cache_hit_reuses_compiled_regex() {
368 let cache = RegexCache::new(10);
369
370 let re1 = cache.get_or_compile("foo.*", false, false, false).unwrap();
372 assert_eq!(cache.len(), 1);
373
374 let _re2 = cache.get_or_compile("foo.*", false, false, false).unwrap();
376 assert_eq!(cache.len(), 1);
377 assert!(re1.is_match("foobar").unwrap());
379 }
380
381 #[test]
382 fn test_different_flags_create_separate_entries() {
383 let cache = RegexCache::new(10);
384
385 let re1 = cache.get_or_compile("foo", false, false, false).unwrap();
386 let re2 = cache.get_or_compile("foo", true, false, false).unwrap(); assert_eq!(cache.len(), 2); assert!(re1.is_match("foo").unwrap());
390 assert!(!re1.is_match("FOO").unwrap()); assert!(re2.is_match("FOO").unwrap()); }
393
394 #[test]
395 fn test_lru_eviction_works() {
396 let cache = RegexCache::new(2);
397
398 cache.get_or_compile("a", false, false, false).unwrap();
399 cache.get_or_compile("b", false, false, false).unwrap();
400 assert_eq!(cache.len(), 2);
401
402 cache.get_or_compile("c", false, false, false).unwrap();
404 assert_eq!(cache.len(), 2);
405 }
406
407 #[test]
408 fn test_compilation_errors_not_cached() {
409 let cache = RegexCache::new(10);
410
411 assert!(
413 cache
414 .get_or_compile("[invalid", false, false, false)
415 .is_err()
416 );
417 assert_eq!(cache.len(), 0); }
419
420 #[test]
421 fn tiny_lfu_rejects_cold_bursts() {
422 let cache = RegexCache::with_policy_kind(3, CachePolicyKind::TinyLfu);
423
424 let hot = cache
425 .get_or_compile("hot", false, false, false)
426 .expect("compile hot regex");
427 for _ in 0..10 {
428 let _ = cache
429 .get_or_compile("hot", false, false, false)
430 .expect("warm hot regex");
431 }
432
433 for i in 0..30 {
434 let pattern = format!("cold{i}");
435 let _ = cache
436 .get_or_compile(&pattern, false, false, false)
437 .expect("compile cold regex");
438 }
439
440 let warmed = cache
441 .get_or_compile("hot", false, false, false)
442 .expect("retrieve hot regex");
443 assert!(hot.is_match("hot").unwrap());
445 assert!(warmed.is_match("hot").unwrap());
446
447 let metrics = cache.policy_metrics();
448 assert!(
449 metrics.lfu_rejects > 0,
450 "expected TinyLFU to reject some cold entries"
451 );
452 }
453
454 #[test]
456 fn test_lookahead_pattern_compiles() {
457 let cache = RegexCache::new(10);
458 let re = cache
459 .get_or_compile("foo(?=bar)", false, false, false)
460 .expect("lookahead should compile");
461 assert!(re.is_match("foobar").unwrap());
462 assert!(!re.is_match("foobaz").unwrap());
463 }
464
465 #[test]
466 fn test_lookbehind_pattern_compiles() {
467 let cache = RegexCache::new(10);
468 let re = cache
469 .get_or_compile("(?<=test_)foo", false, false, false)
470 .expect("lookbehind should compile");
471 assert!(re.is_match("test_foo").unwrap());
472 assert!(!re.is_match("prod_foo").unwrap());
473 }
474
475 #[test]
476 fn test_negative_lookahead_pattern() {
477 let cache = RegexCache::new(10);
478 let re = cache
479 .get_or_compile("foo(?!bar)", false, false, false)
480 .expect("negative lookahead should compile");
481 assert!(re.is_match("foobaz").unwrap());
482 assert!(!re.is_match("foobar").unwrap());
483 }
484
485 #[test]
486 fn test_negative_lookbehind_pattern() {
487 let cache = RegexCache::new(10);
488 let re = cache
489 .get_or_compile("(?<!test_)foo", false, false, false)
490 .expect("negative lookbehind should compile");
491 assert!(re.is_match("prod_foo").unwrap());
492 assert!(!re.is_match("test_foo").unwrap());
493 }
494
495 #[test]
496 fn test_lookaround_with_flags() {
497 let cache = RegexCache::new(10);
498 let re = cache
500 .get_or_compile("(?<=TEST_)foo", true, false, false)
501 .expect("lookaround with flags should compile");
502 assert!(re.is_match("TEST_foo").unwrap());
503 assert!(re.is_match("test_foo").unwrap()); assert!(re.is_match("TEST_FOO").unwrap()); }
506
507 #[test]
515 fn test_backtrack_limit_exceeded_returns_error() {
516 let re = fancy_regex::RegexBuilder::new("(?=a*)b")
518 .backtrack_limit(1)
519 .build()
520 .expect("pattern should compile");
521 let compiled = CompiledRegex::Fancy(Arc::new(re));
522
523 let result = compiled.is_match("aaa");
525 assert!(
526 result.is_err(),
527 "expected backtrack-limit error, got {result:?}"
528 );
529 }
530
531 #[test]
538 fn test_cache_compiled_regex_enforces_backtrack_limit() {
539 let cache = RegexCache::with_backtrack_limit(10, 1);
541
542 let re = cache
543 .get_or_compile("(?=a*)b", false, false, false)
544 .expect("lookahead should compile through cache");
545
546 assert!(
547 matches!(re, CompiledRegex::Fancy(_)),
548 "expected Fancy variant for lookaround pattern"
549 );
550
551 let result = re.is_match("aaa");
554 assert!(
555 result.is_err(),
556 "cache-compiled regex must enforce backtrack limit, got {result:?}"
557 );
558 }
559}