1use crate::cache::CacheConfig;
11use crate::cache::policy::{
12 CacheAdmission, CachePolicy, CachePolicyConfig, CachePolicyKind, build_cache_policy,
13};
14use log::debug;
15use lru::LruCache;
16use regex::Regex;
17use std::hash::{Hash, Hasher};
18use std::num::NonZeroUsize;
19use std::sync::atomic::{AtomicU64, Ordering};
20use std::sync::{Arc, Mutex, OnceLock};
21
22const FANCY_REGEX_BACKTRACK_LIMIT: usize = 100_000;
29
30#[derive(Clone)]
32pub enum CompiledRegex {
33 Standard(Arc<Regex>),
35 Fancy(Arc<fancy_regex::Regex>),
37}
38
39impl CompiledRegex {
40 pub fn is_match(&self, text: &str) -> Result<bool, RegexMatchError> {
51 match self {
52 CompiledRegex::Standard(re) => Ok(re.is_match(text)),
53 CompiledRegex::Fancy(re) => re
54 .is_match(text)
55 .map_err(|e| RegexMatchError::Fancy(Box::new(e))),
56 }
57 }
58}
59
60#[derive(Debug)]
64pub enum RegexMatchError {
65 Fancy(Box<fancy_regex::Error>),
68}
69
70impl std::fmt::Display for RegexMatchError {
71 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72 match self {
73 RegexMatchError::Fancy(e) => write!(f, "regex match failed: {e}"),
74 }
75 }
76}
77
78impl std::error::Error for RegexMatchError {}
79
80fn has_lookaround(pattern: &str) -> bool {
82 pattern.contains("(?=")
83 || pattern.contains("(?!")
84 || pattern.contains("(?<=")
85 || pattern.contains("(?<!")
86}
87
88#[derive(Debug)]
90pub enum RegexCompileError {
91 Standard(regex::Error),
93 Fancy(Box<fancy_regex::Error>),
96}
97
98impl std::fmt::Display for RegexCompileError {
99 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100 match self {
101 RegexCompileError::Standard(e) => write!(f, "{e}"),
102 RegexCompileError::Fancy(e) => write!(f, "{e}"),
103 }
104 }
105}
106
107impl std::error::Error for RegexCompileError {
108 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
109 match self {
110 RegexCompileError::Standard(e) => Some(e),
111 RegexCompileError::Fancy(e) => Some(e.as_ref()),
112 }
113 }
114}
115
116impl From<regex::Error> for RegexCompileError {
117 fn from(err: regex::Error) -> Self {
118 RegexCompileError::Standard(err)
119 }
120}
121
122impl From<fancy_regex::Error> for RegexCompileError {
123 fn from(err: fancy_regex::Error) -> Self {
124 RegexCompileError::Fancy(Box::new(err))
125 }
126}
127
128#[derive(Clone, Eq, PartialEq)]
130struct RegexCacheKey {
131 pattern: String,
132 case_insensitive: bool,
133 multiline: bool,
134 dot_all: bool,
135}
136
137impl Hash for RegexCacheKey {
138 fn hash<H: Hasher>(&self, state: &mut H) {
139 self.pattern.hash(state);
140 self.case_insensitive.hash(state);
141 self.multiline.hash(state);
142 self.dot_all.hash(state);
143 }
144}
145
146pub struct RegexCache {
148 cache: Arc<Mutex<LruCache<RegexCacheKey, CompiledRegex>>>,
149 capacity: usize,
150 hits: AtomicU64,
151 misses: AtomicU64,
152 evictions: AtomicU64,
153 policy: Arc<dyn CachePolicy<RegexCacheKey>>,
154 fancy_backtrack_limit: usize,
157}
158
159impl RegexCache {
160 #[must_use]
162 pub fn new(capacity: usize) -> Self {
163 let (kind, window_ratio) = Self::policy_params_from_env();
164 Self::with_policy(capacity, kind, window_ratio)
165 }
166
167 pub fn get_or_compile(
180 &self,
181 pattern: &str,
182 case_insensitive: bool,
183 multiline: bool,
184 dot_all: bool,
185 ) -> Result<CompiledRegex, RegexCompileError> {
186 let key = RegexCacheKey {
187 pattern: pattern.to_string(),
188 case_insensitive,
189 multiline,
190 dot_all,
191 };
192
193 self.handle_policy_evictions();
194
195 {
196 let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
197 if let Some(regex) = cache.get(&key) {
198 self.hits.fetch_add(1, Ordering::Relaxed);
199 let _ = self.policy.record_hit(&key);
200 return Ok(regex.clone());
201 }
202 }
203
204 self.misses.fetch_add(1, Ordering::Relaxed);
205
206 let compiled = if has_lookaround(pattern) {
209 let mut flag_prefix = String::new();
212 if case_insensitive {
213 flag_prefix.push_str("(?i)");
214 }
215 if multiline {
216 flag_prefix.push_str("(?m)");
217 }
218 if dot_all {
219 flag_prefix.push_str("(?s)");
220 }
221 let full_pattern = format!("{flag_prefix}{pattern}");
222 let fancy_re = fancy_regex::RegexBuilder::new(&full_pattern)
223 .backtrack_limit(self.fancy_backtrack_limit)
224 .build()?;
225 CompiledRegex::Fancy(Arc::new(fancy_re))
226 } else {
227 let mut builder = regex::RegexBuilder::new(pattern);
229 builder
230 .case_insensitive(case_insensitive)
231 .multi_line(multiline)
232 .dot_matches_new_line(dot_all);
233 let re = builder.build()?;
234 CompiledRegex::Standard(Arc::new(re))
235 };
236
237 if matches!(self.policy.admit(&key, 1), CacheAdmission::Rejected) {
238 debug!(
239 "regex cache policy {:?} rejected pattern {:?}",
240 self.policy.kind(),
241 key.pattern
242 );
243 return Ok(compiled);
244 }
245
246 {
248 let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
249 if cache.len() == self.capacity
250 && let Some((evicted_key, _)) = cache.pop_lru()
251 {
252 self.policy.invalidate(&evicted_key);
253 self.evictions.fetch_add(1, Ordering::Relaxed);
254 }
255 cache.put(key, compiled.clone());
256 }
257
258 self.handle_policy_evictions();
259
260 Ok(compiled)
261 }
262
263 #[cfg(test)]
269 pub fn len(&self) -> usize {
270 self.cache.lock().expect("regex cache mutex poisoned").len()
271 }
272
273 #[cfg(test)]
275 pub fn is_empty(&self) -> bool {
276 self.len() == 0
277 }
278
279 fn handle_policy_evictions(&self) {
280 let evicted = self.policy.drain_evictions();
281 if evicted.is_empty() {
282 return;
283 }
284 let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
285 for eviction in evicted {
286 if cache.pop(&eviction.key).is_some() {
287 self.evictions.fetch_add(1, Ordering::Relaxed);
288 }
289 }
290 }
291
292 fn with_policy(capacity: usize, kind: CachePolicyKind, window_ratio: f32) -> Self {
293 let normalized_capacity = capacity.max(1);
294 let config = CachePolicyConfig::new(kind, normalized_capacity as u64, window_ratio);
295 Self {
296 cache: Arc::new(Mutex::new(LruCache::new(
297 NonZeroUsize::new(normalized_capacity).expect("capacity must be > 0"),
298 ))),
299 capacity: normalized_capacity,
300 hits: AtomicU64::new(0),
301 misses: AtomicU64::new(0),
302 evictions: AtomicU64::new(0),
303 policy: build_cache_policy(&config),
304 fancy_backtrack_limit: FANCY_REGEX_BACKTRACK_LIMIT,
305 }
306 }
307
308 #[cfg(test)]
310 fn with_backtrack_limit(capacity: usize, limit: usize) -> Self {
311 let mut cache = Self::new(capacity);
312 cache.fancy_backtrack_limit = limit;
313 cache
314 }
315
316 fn policy_params_from_env() -> (CachePolicyKind, f32) {
317 let cfg = CacheConfig::from_env();
318 (cfg.policy_kind(), cfg.policy_window_ratio())
319 }
320
321 #[cfg(test)]
322 fn with_policy_kind(capacity: usize, kind: CachePolicyKind) -> Self {
323 Self::with_policy(capacity, kind, CacheConfig::DEFAULT_POLICY_WINDOW_RATIO)
324 }
325
326 #[cfg(test)]
327 fn policy_metrics(&self) -> crate::cache::policy::CachePolicyMetrics {
328 self.policy.stats()
329 }
330}
331
332static REGEX_CACHE: OnceLock<RegexCache> = OnceLock::new();
334
335fn get_global_cache() -> &'static RegexCache {
336 REGEX_CACHE.get_or_init(|| {
337 let size = std::env::var("SQRY_REGEX_CACHE_SIZE")
338 .ok()
339 .and_then(|s| s.parse::<usize>().ok())
340 .filter(|&s| (1..=10_000).contains(&s))
341 .unwrap_or(100);
342
343 RegexCache::new(size)
344 })
345}
346
347pub fn get_or_compile_regex(
356 pattern: &str,
357 case_insensitive: bool,
358 multiline: bool,
359 dot_all: bool,
360) -> Result<CompiledRegex, RegexCompileError> {
361 get_global_cache().get_or_compile(pattern, case_insensitive, multiline, dot_all)
364}
365
366#[cfg(test)]
367mod tests {
368 use super::*;
369 use crate::cache::policy::CachePolicyKind;
370
371 #[test]
372 fn test_cache_hit_reuses_compiled_regex() {
373 let cache = RegexCache::new(10);
374
375 let re1 = cache.get_or_compile("foo.*", false, false, false).unwrap();
377 assert_eq!(cache.len(), 1);
378
379 let _re2 = cache.get_or_compile("foo.*", false, false, false).unwrap();
381 assert_eq!(cache.len(), 1);
382 assert!(re1.is_match("foobar").unwrap());
384 }
385
386 #[test]
387 fn test_different_flags_create_separate_entries() {
388 let cache = RegexCache::new(10);
389
390 let re1 = cache.get_or_compile("foo", false, false, false).unwrap();
391 let re2 = cache.get_or_compile("foo", true, false, false).unwrap(); assert_eq!(cache.len(), 2); assert!(re1.is_match("foo").unwrap());
395 assert!(!re1.is_match("FOO").unwrap()); assert!(re2.is_match("FOO").unwrap()); }
398
399 #[test]
400 fn test_lru_eviction_works() {
401 let cache = RegexCache::new(2);
402
403 cache.get_or_compile("a", false, false, false).unwrap();
404 cache.get_or_compile("b", false, false, false).unwrap();
405 assert_eq!(cache.len(), 2);
406
407 cache.get_or_compile("c", false, false, false).unwrap();
409 assert_eq!(cache.len(), 2);
410 }
411
412 #[test]
413 fn test_compilation_errors_not_cached() {
414 let cache = RegexCache::new(10);
415
416 assert!(
418 cache
419 .get_or_compile("[invalid", false, false, false)
420 .is_err()
421 );
422 assert_eq!(cache.len(), 0); }
424
425 #[test]
426 fn tiny_lfu_rejects_cold_bursts() {
427 let cache = RegexCache::with_policy_kind(3, CachePolicyKind::TinyLfu);
428
429 let hot = cache
430 .get_or_compile("hot", false, false, false)
431 .expect("compile hot regex");
432 for _ in 0..10 {
433 let _ = cache
434 .get_or_compile("hot", false, false, false)
435 .expect("warm hot regex");
436 }
437
438 for i in 0..30 {
439 let pattern = format!("cold{i}");
440 let _ = cache
441 .get_or_compile(&pattern, false, false, false)
442 .expect("compile cold regex");
443 }
444
445 let warmed = cache
446 .get_or_compile("hot", false, false, false)
447 .expect("retrieve hot regex");
448 assert!(hot.is_match("hot").unwrap());
450 assert!(warmed.is_match("hot").unwrap());
451
452 let metrics = cache.policy_metrics();
453 assert!(
454 metrics.lfu_rejects > 0,
455 "expected TinyLFU to reject some cold entries"
456 );
457 }
458
459 #[test]
461 fn test_lookahead_pattern_compiles() {
462 let cache = RegexCache::new(10);
463 let re = cache
464 .get_or_compile("foo(?=bar)", false, false, false)
465 .expect("lookahead should compile");
466 assert!(re.is_match("foobar").unwrap());
467 assert!(!re.is_match("foobaz").unwrap());
468 }
469
470 #[test]
471 fn test_lookbehind_pattern_compiles() {
472 let cache = RegexCache::new(10);
473 let re = cache
474 .get_or_compile("(?<=test_)foo", false, false, false)
475 .expect("lookbehind should compile");
476 assert!(re.is_match("test_foo").unwrap());
477 assert!(!re.is_match("prod_foo").unwrap());
478 }
479
480 #[test]
481 fn test_negative_lookahead_pattern() {
482 let cache = RegexCache::new(10);
483 let re = cache
484 .get_or_compile("foo(?!bar)", false, false, false)
485 .expect("negative lookahead should compile");
486 assert!(re.is_match("foobaz").unwrap());
487 assert!(!re.is_match("foobar").unwrap());
488 }
489
490 #[test]
491 fn test_negative_lookbehind_pattern() {
492 let cache = RegexCache::new(10);
493 let re = cache
494 .get_or_compile("(?<!test_)foo", false, false, false)
495 .expect("negative lookbehind should compile");
496 assert!(re.is_match("prod_foo").unwrap());
497 assert!(!re.is_match("test_foo").unwrap());
498 }
499
500 #[test]
501 fn test_lookaround_with_flags() {
502 let cache = RegexCache::new(10);
503 let re = cache
505 .get_or_compile("(?<=TEST_)foo", true, false, false)
506 .expect("lookaround with flags should compile");
507 assert!(re.is_match("TEST_foo").unwrap());
508 assert!(re.is_match("test_foo").unwrap()); assert!(re.is_match("TEST_FOO").unwrap()); }
511
512 #[test]
520 fn test_backtrack_limit_exceeded_returns_error() {
521 let re = fancy_regex::RegexBuilder::new("(?=a*)b")
523 .backtrack_limit(1)
524 .build()
525 .expect("pattern should compile");
526 let compiled = CompiledRegex::Fancy(Arc::new(re));
527
528 let result = compiled.is_match("aaa");
530 assert!(
531 result.is_err(),
532 "expected backtrack-limit error, got {result:?}"
533 );
534 }
535
536 #[test]
543 fn test_cache_compiled_regex_enforces_backtrack_limit() {
544 let cache = RegexCache::with_backtrack_limit(10, 1);
546
547 let re = cache
548 .get_or_compile("(?=a*)b", false, false, false)
549 .expect("lookahead should compile through cache");
550
551 assert!(
552 matches!(re, CompiledRegex::Fancy(_)),
553 "expected Fancy variant for lookaround pattern"
554 );
555
556 let result = re.is_match("aaa");
559 assert!(
560 result.is_err(),
561 "cache-compiled regex must enforce backtrack limit, got {result:?}"
562 );
563 }
564}