use crate::cache::CacheConfig;
use crate::cache::policy::{
CacheAdmission, CachePolicy, CachePolicyConfig, CachePolicyKind, build_cache_policy,
};
use log::debug;
use lru::LruCache;
use regex::Regex;
use std::hash::{Hash, Hasher};
use std::num::NonZeroUsize;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
/// Backtrack limit handed to `fancy_regex::RegexBuilder::backtrack_limit` for
/// every lookaround pattern compiled through a `RegexCache`, unless a test
/// overrides the cache's `fancy_backtrack_limit` field.
const FANCY_REGEX_BACKTRACK_LIMIT: usize = 100_000;
/// A compiled pattern produced by one of the two regex engines used in this
/// module.
///
/// Both variants hold their engine object behind an `Arc`, so cloning a
/// `CompiledRegex` only bumps a reference count.
#[derive(Clone)]
pub enum CompiledRegex {
/// Pattern compiled with the `regex` crate (used when no lookaround syntax
/// is detected in the pattern).
Standard(Arc<Regex>),
/// Pattern compiled with the `fancy_regex` crate (used when the pattern
/// contains lookahead or lookbehind syntax).
Fancy(Arc<fancy_regex::Regex>),
}
impl CompiledRegex {
    /// Reports whether `text` contains a match for this pattern.
    ///
    /// # Errors
    ///
    /// The `Standard` engine never fails. The `Fancy` engine can fail while
    /// matching (for example when its backtrack limit is exceeded); that
    /// failure is returned boxed inside [`RegexMatchError::Fancy`].
    pub fn is_match(&self, text: &str) -> Result<bool, RegexMatchError> {
        match self {
            Self::Standard(engine) => Ok(engine.is_match(text)),
            Self::Fancy(engine) => match engine.is_match(text) {
                Ok(found) => Ok(found),
                Err(cause) => Err(RegexMatchError::Fancy(Box::new(cause))),
            },
        }
    }
}
/// Error returned by `CompiledRegex::is_match` when matching itself fails.
#[derive(Debug)]
pub enum RegexMatchError {
/// The `fancy_regex` engine reported an error while matching; the original
/// error is kept boxed.
Fancy(Box<fancy_regex::Error>),
}
impl std::fmt::Display for RegexMatchError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
RegexMatchError::Fancy(e) => write!(f, "regex match failed: {e}"),
}
}
}
impl std::error::Error for RegexMatchError {}
/// Returns `true` when `pattern` contains the opening token of a lookahead
/// (`(?=`, `(?!`) or lookbehind (`(?<=`, `(?<!`).
///
/// This is a plain substring scan: it does not parse the pattern, so an
/// escaped or bracketed occurrence of one of these tokens also counts.
fn has_lookaround(pattern: &str) -> bool {
    const LOOKAROUND_OPENERS: [&str; 4] = ["(?=", "(?!", "(?<=", "(?<!"];
    LOOKAROUND_OPENERS
        .iter()
        .any(|&opener| pattern.contains(opener))
}
/// Error returned when a pattern cannot be compiled by either engine.
#[derive(Debug)]
pub enum RegexCompileError {
/// Compilation with the `regex` crate failed.
Standard(regex::Error),
/// Compilation with the `fancy_regex` crate failed; the original error is
/// kept boxed.
Fancy(Box<fancy_regex::Error>),
}
impl std::fmt::Display for RegexCompileError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
RegexCompileError::Standard(e) => write!(f, "{e}"),
RegexCompileError::Fancy(e) => write!(f, "{e}"),
}
}
}
impl std::error::Error for RegexCompileError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
RegexCompileError::Standard(e) => Some(e),
RegexCompileError::Fancy(e) => Some(e.as_ref()),
}
}
}
/// Lets `?` turn a `regex` build failure into [`RegexCompileError::Standard`].
impl From<regex::Error> for RegexCompileError {
    fn from(err: regex::Error) -> Self {
        Self::Standard(err)
    }
}
/// Lets `?` turn a `fancy_regex` build failure into
/// [`RegexCompileError::Fancy`], boxing the engine error.
impl From<fancy_regex::Error> for RegexCompileError {
    fn from(err: fancy_regex::Error) -> Self {
        let boxed = Box::new(err);
        Self::Fancy(boxed)
    }
}
/// Cache lookup key: the pattern text together with every compile flag, so
/// the same pattern compiled under different flags occupies separate entries.
#[derive(Clone, Eq, PartialEq)]
struct RegexCacheKey {
    /// Pattern source exactly as supplied by the caller.
    pattern: String,
    /// Whether the pattern was requested with case-insensitive matching.
    case_insensitive: bool,
    /// Whether the pattern was requested with multi-line mode.
    multiline: bool,
    /// Whether the pattern was requested with `.` matching newlines.
    dot_all: bool,
}

impl Hash for RegexCacheKey {
    /// Feeds every field into `state` in declaration order.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Destructuring makes the compiler flag this impl if a field is ever
        // added without being hashed.
        let Self {
            pattern,
            case_insensitive,
            multiline,
            dot_all,
        } = self;
        pattern.hash(state);
        case_insensitive.hash(state);
        multiline.hash(state);
        dot_all.hash(state);
    }
}
/// Cache of compiled regular expressions keyed by pattern text and flags.
///
/// Entries live in an LRU map behind a mutex; a separate policy object is
/// consulted before a newly compiled pattern is inserted and can also request
/// removals.
pub struct RegexCache {
/// LRU store of compiled patterns, guarded by a mutex.
cache: Arc<Mutex<LruCache<RegexCacheKey, CompiledRegex>>>,
/// Maximum number of entries; always at least 1.
capacity: usize,
/// Number of lookups answered from the cache.
hits: AtomicU64,
/// Number of lookups that did not find a cached entry (counted before
/// compilation is attempted).
misses: AtomicU64,
/// Number of entries removed, either to make room for a new entry or
/// because the policy reported them as evicted.
evictions: AtomicU64,
/// Admission policy: records hits, decides whether a new entry is stored,
/// and reports keys it wants removed.
policy: Arc<dyn CachePolicy<RegexCacheKey>>,
/// Backtrack limit applied to patterns compiled with `fancy_regex`.
fancy_backtrack_limit: usize,
}
impl RegexCache {
    /// Creates a cache that stores at most `capacity` compiled patterns.
    ///
    /// A `capacity` of zero is raised to one. The admission policy kind and
    /// its window ratio are taken from `CacheConfig::from_env()`.
    #[must_use]
    pub fn new(capacity: usize) -> Self {
        let (kind, window_ratio) = Self::policy_params_from_env();
        Self::with_policy(capacity, kind, window_ratio)
    }

    /// Returns the compiled form of `pattern` for the given flags, compiling
    /// and (policy permitting) caching it on a miss.
    ///
    /// Patterns containing lookaround syntax are compiled with `fancy_regex`
    /// using this cache's backtrack limit; all others use the `regex` crate.
    /// When the policy rejects admission, the freshly compiled regex is still
    /// returned but not stored.
    ///
    /// # Errors
    ///
    /// Returns [`RegexCompileError`] when the pattern does not compile. Failed
    /// compilations are never stored.
    ///
    /// # Panics
    ///
    /// Panics if the cache mutex has been poisoned by a panic in another
    /// thread.
    pub fn get_or_compile(
        &self,
        pattern: &str,
        case_insensitive: bool,
        multiline: bool,
        dot_all: bool,
    ) -> Result<CompiledRegex, RegexCompileError> {
        let key = RegexCacheKey {
            pattern: pattern.to_string(),
            case_insensitive,
            multiline,
            dot_all,
        };

        // Apply removals the policy has queued before looking anything up.
        self.handle_policy_evictions();

        {
            let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
            if let Some(regex) = cache.get(&key) {
                self.hits.fetch_add(1, Ordering::Relaxed);
                // The value returned by `record_hit` is intentionally unused.
                let _ = self.policy.record_hit(&key);
                return Ok(regex.clone());
            }
        }

        // Counted before compiling, so failed compilations are misses too.
        self.misses.fetch_add(1, Ordering::Relaxed);

        // Compile without holding the lock so other threads are not blocked.
        let compiled = self.compile_uncached(pattern, case_insensitive, multiline, dot_all)?;

        if matches!(self.policy.admit(&key, 1), CacheAdmission::Rejected) {
            debug!(
                "regex cache policy {:?} rejected pattern {:?}",
                self.policy.kind(),
                key.pattern
            );
            return Ok(compiled);
        }

        {
            let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
            // The lock was released while compiling, so another thread may
            // already have stored this key. In that case `put` merely replaces
            // the value and no slot has to be freed; evicting here would drop
            // an unrelated entry and over-count evictions.
            if !cache.contains(&key)
                && cache.len() == self.capacity
                && let Some((evicted_key, _)) = cache.pop_lru()
            {
                self.policy.invalidate(&evicted_key);
                self.evictions.fetch_add(1, Ordering::Relaxed);
            }
            cache.put(key, compiled.clone());
        }

        // Admission may have queued further removals; apply them right away.
        self.handle_policy_evictions();
        Ok(compiled)
    }

    /// Number of entries currently stored (test builds only).
    #[cfg(test)]
    pub fn len(&self) -> usize {
        self.cache.lock().expect("regex cache mutex poisoned").len()
    }

    /// Whether the cache currently holds no entries (test builds only).
    #[cfg(test)]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Compiles `pattern` with the engine appropriate for its syntax, without
    /// touching the cache.
    fn compile_uncached(
        &self,
        pattern: &str,
        case_insensitive: bool,
        multiline: bool,
        dot_all: bool,
    ) -> Result<CompiledRegex, RegexCompileError> {
        if has_lookaround(pattern) {
            // Flags are passed to `fancy_regex` as inline groups prepended to
            // the pattern.
            let mut flag_prefix = String::new();
            if case_insensitive {
                flag_prefix.push_str("(?i)");
            }
            if multiline {
                flag_prefix.push_str("(?m)");
            }
            if dot_all {
                flag_prefix.push_str("(?s)");
            }
            let full_pattern = format!("{flag_prefix}{pattern}");
            let fancy_re = fancy_regex::RegexBuilder::new(&full_pattern)
                .backtrack_limit(self.fancy_backtrack_limit)
                .build()?;
            Ok(CompiledRegex::Fancy(Arc::new(fancy_re)))
        } else {
            let mut builder = regex::RegexBuilder::new(pattern);
            builder
                .case_insensitive(case_insensitive)
                .multi_line(multiline)
                .dot_matches_new_line(dot_all);
            let re = builder.build()?;
            Ok(CompiledRegex::Standard(Arc::new(re)))
        }
    }

    /// Removes every key the policy reports as evicted, counting only the
    /// ones that were actually present in the cache.
    fn handle_policy_evictions(&self) {
        let evicted = self.policy.drain_evictions();
        if evicted.is_empty() {
            // Nothing queued: skip taking the lock.
            return;
        }
        let mut cache = self.cache.lock().expect("regex cache mutex poisoned");
        for eviction in evicted {
            if cache.pop(&eviction.key).is_some() {
                self.evictions.fetch_add(1, Ordering::Relaxed);
            }
        }
    }

    /// Builds a cache with an explicit policy kind and window ratio.
    ///
    /// A `capacity` of zero is raised to one; the LRU store and the policy
    /// are both sized with the normalized value.
    fn with_policy(capacity: usize, kind: CachePolicyKind, window_ratio: f32) -> Self {
        let normalized_capacity = capacity.max(1);
        let config = CachePolicyConfig::new(kind, normalized_capacity as u64, window_ratio);
        Self {
            cache: Arc::new(Mutex::new(LruCache::new(
                NonZeroUsize::new(normalized_capacity).expect("capacity must be > 0"),
            ))),
            capacity: normalized_capacity,
            hits: AtomicU64::new(0),
            misses: AtomicU64::new(0),
            evictions: AtomicU64::new(0),
            policy: build_cache_policy(&config),
            fancy_backtrack_limit: FANCY_REGEX_BACKTRACK_LIMIT,
        }
    }

    /// Like [`RegexCache::new`], but with a custom `fancy_regex` backtrack
    /// limit (test builds only).
    #[cfg(test)]
    fn with_backtrack_limit(capacity: usize, limit: usize) -> Self {
        let mut cache = Self::new(capacity);
        cache.fancy_backtrack_limit = limit;
        cache
    }

    /// Reads the policy kind and window ratio from `CacheConfig::from_env()`.
    fn policy_params_from_env() -> (CachePolicyKind, f32) {
        let cfg = CacheConfig::from_env();
        (cfg.policy_kind(), cfg.policy_window_ratio())
    }

    /// Builds a cache with the given policy kind and the default window ratio
    /// (test builds only).
    #[cfg(test)]
    fn with_policy_kind(capacity: usize, kind: CachePolicyKind) -> Self {
        Self::with_policy(capacity, kind, CacheConfig::DEFAULT_POLICY_WINDOW_RATIO)
    }

    /// Returns the policy's own statistics (test builds only).
    #[cfg(test)]
    fn policy_metrics(&self) -> crate::cache::policy::CachePolicyMetrics {
        self.policy.stats()
    }
}
/// Process-wide cache instance, initialized on first access through
/// `get_global_cache`.
static REGEX_CACHE: OnceLock<RegexCache> = OnceLock::new();
/// Returns the process-wide cache, creating it on first use.
///
/// The capacity comes from the `SQRY_REGEX_CACHE_SIZE` environment variable
/// when it parses as an integer in `1..=10_000`; otherwise it is 100.
fn get_global_cache() -> &'static RegexCache {
    REGEX_CACHE.get_or_init(|| {
        let requested = match std::env::var("SQRY_REGEX_CACHE_SIZE") {
            Ok(raw) => raw.parse::<usize>().ok(),
            Err(_) => None,
        };
        // Unset, unparsable, or out-of-range values all fall back to 100.
        let size = match requested {
            Some(n) if (1..=10_000).contains(&n) => n,
            _ => 100,
        };
        RegexCache::new(size)
    })
}
/// Compiles `pattern` with the given flags through the process-wide cache,
/// reusing an already cached compilation when one exists.
///
/// # Errors
///
/// Returns [`RegexCompileError`] when the pattern does not compile.
pub fn get_or_compile_regex(
    pattern: &str,
    case_insensitive: bool,
    multiline: bool,
    dot_all: bool,
) -> Result<CompiledRegex, RegexCompileError> {
    let shared = get_global_cache();
    shared.get_or_compile(pattern, case_insensitive, multiline, dot_all)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cache::policy::CachePolicyKind;

    /// Asking twice for the same pattern and flags keeps a single entry.
    #[test]
    fn test_cache_hit_reuses_compiled_regex() {
        let cache = RegexCache::new(10);

        let first = cache.get_or_compile("foo.*", false, false, false).unwrap();
        assert_eq!(cache.len(), 1);

        let _second = cache.get_or_compile("foo.*", false, false, false).unwrap();
        assert_eq!(cache.len(), 1);

        assert!(first.is_match("foobar").unwrap());
    }

    /// The same pattern under different flags is stored as separate entries.
    #[test]
    fn test_different_flags_create_separate_entries() {
        let cache = RegexCache::new(10);

        let case_sensitive = cache.get_or_compile("foo", false, false, false).unwrap();
        let case_insensitive = cache.get_or_compile("foo", true, false, false).unwrap();
        assert_eq!(cache.len(), 2);

        assert!(case_sensitive.is_match("foo").unwrap());
        assert!(!case_sensitive.is_match("FOO").unwrap());
        assert!(case_insensitive.is_match("FOO").unwrap());
    }

    /// Inserting beyond capacity keeps the entry count at the capacity.
    #[test]
    fn test_lru_eviction_works() {
        let cache = RegexCache::new(2);

        cache.get_or_compile("a", false, false, false).unwrap();
        cache.get_or_compile("b", false, false, false).unwrap();
        assert_eq!(cache.len(), 2);

        cache.get_or_compile("c", false, false, false).unwrap();
        assert_eq!(cache.len(), 2);
    }

    /// A pattern that fails to compile leaves the cache empty.
    #[test]
    fn test_compilation_errors_not_cached() {
        let cache = RegexCache::new(10);

        let outcome = cache.get_or_compile("[invalid", false, false, false);
        assert!(outcome.is_err());
        assert_eq!(cache.len(), 0);
    }

    /// After warming one pattern, a burst of one-off patterns makes the
    /// TinyLFU policy report rejections.
    #[test]
    fn tiny_lfu_rejects_cold_bursts() {
        let cache = RegexCache::with_policy_kind(3, CachePolicyKind::TinyLfu);

        let hot = cache
            .get_or_compile("hot", false, false, false)
            .expect("compile hot regex");
        for _round in 0..10 {
            cache
                .get_or_compile("hot", false, false, false)
                .expect("warm hot regex");
        }

        for index in 0..30 {
            let cold_pattern = format!("cold{index}");
            cache
                .get_or_compile(&cold_pattern, false, false, false)
                .expect("compile cold regex");
        }

        let warmed = cache
            .get_or_compile("hot", false, false, false)
            .expect("retrieve hot regex");
        assert!(hot.is_match("hot").unwrap());
        assert!(warmed.is_match("hot").unwrap());

        let metrics = cache.policy_metrics();
        assert!(
            metrics.lfu_rejects > 0,
            "expected TinyLFU to reject some cold entries"
        );
    }

    /// Positive lookahead compiles and matches as expected.
    #[test]
    fn test_lookahead_pattern_compiles() {
        let cache = RegexCache::new(10);
        let lookahead = cache
            .get_or_compile("foo(?=bar)", false, false, false)
            .expect("lookahead should compile");

        assert!(lookahead.is_match("foobar").unwrap());
        assert!(!lookahead.is_match("foobaz").unwrap());
    }

    /// Positive lookbehind compiles and matches as expected.
    #[test]
    fn test_lookbehind_pattern_compiles() {
        let cache = RegexCache::new(10);
        let lookbehind = cache
            .get_or_compile("(?<=test_)foo", false, false, false)
            .expect("lookbehind should compile");

        assert!(lookbehind.is_match("test_foo").unwrap());
        assert!(!lookbehind.is_match("prod_foo").unwrap());
    }

    /// Negative lookahead compiles and matches as expected.
    #[test]
    fn test_negative_lookahead_pattern() {
        let cache = RegexCache::new(10);
        let negative_lookahead = cache
            .get_or_compile("foo(?!bar)", false, false, false)
            .expect("negative lookahead should compile");

        assert!(negative_lookahead.is_match("foobaz").unwrap());
        assert!(!negative_lookahead.is_match("foobar").unwrap());
    }

    /// Negative lookbehind compiles and matches as expected.
    #[test]
    fn test_negative_lookbehind_pattern() {
        let cache = RegexCache::new(10);
        let negative_lookbehind = cache
            .get_or_compile("(?<!test_)foo", false, false, false)
            .expect("negative lookbehind should compile");

        assert!(negative_lookbehind.is_match("prod_foo").unwrap());
        assert!(!negative_lookbehind.is_match("test_foo").unwrap());
    }

    /// The case-insensitive flag applies to lookaround patterns too.
    #[test]
    fn test_lookaround_with_flags() {
        let cache = RegexCache::new(10);
        let insensitive = cache
            .get_or_compile("(?<=TEST_)foo", true, false, false)
            .expect("lookaround with flags should compile");

        for haystack in ["TEST_foo", "test_foo", "TEST_FOO"] {
            assert!(insensitive.is_match(haystack).unwrap());
        }
    }

    /// A directly built fancy regex with a tiny backtrack limit surfaces the
    /// failure as an `Err` from `CompiledRegex::is_match`.
    #[test]
    fn test_backtrack_limit_exceeded_returns_error() {
        let limited = fancy_regex::RegexBuilder::new("(?=a*)b")
            .backtrack_limit(1)
            .build()
            .expect("pattern should compile");
        let compiled = CompiledRegex::Fancy(Arc::new(limited));

        let result = compiled.is_match("aaa");
        assert!(
            result.is_err(),
            "expected backtrack-limit error, got {result:?}"
        );
    }

    /// A regex compiled through the cache carries the cache's backtrack limit.
    #[test]
    fn test_cache_compiled_regex_enforces_backtrack_limit() {
        let cache = RegexCache::with_backtrack_limit(10, 1);
        let compiled = cache
            .get_or_compile("(?=a*)b", false, false, false)
            .expect("lookahead should compile through cache");
        assert!(
            matches!(compiled, CompiledRegex::Fancy(_)),
            "expected Fancy variant for lookaround pattern"
        );

        let result = compiled.is_match("aaa");
        assert!(
            result.is_err(),
            "cache-compiled regex must enforce backtrack limit, got {result:?}"
        );
    }
}