radiate_utils/
regex.rs

1use crate::LruCache;
2use regex::{Regex, RegexBuilder};
3use std::cell::RefCell;
4
/// Declares one or more lazily compiled regex statics.
///
/// Every `[attributes] [visibility] static NAME = pattern;` entry expands to a
/// `std::sync::LazyLock<regex::Regex>` static named `NAME` whose regex is
/// compiled from `pattern` the first time the static is dereferenced.
///
/// Outer attributes written in front of an entry — including `///` doc
/// comments — are forwarded to the generated static.
///
/// The `regex::Regex` path is resolved where the macro is invoked, so the
/// calling scope must be able to name `regex`.
///
/// # Panics
///
/// The first access to a generated static panics if its pattern does not
/// compile, because the result of `Regex::new` is unwrapped.
///
/// # Examples
///
/// ```ignore
/// cached_regex! {
///     /// Matches strings made only of ASCII digits.
///     pub static DIGITS = r"^\d+$";
///     static WORD = r"[a-z]+";
/// }
/// ```
#[macro_export]
macro_rules! cached_regex {
    // Nothing left to declare: end of the recursion.
    () => {};
    ($(#[$attr:meta])* $vis:vis static $name:ident = $regex:expr; $($rest:tt)*) => {
        $(#[$attr])*
        // NOTE(review): presumably the project's clippy configuration disallows
        // calling `Regex::new` directly; this macro is the sanctioned exception.
        #[allow(clippy::disallowed_methods)]
        $vis static $name: std::sync::LazyLock<regex::Regex> =
            std::sync::LazyLock::new(|| regex::Regex::new($regex).unwrap());
        // Expand the remaining entries, if any.
        $crate::cached_regex!($($rest)*);
    };
}
14
15thread_local! {
16    static LOCAL_REGEX_CACHE: RefCell<RegexCache> = RefCell::new(RegexCache::new());
17}
18
19pub fn compile_regex(re: &str) -> Result<Regex, regex::Error> {
20    LOCAL_REGEX_CACHE.with_borrow_mut(|cache| cache.compile(re).cloned())
21}
22
23pub fn with_regex_cache<R, F: FnOnce(&mut RegexCache) -> R>(f: F) -> R {
24    LOCAL_REGEX_CACHE.with_borrow_mut(f)
25}
26
/// Reads the regex size limit from the `RADIATE_REGEX_SIZE_LIMIT`
/// environment variable.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or is the
/// empty string; otherwise returns the value parsed as a `usize`.
///
/// # Panics
///
/// Panics if the variable holds a non-empty value that does not parse as a
/// `usize`.
fn get_size_limit() -> Option<usize> {
    let raw = std::env::var("RADIATE_REGEX_SIZE_LIMIT").ok()?;
    if raw.is_empty() {
        // An empty value is treated the same as an unset variable.
        return None;
    }
    let limit = raw.parse().expect("invalid RADIATE_REGEX_SIZE_LIMIT");
    Some(limit)
}
36
37/// A cache for compiled regular expressions.
38pub struct RegexCache {
39    cache: LruCache<String, Regex>,
40    size_limit: Option<usize>,
41}
42
43impl RegexCache {
44    fn new() -> Self {
45        Self {
46            cache: LruCache::with_capacity(32),
47            size_limit: get_size_limit(),
48        }
49    }
50
51    pub fn compile(&mut self, re: &str) -> Result<&Regex, regex::Error> {
52        let r = self.cache.try_get_or_insert_with(re, |re| {
53            // We do this little loop to only check RADIATE_REGEX_SIZE_LIMIT when
54            // a regex fails to compile due to the size limit.
55            loop {
56                let mut builder = RegexBuilder::new(re);
57                if let Some(bytes) = self.size_limit {
58                    builder.size_limit(bytes);
59                }
60                match builder.build() {
61                    err @ Err(regex::Error::CompiledTooBig(_)) => {
62                        let new_size_limit = get_size_limit();
63                        if new_size_limit != self.size_limit {
64                            self.size_limit = new_size_limit;
65                            continue; // Try to compile again.
66                        }
67                        break err;
68                    }
69                    r => break r,
70                };
71            }
72        });
73        Ok(&*r?)
74    }
75}
76
#[cfg(test)]
mod tests {
    use super::RegexCache;

    /// Compiling the same pattern twice must yield the same cached entry,
    /// and distinct patterns must produce working, distinct regexes.
    #[test]
    fn caches_regexes() {
        const DIGITS: &str = r"^\d+$";
        const LOWERCASE: &str = r"^[a-z]+$";

        let mut cache = RegexCache::new();

        let first = cache.compile(DIGITS).unwrap();
        assert!(first.is_match("123"));
        let first_addr = first as *const _;

        // The identical pattern is expected to be served from the cache, so
        // both references must point at the same stored regex.
        let second = cache.compile(DIGITS).unwrap();
        assert!(std::ptr::eq(first_addr, second as *const _));

        let letters = cache.compile(LOWERCASE).unwrap();
        assert!(letters.is_match("abc"));
        assert!(!letters.is_match("123"));
    }
}