Skip to main content

zeph_tools/compression/
regex_safe.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! DoS-safe regex compilation (S6 fix).
5//!
6//! The `regex` crate has no compile-time deadline. `safe_compile` bounds compilation
7//! by running it in a `spawn_blocking` task raced against `tokio::time::timeout`.
8//!
9//! ## Thread cap (H2)
10//!
11//! `spawn_blocking` threads are not cancelled on timeout — they continue running until
12//! the pattern compiles or the blocking thread pool shuts down. To bound the maximum
13//! number of simultaneously live compile threads, a global `AtomicUsize` counter gates
14//! entry: if `MAX_COMPILE_TASKS` threads are already running, `safe_compile` returns
15//! [`CompressionError::CompileTimeout`] immediately without spawning a new one.
16
17use std::sync::atomic::{AtomicUsize, Ordering};
18use std::time::Duration;
19
20use super::CompressionError;
21
22/// Maximum number of regex compile tasks allowed in-flight simultaneously.
23const MAX_COMPILE_TASKS: usize = 4;
24
25static ACTIVE_COMPILE_TASKS: AtomicUsize = AtomicUsize::new(0);
26
27/// Compile a regex pattern with `DoS` protection.
28///
29/// Applies NFA size limit (64 KiB), DFA size limit (1 MiB), and a `timeout_ms`
30/// deadline enforced via `spawn_blocking` + `tokio::time::timeout`.
31///
32/// Returns [`CompressionError::CompileTimeout`] immediately when
33/// [`MAX_COMPILE_TASKS`] concurrent compilations are already in-flight.
34///
35/// On timeout or panic from the blocking task, returns a typed error that allows
36/// the evolver's failure counter to distinguish DoS-risk patterns from syntax errors.
37///
38/// # Errors
39///
40/// - [`CompressionError::BadPattern`] for syntax errors or task panics.
41/// - [`CompressionError::CompileTimeout`] when the in-flight limit is reached or
42///   compilation exceeds `timeout_ms`.
43pub async fn safe_compile(pat: &str, timeout_ms: u64) -> Result<regex::Regex, CompressionError> {
44    // Reject immediately if the thread cap is saturated.
45    let prev = ACTIVE_COMPILE_TASKS.fetch_add(1, Ordering::Relaxed);
46    if prev >= MAX_COMPILE_TASKS {
47        ACTIVE_COMPILE_TASKS.fetch_sub(1, Ordering::Relaxed);
48        return Err(CompressionError::CompileTimeout);
49    }
50
51    let pat = pat.to_owned();
52    let join = tokio::task::spawn_blocking(move || {
53        let result = regex::RegexBuilder::new(&pat)
54            .size_limit(64 * 1024)
55            .dfa_size_limit(1024 * 1024)
56            .build();
57        // Always decrement the counter when the blocking thread finishes.
58        ACTIVE_COMPILE_TASKS.fetch_sub(1, Ordering::Relaxed);
59        result
60    });
61
62    match tokio::time::timeout(Duration::from_millis(timeout_ms), join).await {
63        Err(_elapsed) => Err(CompressionError::CompileTimeout),
64        Ok(Err(_join_err)) => Err(CompressionError::BadPattern("compile task panicked".into())),
65        Ok(Ok(Err(regex_err))) => Err(CompressionError::BadPattern(regex_err.to_string())),
66        Ok(Ok(Ok(re))) => Ok(re),
67    }
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed pattern compiles within the deadline and the result is usable.
    #[tokio::test]
    async fn compiles_simple_pattern() {
        let compiled = safe_compile(r"\d+", 500).await;
        let re = compiled.unwrap();
        assert!(re.is_match("123"));
    }

    /// A syntactically invalid pattern is reported as `BadPattern`.
    #[tokio::test]
    async fn rejects_invalid_pattern() {
        let outcome = safe_compile(r"[invalid", 500).await;
        let err = outcome.unwrap_err();
        assert!(matches!(err, CompressionError::BadPattern(_)));
    }
}
85}