zeph_tools/compression/regex_safe.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! DoS-safe regex compilation (S6 fix).
5//!
6//! The `regex` crate has no compile-time deadline. `safe_compile` bounds compilation
7//! by running it in a `spawn_blocking` task raced against `tokio::time::timeout`.
8//!
9//! ## Thread cap (H2)
10//!
11//! `spawn_blocking` threads are not cancelled on timeout — they continue running until
12//! the pattern compiles or the blocking thread pool shuts down. To bound the maximum
13//! number of simultaneously live compile threads, a global `AtomicUsize` counter gates
14//! entry: if `MAX_COMPILE_TASKS` threads are already running, `safe_compile` returns
15//! [`CompressionError::CompileTimeout`] immediately without spawning a new one.
16
17use std::sync::atomic::{AtomicUsize, Ordering};
18use std::time::Duration;
19
20use super::CompressionError;
21
/// Maximum number of regex compile tasks allowed in-flight simultaneously.
const MAX_COMPILE_TASKS: usize = 4;

/// Number of compile tasks currently counted as in flight.
///
/// `safe_compile` checks this against [`MAX_COMPILE_TASKS`] before spawning a blocking
/// task, and the blocking task gives its slot back when compilation ends — so a compile
/// that outlives the caller's timeout keeps occupying a slot until it actually finishes.
static ACTIVE_COMPILE_TASKS: AtomicUsize = AtomicUsize::new(0);
26
27/// Compile a regex pattern with `DoS` protection.
28///
29/// Applies NFA size limit (64 KiB), DFA size limit (1 MiB), and a `timeout_ms`
30/// deadline enforced via `spawn_blocking` + `tokio::time::timeout`.
31///
32/// Returns [`CompressionError::CompileTimeout`] immediately when
33/// [`MAX_COMPILE_TASKS`] concurrent compilations are already in-flight.
34///
35/// On timeout or panic from the blocking task, returns a typed error that allows
36/// the evolver's failure counter to distinguish DoS-risk patterns from syntax errors.
37///
38/// # Errors
39///
40/// - [`CompressionError::BadPattern`] for syntax errors or task panics.
41/// - [`CompressionError::CompileTimeout`] when the in-flight limit is reached or
42/// compilation exceeds `timeout_ms`.
43pub async fn safe_compile(pat: &str, timeout_ms: u64) -> Result<regex::Regex, CompressionError> {
44 // Reject immediately if the thread cap is saturated.
45 let prev = ACTIVE_COMPILE_TASKS.fetch_add(1, Ordering::Relaxed);
46 if prev >= MAX_COMPILE_TASKS {
47 ACTIVE_COMPILE_TASKS.fetch_sub(1, Ordering::Relaxed);
48 return Err(CompressionError::CompileTimeout);
49 }
50
51 let pat = pat.to_owned();
52 let join = tokio::task::spawn_blocking(move || {
53 let result = regex::RegexBuilder::new(&pat)
54 .size_limit(64 * 1024)
55 .dfa_size_limit(1024 * 1024)
56 .build();
57 // Always decrement the counter when the blocking thread finishes.
58 ACTIVE_COMPILE_TASKS.fetch_sub(1, Ordering::Relaxed);
59 result
60 });
61
62 match tokio::time::timeout(Duration::from_millis(timeout_ms), join).await {
63 Err(_elapsed) => Err(CompressionError::CompileTimeout),
64 Ok(Err(_join_err)) => Err(CompressionError::BadPattern("compile task panicked".into())),
65 Ok(Ok(Err(regex_err))) => Err(CompressionError::BadPattern(regex_err.to_string())),
66 Ok(Ok(Ok(re))) => Ok(re),
67 }
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// A trivially valid pattern compiles within the deadline and matches digits.
    #[tokio::test]
    async fn compiles_simple_pattern() {
        let compiled = safe_compile(r"\d+", 500).await;
        let re = compiled.unwrap();
        assert!(re.is_match("123"));
    }

    /// An unterminated character class surfaces as a `BadPattern` error.
    #[tokio::test]
    async fn rejects_invalid_pattern() {
        let outcome = safe_compile(r"[invalid", 500).await;
        let err = outcome.unwrap_err();
        assert!(matches!(err, CompressionError::BadPattern(_)));
    }
}
85}