use crate::filters::network::{NetworkFilterMask, NetworkFilterMaskHelper};
use regex::{
bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder,
bytes::RegexSet as BytesRegexSet, bytes::RegexSetBuilder as BytesRegexSetBuilder, Regex,
};
use std::sync::LazyLock;
use std::collections::HashMap;
use std::fmt;
use std::time::Duration;
#[cfg(test)]
#[cfg(not(target_arch = "wasm32"))]
use mock_instant::thread_local::Instant;
#[cfg(not(test))]
#[cfg(not(target_arch = "wasm32"))]
use std::time::Instant;
/// Stand-in for a clock instant on `wasm32` builds.
///
/// It is a unit struct and carries no time information; it exists so the rest of this
/// file can name an `Instant` type on that target. The time-based methods
/// (`update_time`, `cleanup`) are compiled out on `wasm32`.
#[cfg(target_arch = "wasm32")]
#[derive(Clone, Copy)]
pub struct Instant;
#[cfg(target_arch = "wasm32")]
impl Instant {
/// Returns the unit value. No clock is read.
pub fn now() -> Self {
Self
}
}
// SAFETY: NOTE(review): no justification is recorded for this manual impl. Every field
// visible in `RegexManager`'s definition is an owned value (a hash map of `RegexEntry`,
// a counter, two `Instant`s and a policy made of `Duration`s), so the automatically
// derived `Send` may already apply and this line may be redundant. Confirm that (in
// particular for the test-only `mock_instant` clock) before relying on or removing it.
unsafe impl Send for RegexManager {}
/// Default for `RegexManagerDiscardPolicy::cleanup_interval`: `update_time` triggers a
/// cleanup pass once at least this much time has passed since the previous one.
const DEFAULT_CLEAN_UP_INTERVAL: Duration = Duration::from_secs(30);
/// Default for `RegexManagerDiscardPolicy::discard_unused_time`: a cleanup pass drops the
/// compiled regex of every entry that has not been used for at least this long.
const DEFAULT_DISCARD_UNUSED_TIME: Duration = Duration::from_secs(180);
/// Snapshot of a `RegexManager`'s state, as returned by `RegexManager::get_debug_info`.
#[cfg(feature = "debug-info")]
pub struct RegexDebugInfo {
/// One record per entry in the manager's map.
pub regex_data: Vec<RegexDebugEntry>,
/// Number of regex compilations performed by `matches`. The counter is incremented on
/// every compilation and is not decremented when a regex is discarded.
pub compiled_regex_count: usize,
}
/// Debug view of a single entry in the manager's map.
#[cfg(feature = "debug-info")]
pub struct RegexDebugEntry {
/// The key the entry is stored under (the `key` argument passed to `matches`).
pub id: u64,
/// `Display` rendering of the compiled regex, or `None` if the entry currently holds
/// no compiled regex (for example after it was discarded).
pub regex: Option<String>,
/// Value of the manager's cached clock when `matches` last touched this entry.
pub last_used: Instant,
/// Number of `matches` calls that reached this entry.
pub usage_count: usize,
}
/// Result of translating one or more filter patterns into something that can be matched.
#[derive(Debug, Clone)]
pub enum CompiledRegex {
/// A single compiled byte-oriented regex (built when there is exactly one pattern).
Compiled(BytesRegex),
/// A compiled regex set (built when there is more than one pattern).
CompiledSet(BytesRegexSet),
/// Matches every input; produced when there are no patterns or any pattern is empty.
MatchAll,
/// The pattern(s) failed to build; `is_match` returns `false` for this variant.
RegexParsingError(regex::Error),
}
impl CompiledRegex {
pub fn is_match(&self, pattern: &str) -> bool {
match &self {
CompiledRegex::MatchAll => true, CompiledRegex::RegexParsingError(_e) => false, CompiledRegex::Compiled(r) => r.is_match(pattern.as_bytes()),
CompiledRegex::CompiledSet(r) => {
r.is_match(pattern.as_bytes())
}
}
}
}
impl fmt::Display for CompiledRegex {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match &self {
CompiledRegex::MatchAll => write!(f, ".*"), CompiledRegex::RegexParsingError(_e) => write!(f, "ERROR"), CompiledRegex::Compiled(r) => write!(f, "{}", r.as_str()),
CompiledRegex::CompiledSet(r) => write!(f, "{}", r.patterns().join(" | ")),
}
}
}
/// Per-key record stored in `RegexManager::map`.
struct RegexEntry {
/// The compiled regex, or `None` once it has been discarded; `matches` recompiles it on
/// the next use.
regex: Option<CompiledRegex>,
/// Value of the manager's cached clock at the most recent `matches` call for this key.
last_used: Instant,
/// Number of `matches` calls that reached this entry.
usage_count: usize,
}
/// Controls when a `RegexManager` drops compiled regexes that have not been used recently.
pub struct RegexManagerDiscardPolicy {
/// Minimum time between two cleanup passes triggered by `update_time`. A zero duration
/// disables those automatic cleanup passes.
pub cleanup_interval: Duration,
/// A cleanup pass discards the compiled regex of every entry whose last use is at least
/// this long ago.
pub discard_unused_time: Duration,
}
impl Default for RegexManagerDiscardPolicy {
fn default() -> Self {
Self {
cleanup_interval: DEFAULT_CLEAN_UP_INTERVAL,
discard_unused_time: DEFAULT_DISCARD_UNUSED_TIME,
}
}
}
/// Hasher factory used by the manager's map: every hasher is a default-constructed
/// `seahash::SeaHasher`.
type RandomState = std::hash::BuildHasherDefault<seahash::SeaHasher>;
/// Cache of compiled regexes keyed by a caller-supplied `u64`.
///
/// Regexes are compiled lazily by `matches` and may later be discarded according to the
/// configured `RegexManagerDiscardPolicy`; a discarded regex is recompiled on next use.
pub struct RegexManager {
/// Cached entries, keyed by the `key` argument of `matches`.
map: HashMap<u64, RegexEntry, RandomState>,
/// Number of compilations performed so far (never decremented in this file).
compiled_regex_count: usize,
/// Cached clock value. It is stamped onto entries by `matches` and refreshed only by
/// `update_time`.
now: Instant,
/// Clock value at which the last automatic cleanup pass ran. Unused on `wasm32`, where
/// `update_time` is compiled out.
#[cfg_attr(target_arch = "wasm32", allow(unused))]
last_cleanup: Instant,
/// Current discard policy; replaceable through `set_discard_policy`.
discard_policy: RegexManagerDiscardPolicy,
}
impl Default for RegexManager {
fn default() -> Self {
Self {
map: Default::default(),
compiled_regex_count: 0,
now: Instant::now(),
last_cleanup: Instant::now(),
discard_policy: Default::default(),
}
}
}
/// Compiles `filters` via `compile_regex`, taking the anchoring and complete-regex options
/// from the flags of `mask`.
fn make_regexp<'a, FiltersIter>(mask: NetworkFilterMask, filters: FiltersIter) -> CompiledRegex
where
    FiltersIter: Iterator<Item = &'a str> + ExactSizeIterator,
{
    let anchored_right = mask.is_right_anchor();
    let anchored_left = mask.is_left_anchor();
    let complete_regex = mask.is_complete_regex();
    compile_regex(filters, anchored_right, anchored_left, complete_regex)
}
#[allow(clippy::trivial_regex)]
pub(crate) fn compile_regex<'a, I>(
filters: I,
is_right_anchor: bool,
is_left_anchor: bool,
is_complete_regex: bool,
) -> CompiledRegex
where
I: Iterator<Item = &'a str> + ExactSizeIterator,
{
static SPECIAL_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"([\|\.\$\+\?\{\}\(\)\[\]])").unwrap());
static WILDCARD_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*").unwrap());
static ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^(.)").unwrap());
static ANCHOR_RE_EOL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^$").unwrap());
let mut escaped_patterns = Vec::with_capacity(filters.len());
for filter_str in filters {
if filter_str.is_empty() {
return CompiledRegex::MatchAll;
}
if is_complete_regex {
let unescaped = filter_str[1..filter_str.len() - 1]
.replace("\\/", "/")
.replace("\\:", ":");
escaped_patterns.push(unescaped);
} else {
let repl = SPECIAL_RE.replace_all(filter_str, "\\$1");
let repl = WILDCARD_RE.replace_all(&repl, ".*");
let repl = ANCHOR_RE.replace_all(&repl, "(?:[^\\w\\d\\._%-])$1");
let repl = ANCHOR_RE_EOL.replace_all(&repl, "(?:[^\\w\\d\\._%-]|$)");
let left_anchor = if is_left_anchor { "^" } else { "" };
let right_anchor = if is_right_anchor { "$" } else { "" };
let filter = format!("{left_anchor}{repl}{right_anchor}");
escaped_patterns.push(filter);
}
}
if escaped_patterns.is_empty() {
CompiledRegex::MatchAll
} else if escaped_patterns.len() == 1 {
let pattern = &escaped_patterns[0];
match BytesRegexBuilder::new(pattern).unicode(false).build() {
Ok(compiled) => CompiledRegex::Compiled(compiled),
Err(e) => {
CompiledRegex::RegexParsingError(e)
}
}
} else {
match BytesRegexSetBuilder::new(escaped_patterns)
.unicode(false)
.build()
{
Ok(compiled) => CompiledRegex::CompiledSet(compiled),
Err(e) => CompiledRegex::RegexParsingError(e),
}
}
}
impl RegexManager {
/// Checks `pattern` against the regex cached under `key`, compiling it on demand.
///
/// Returns `true` straight away when `mask` is flagged neither as a regex nor as a
/// complete regex. Otherwise the entry for `key` is looked up: an existing entry has its
/// usage counter and last-used stamp updated and is recompiled from `filters` if its regex
/// was discarded; a missing entry is created with a freshly compiled regex. Every
/// compilation increments the manager's compilation counter.
pub fn matches<'a, FiltersIter>(
    &mut self,
    mask: NetworkFilterMask,
    filters: FiltersIter,
    key: u64,
    pattern: &str,
) -> bool
where
    FiltersIter: Iterator<Item = &'a str> + ExactSizeIterator,
{
    use std::collections::hash_map::Entry;

    let needs_regex = mask.is_regex() || mask.is_complete_regex();
    if !needs_regex {
        return true;
    }

    let stamp = self.now;
    let compiled = match self.map.entry(key) {
        Entry::Occupied(slot) => {
            let entry = slot.into_mut();
            entry.usage_count += 1;
            entry.last_used = stamp;
            if entry.regex.is_none() {
                // The regex was discarded earlier; rebuild it.
                entry.regex = Some(make_regexp(mask, filters));
                self.compiled_regex_count += 1;
            }
            entry.regex.as_ref()
        }
        Entry::Vacant(slot) => {
            self.compiled_regex_count += 1;
            let fresh = RegexEntry {
                regex: Some(make_regexp(mask, filters)),
                last_used: stamp,
                usage_count: 1,
            };
            slot.insert(fresh).regex.as_ref()
        }
    };
    // Both arms above leave the entry holding a compiled regex.
    compiled.unwrap().is_match(pattern)
}
/// Refreshes the cached clock and, if the policy's cleanup interval is non-zero and has
/// elapsed since the previous cleanup pass, runs `cleanup`.
#[cfg(not(target_arch = "wasm32"))]
pub fn update_time(&mut self) {
    self.now = Instant::now();

    let interval = self.discard_policy.cleanup_interval;
    // A zero interval switches automatic cleanup off.
    if interval.is_zero() {
        return;
    }
    if self.now - self.last_cleanup < interval {
        return;
    }

    self.last_cleanup = self.now;
    self.cleanup();
}
/// Drops the compiled regex of every entry that, measured against the cached clock, has
/// been unused for at least the policy's `discard_unused_time`. The entries themselves
/// (usage counters and timestamps) stay in the map.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) fn cleanup(&mut self) {
    let reference = self.now;
    let max_idle = self.discard_policy.discard_unused_time;
    self.map
        .values_mut()
        .filter(|entry| reference - entry.last_used >= max_idle)
        .for_each(|entry| entry.regex = None);
}
/// Replaces the current discard policy with `new_discard_policy`. Only the stored policy
/// is changed; no cleanup pass is run here.
pub fn set_discard_policy(&mut self, new_discard_policy: RegexManagerDiscardPolicy) {
self.discard_policy = new_discard_policy;
}
/// Discards the compiled regex stored under `regex_id`, if such an entry exists.
///
/// The entry itself (usage counter and last-used stamp) is kept, so the regex is simply
/// recompiled by the next `matches` call for that key. Unknown ids are ignored.
#[cfg(feature = "debug-info")]
pub fn discard_regex(&mut self, regex_id: u64) {
    // Direct keyed lookup: keys are unique, so there is no need to walk the whole map.
    if let Some(entry) = self.map.get_mut(&regex_id) {
        entry.regex = None;
    }
}
/// Builds one `RegexDebugEntry` per cached entry, in the map's iteration order. The
/// regex is rendered through its `Display` impl and is `None` for entries whose regex is
/// not currently compiled.
#[cfg(feature = "debug-info")]
pub(crate) fn get_debug_regex_data(&self) -> Vec<RegexDebugEntry> {
    let mut records = Vec::with_capacity(self.map.len());
    for (&id, entry) in &self.map {
        records.push(RegexDebugEntry {
            id,
            regex: entry.regex.as_ref().map(ToString::to_string),
            last_used: entry.last_used,
            usage_count: entry.usage_count,
        });
    }
    records
}
/// Returns the manager's compilation counter: the number of times `matches` has compiled
/// a regex so far.
#[cfg(feature = "debug-info")]
pub(crate) fn get_compiled_regex_count(&self) -> usize {
self.compiled_regex_count
}
/// Collects the per-entry debug records and the compilation counter into a single
/// `RegexDebugInfo` snapshot.
#[cfg(feature = "debug-info")]
pub fn get_debug_info(&self) -> RegexDebugInfo {
    let regex_data = self.get_debug_regex_data();
    let compiled_regex_count = self.get_compiled_regex_count();
    RegexDebugInfo {
        regex_data,
        compiled_regex_count,
    }
}
}
#[cfg(test)]
#[path = "../tests/unit/regex_manager.rs"]
mod unit_tests;