use std::{
collections::HashMap,
sync::{Arc, LazyLock, OnceLock},
};
use anyhow::Result;
use regex::Regex;
/// A regex that is compiled on first use and cached afterwards.
///
/// Stores the regex source text plus a `OnceLock` holding the outcome of the
/// single compilation attempt. Both a successful compile and a compile error
/// are cached, so the source is compiled at most once.
struct LazyRegex {
/// Cached result of compiling `regex_str`; unset until the first `get` call.
regex: OnceLock<Result<Regex, regex::Error>>,
/// Regex source text passed to `Regex::new`.
regex_str: &'static str,
/// Name of the owning pattern, used in the trace log messages.
pattern_name: &'static str,
}
impl LazyRegex {
fn new(regex_str: &'static str, pattern_name: &'static str) -> Arc<Self> {
Arc::new(Self {
regex: OnceLock::new(),
regex_str,
pattern_name,
})
}
fn get(&self) -> Result<&Regex, regex::Error> {
let result = self.regex.get_or_init(|| {
let start = std::time::Instant::now();
let compile_result = Regex::new(self.regex_str);
let elapsed = start.elapsed();
if tracing::enabled!(tracing::Level::TRACE) {
match &compile_result {
Ok(_) => tracing::trace!(
"Compiled regex for pattern '{}' in {:?}",
self.pattern_name,
elapsed
),
Err(e) => tracing::trace!(
"Failed to compile regex for pattern '{}': {}",
self.pattern_name,
e
),
}
}
compile_result
});
match result {
Ok(r) => Ok(r),
Err(e) => Err(e.clone()),
}
}
}
/// A named pattern together with its lazily compiled regex and metadata.
///
/// Cloning is cheap for `name`, `description` and the regex (all behind
/// `Arc`), while `keywords` is copied.
#[derive(Clone)]
pub struct CompiledPattern {
/// Position of the source entry in `BASE_PATTERNS` when built by
/// `PatternLibrary`; also the key accepted by `PatternLibrary::get_pattern`.
pub index: usize,
/// Pattern name; also included in the error message of `get_regex`.
pub name: Arc<str>,
/// Lazily compiled regex; access it through `get_regex`.
regex: Arc<LazyRegex>,
/// Description text of the pattern.
pub description: Arc<str>,
/// Keywords attached to this pattern; `PatternLibrary` also merges them into
/// its library-wide keyword list.
pub keywords: Vec<String>,
/// Ordering weight: `PatternLibrary` sorts its patterns by this value,
/// highest first.
pub priority: u8,
/// Optional entropy threshold; `None` when no threshold applies.
/// NOTE(review): how consumers interpret this value is not visible here.
pub entropy_threshold: Option<f32>,
}
impl CompiledPattern {
    /// Returns the compiled regex for this pattern, compiling it on first use.
    ///
    /// # Errors
    ///
    /// Returns an error naming this pattern if its regex source fails to
    /// compile.
    pub fn get_regex(&self) -> Result<&Regex> {
        match self.regex.get() {
            Ok(compiled) => Ok(compiled),
            Err(e) => Err(anyhow::anyhow!(
                "Regex compilation failed for '{}': {}",
                self.name,
                e
            )),
        }
    }

    /// Builds a standalone pattern for unit tests.
    ///
    /// The result has index 0, no keywords, priority 5 and no entropy
    /// threshold.
    #[cfg(test)]
    pub fn test_pattern(
        name: &'static str,
        regex_str: &'static str,
        description: &'static str,
    ) -> Self {
        let regex = LazyRegex::new(regex_str, name);
        Self {
            index: 0,
            name: Arc::from(name),
            regex,
            description: Arc::from(description),
            keywords: Vec::new(),
            priority: 5,
            entropy_threshold: None,
        }
    }
}
/// Manual `Debug` implementation that prints every field except `regex`.
impl std::fmt::Debug for CompiledPattern {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructure exhaustively so that adding a field to the struct forces
        // a decision about whether it belongs in the debug output.
        let Self {
            index,
            name,
            regex: _,
            description,
            keywords,
            priority,
            entropy_threshold,
        } = self;

        let mut out = f.debug_struct("CompiledPattern");
        out.field("index", index);
        out.field("name", name);
        out.field("description", description);
        out.field("keywords", keywords);
        out.field("priority", priority);
        out.field("entropy_threshold", entropy_threshold);
        out.finish()
    }
}
/// Collection of every pattern built from `BASE_PATTERNS`, plus lookup data.
pub struct PatternLibrary {
/// All patterns, ordered by descending `priority`.
patterns: Vec<CompiledPattern>,
/// Sorted, de-duplicated union of the keywords of all patterns.
keywords: Vec<String>,
/// Patterns keyed by their `index` (position in `BASE_PATTERNS`).
pattern_map: HashMap<usize, Arc<CompiledPattern>>,
}
impl PatternLibrary {
    /// Builds the library from every entry in `BASE_PATTERNS`.
    ///
    /// Each entry becomes a [`CompiledPattern`] whose `index` is the entry's
    /// position in `BASE_PATTERNS`. The pattern is stored twice: in the
    /// priority-ordered list and in the index-keyed map. Keywords from all
    /// patterns are merged into one sorted, de-duplicated list.
    ///
    /// # Errors
    ///
    /// Propagates any error from `compile_base_pattern`.
    fn new() -> Result<Self> {
        use super::base_patterns::BASE_PATTERNS;

        let start = std::time::Instant::now();
        let mut all_patterns = Vec::new();
        let mut keywords: Vec<String> = Vec::new();
        let mut pattern_map = HashMap::new();

        for (index, base_pattern) in BASE_PATTERNS.iter().enumerate() {
            let compiled = Self::compile_base_pattern(index, base_pattern)?;
            // Clone the keyword strings directly into the merged list instead
            // of first cloning the whole Vec and then moving out of the copy.
            keywords.extend(compiled.keywords.iter().cloned());
            pattern_map.insert(index, Arc::new(compiled.clone()));
            all_patterns.push(compiled);
        }

        // Highest priority first. The sort is stable, so patterns with equal
        // priority keep their `BASE_PATTERNS` order.
        all_patterns.sort_by_key(|pattern| std::cmp::Reverse(pattern.priority));

        // Equal strings are indistinguishable, so an unstable sort yields the
        // same list; sorting first lets `dedup` remove every duplicate.
        keywords.sort_unstable();
        keywords.dedup();

        tracing::debug!(
            "Pattern library initialized with {} patterns in {:?}",
            all_patterns.len(),
            start.elapsed()
        );

        Ok(Self {
            patterns: all_patterns,
            keywords,
            pattern_map,
        })
    }

    /// Converts one base pattern into a [`CompiledPattern`].
    ///
    /// Despite the name, no regex is compiled here: the regex source is
    /// wrapped in a `LazyRegex` and compiled on first use. As written, this
    /// function always returns `Ok`.
    fn compile_base_pattern(
        index: usize,
        base: &super::base_patterns::BasePattern,
    ) -> Result<CompiledPattern> {
        // Only the "UUID Identifier" pattern carries an entropy threshold.
        // NOTE(review): 1e-30 is an unusually small value — confirm it matches
        // the scale expected by whatever consumes `entropy_threshold`.
        let entropy_threshold = match base.name {
            "UUID Identifier" => Some(1e-30),
            _ => None,
        };

        Ok(CompiledPattern {
            index,
            name: Arc::from(base.name),
            regex: LazyRegex::new(base.regex, base.name),
            description: Arc::from(base.description),
            keywords: base.keywords.iter().map(|&s| s.to_string()).collect(),
            priority: base.priority,
            entropy_threshold,
        })
    }

    /// Returns all patterns, ordered by descending priority.
    pub fn patterns(&self) -> &[CompiledPattern] {
        &self.patterns
    }

    /// Returns the sorted, de-duplicated keywords of all patterns.
    pub fn keywords(&self) -> &[String] {
        &self.keywords
    }

    /// Looks up a pattern by its `index` field.
    ///
    /// The key is the pattern's position in `BASE_PATTERNS`, which is not
    /// necessarily its position in the slice returned by `patterns()` because
    /// that slice is sorted by priority. Returns `None` for an unknown index.
    pub fn get_pattern(&self, index: usize) -> Option<Arc<CompiledPattern>> {
        self.pattern_map.get(&index).cloned()
    }

    /// Returns the number of patterns in the library.
    pub fn count(&self) -> usize {
        self.patterns.len()
    }
}
/// Process-wide pattern library, built on first access.
///
/// # Panics
///
/// The first access panics if `PatternLibrary::new` returns an error.
pub static PATTERN_LIBRARY: LazyLock<Arc<PatternLibrary>> = LazyLock::new(|| {
    let library =
        PatternLibrary::new().expect("Failed to initialize pattern library - this is fatal");
    Arc::new(library)
});
/// Returns a shared handle to the global [`PATTERN_LIBRARY`].
///
/// The first call initializes the library; each call only bumps the `Arc`
/// reference count and does not copy the library itself.
pub fn get_pattern_library() -> Arc<PatternLibrary> {
    Arc::clone(&PATTERN_LIBRARY)
}