use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use std::sync::OnceLock;
/// Multi-pattern substring matcher that keeps an Aho-Corasick automaton
/// together with the pattern strings it was built from.
#[derive(Clone)]
pub struct PatternMatcher {
/// Automaton built from `patterns`; every search is delegated to it.
automaton: AhoCorasick,
/// Owned copies of the patterns, indexed by the pattern id that the
/// automaton reports for a match.
patterns: Vec<String>,
}
impl PatternMatcher {
    /// Builds a matcher from any slice of string-like patterns.
    ///
    /// Each pattern is copied into an owned `String` so matches can later be
    /// mapped back to their text. The automaton is configured for ASCII
    /// case-insensitive matching with leftmost-first semantics: when several
    /// patterns match at the same leftmost position, the one listed first in
    /// `patterns` is reported. As a consequence, a pattern that has an
    /// earlier-listed pattern as a prefix (e.g. `"println"` after `"print"`)
    /// is never reported by the `find_*` methods, although `is_match` is
    /// unaffected.
    ///
    /// # Panics
    ///
    /// Panics if the Aho-Corasick automaton cannot be built from `patterns`.
    pub fn new<S: AsRef<str>>(patterns: &[S]) -> Self {
        let patterns: Vec<String> = patterns.iter().map(|s| s.as_ref().to_owned()).collect();
        let automaton = AhoCorasickBuilder::new()
            .ascii_case_insensitive(true)
            .match_kind(MatchKind::LeftmostFirst)
            .build(&patterns)
            .expect("Failed to build Aho-Corasick automaton");
        Self { automaton, patterns }
    }

    /// Convenience constructor for a slice of `&str` patterns.
    ///
    /// Behaves exactly like [`PatternMatcher::new`], including its panic.
    pub fn from_static(patterns: &[&str]) -> Self {
        Self::new(patterns)
    }

    /// Returns `true` if any pattern occurs anywhere in `haystack`.
    #[inline]
    pub fn is_match(&self, haystack: &str) -> bool {
        self.automaton.is_match(haystack)
    }

    /// Returns the index (into the original pattern slice) of the first match
    /// in `haystack`, or `None` when nothing matches.
    #[inline]
    pub fn find_first_index(&self, haystack: &str) -> Option<usize> {
        self.automaton
            .find(haystack)
            .map(|m| m.pattern().as_usize())
    }

    /// Returns the text of the pattern behind the first match in `haystack`.
    ///
    /// The returned string is the pattern as it was supplied to the
    /// constructor, not the (possibly differently cased) haystack slice.
    #[inline]
    pub fn find_first(&self, haystack: &str) -> Option<&str> {
        let idx = self.find_first_index(haystack)?;
        // Pattern ids reported by the automaton always index `patterns`,
        // because both were built from the same list.
        Some(self.patterns[idx].as_str())
    }

    /// Returns the pattern index of every non-overlapping match, in the order
    /// the matches occur in `haystack`. A pattern that matches several times
    /// appears several times.
    pub fn find_all_indices(&self, haystack: &str) -> Vec<usize> {
        self.automaton
            .find_iter(haystack)
            .map(|m| m.pattern().as_usize())
            .collect()
    }

    /// Returns the pattern text of every non-overlapping match, in the order
    /// the matches occur in `haystack`.
    pub fn find_all(&self, haystack: &str) -> Vec<&str> {
        // Map matches straight to pattern text; going through
        // `find_all_indices` would allocate a throwaway `Vec<usize>`.
        self.automaton
            .find_iter(haystack)
            .map(|m| self.patterns[m.pattern().as_usize()].as_str())
            .collect()
    }

    /// Returns the pattern stored at `index`, or `None` if out of range.
    #[inline]
    pub fn pattern(&self, index: usize) -> Option<&str> {
        self.patterns.get(index).map(String::as_str)
    }

    /// Number of patterns this matcher was built from.
    #[inline]
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Returns `true` when the matcher was built from no patterns at all.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}
/// Compact debug representation: prints only how many patterns the matcher
/// holds, leaving out the automaton and the pattern strings themselves.
impl std::fmt::Debug for PatternMatcher {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pattern_count = self.patterns.len();
        let mut repr = f.debug_struct("PatternMatcher");
        repr.field("pattern_count", &pattern_count);
        repr.finish()
    }
}
/// Ready-made matchers, each built on first use and then shared for the
/// lifetime of the process.
pub mod prebuilt {
    use super::*;

    /// Returns the matcher stored in `cell`, building it from `patterns` the
    /// first time the cell is accessed.
    fn shared(
        cell: &'static OnceLock<PatternMatcher>,
        patterns: &[&str],
    ) -> &'static PatternMatcher {
        cell.get_or_init(|| PatternMatcher::from_static(patterns))
    }

    /// Shared matcher over logging-style name fragments.
    pub fn logging_functions() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "print",
            "println",
            "eprint",
            "eprintln",
            "write",
            "writeln",
            "log",
            "trace",
            "debug",
            "info",
            "warn",
            "error",
            "emit",
            "record",
            "metric",
            "telemetry",
            "audit",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }

    /// Shared matcher over command/dispatch-style name fragments.
    pub fn command_like() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "cmd",
            "command",
            "action",
            "op",
            "operation",
            "verb",
            "method",
            "mode",
            "type",
            "kind",
            "variant",
            "arg",
            "args",
            "argument",
            "argv",
            "flag",
            "flags",
            "option",
            "opt",
            "route",
            "path",
            "endpoint",
            "uri",
            "url",
            "resource",
            "message_type",
            "msg_type",
            "event_type",
            "packet_type",
            "frame_type",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }

    /// Shared matcher over credential/authentication-style name fragments.
    pub fn auth_like() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "token",
            "auth_token",
            "access_token",
            "bearer",
            "jwt",
            "session_token",
            "password",
            "passwd",
            "pwd",
            "pass",
            "secret",
            "credential",
            "credentials",
            "key",
            "api_key",
            "apikey",
            "secret_key",
            "private_key",
            "signing_key",
            "auth",
            "authorization",
            "authenticate",
            "hash",
            "digest",
            "signature",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }

    /// Shared matcher over input-carrying name fragments.
    pub fn input_like() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "input",
            "stdin",
            "request",
            "req",
            "query",
            "param",
            "params",
            "body",
            "payload",
            "data",
            "content",
            "message",
            "msg",
            "buffer",
            "buf",
            "reader",
            "stream",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }

    /// Shared matcher over authentication/authorization function-name fragments.
    pub fn auth_functions() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "authenticate",
            "authorize",
            "login",
            "logout",
            "sign_in",
            "sign_out",
            "verify",
            "validate",
            "check_auth",
            "check_permission",
            "grant",
            "revoke",
            "set_password",
            "change_password",
            "reset_password",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }

    /// Shared matcher over process-control function-name fragments.
    pub fn process_functions() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "exit",
            "abort",
            "panic",
            "terminate",
            "kill",
            "spawn",
            "exec",
            "fork",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }

    /// Shared matcher over authentication-state field-name fragments.
    pub fn auth_fields() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "authenticated",
            "authorized",
            "logged_in",
            "is_admin",
            "is_authenticated",
            "is_authorized",
            "has_permission",
            "access_granted",
            "valid",
            "verified",
            "session_valid",
            "token_valid",
            "auth_status",
            "permission",
            "role",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }

    /// Shared matcher over placeholder-value fragments.
    pub fn placeholders() -> &'static PatternMatcher {
        const PATTERNS: &[&str] = &[
            "xxx",
            "your_",
            "replace",
            "changeme",
            "todo",
            "example",
            "placeholder",
            "dummy",
            "test",
            "<your",
            "[your",
            "${",
            "{{",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        shared(&CELL, PATTERNS)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Substring hits are found, including one that differs only by case.
    #[test]
    fn test_pattern_matcher_basic() {
        let matcher = PatternMatcher::from_static(&["foo", "bar", "baz"]);
        for hit in ["contains foo here", "BAR uppercase"] {
            assert!(matcher.is_match(hit));
        }
        assert!(!matcher.is_match("no match"));
    }

    /// Any ASCII casing of a pattern matches.
    #[test]
    fn test_case_insensitive() {
        let matcher = PatternMatcher::from_static(&["password"]);
        for spelling in ["PASSWORD", "Password", "pAsSwOrD"] {
            assert!(matcher.is_match(spelling));
        }
    }

    /// `find_first` yields the pattern text, or `None` without a match.
    #[test]
    fn test_find_first() {
        let matcher = PatternMatcher::from_static(&["alpha", "beta", "gamma"]);
        let hit = matcher.find_first("test beta value");
        assert_eq!(hit, Some("beta"));
        let miss = matcher.find_first("no match");
        assert_eq!(miss, None);
    }

    /// Every pattern present in the haystack shows up in `find_all`.
    #[test]
    fn test_find_all() {
        let matcher = PatternMatcher::from_static(&["a", "b", "c"]);
        let matches = matcher.find_all("a b c a");
        for expected in ["a", "b", "c"] {
            assert!(matches.contains(&expected));
        }
    }

    /// The logging matcher hits on identifiers containing its fragments.
    #[test]
    fn test_prebuilt_logging() {
        let matcher = prebuilt::logging_functions();
        for name in ["println", "log_error", "debug_info"] {
            assert!(matcher.is_match(name));
        }
    }

    /// The auth-like matcher hits on credential-looking identifiers.
    #[test]
    fn test_prebuilt_auth_like() {
        let matcher = prebuilt::auth_like();
        for name in ["user_password", "api_key_value", "AUTH_TOKEN"] {
            assert!(matcher.is_match(name));
        }
    }

    /// The command-like matcher hits on dispatch-looking identifiers.
    #[test]
    fn test_prebuilt_command_like() {
        let matcher = prebuilt::command_like();
        for name in ["command_type", "request_method", "action_kind"] {
            assert!(matcher.is_match(name));
        }
    }

    /// A matcher built from no patterns is empty and never matches.
    #[test]
    fn test_empty_matcher() {
        let matcher = PatternMatcher::from_static(&[]);
        assert!(matcher.is_empty());
        assert!(!matcher.is_match("anything"));
    }
}