vsec 0.0.1

Detect secrets in Rust codebases
Documentation
//! Aho-Corasick multi-pattern matcher wrapper.
//!
//! Provides O(n) multi-pattern matching instead of O(n*m) iterative contains() calls.
//! The aho-corasick crate uses SIMD internally when available.

use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use std::sync::OnceLock;

/// A compiled multi-pattern matcher using the Aho-Corasick algorithm.
///
/// This provides O(n) matching against multiple patterns simultaneously,
/// which is much faster than checking each pattern individually with contains().
///
/// `Clone` is derived; `Debug` is implemented by hand further down and reports
/// only the number of patterns.
#[derive(Clone)]
pub struct PatternMatcher {
    /// Automaton compiled from `patterns` by [`PatternMatcher::new`].
    automaton: AhoCorasick,
    /// Owned copies of the patterns, kept in the order they were handed to the
    /// automaton builder so a match's pattern index can be mapped back to text.
    patterns: Vec<String>,
}

impl PatternMatcher {
    /// Build a new pattern matcher from a list of patterns.
    ///
    /// Matching is ASCII case-insensitive and uses leftmost-first semantics:
    /// when several patterns match at the leftmost position, the one listed
    /// earliest in `patterns` is the one reported. Pattern order therefore
    /// matters whenever one pattern is a prefix of another.
    ///
    /// # Panics
    ///
    /// Panics if the Aho-Corasick automaton cannot be built from `patterns`.
    pub fn new<S: AsRef<str>>(patterns: &[S]) -> Self {
        let patterns: Vec<String> = patterns.iter().map(|s| s.as_ref().to_owned()).collect();

        let automaton = AhoCorasickBuilder::new()
            .ascii_case_insensitive(true)
            .match_kind(MatchKind::LeftmostFirst)
            .build(&patterns)
            .expect("Failed to build Aho-Corasick automaton");

        Self { automaton, patterns }
    }

    /// Build a matcher from string slices.
    ///
    /// Thin wrapper around [`PatternMatcher::new`] that fixes the element type
    /// to `&str`, so array literals (including an empty one) need no type
    /// annotation at the call site.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`PatternMatcher::new`].
    pub fn from_static(patterns: &[&str]) -> Self {
        Self::new(patterns)
    }

    /// Check if the input contains any of the patterns.
    #[inline]
    pub fn is_match(&self, haystack: &str) -> bool {
        self.automaton.is_match(haystack)
    }

    /// Find the first (leftmost) match in `haystack`, returning the index of
    /// the matching pattern in the list the matcher was built from.
    ///
    /// Returns `None` when no pattern occurs in `haystack`.
    #[inline]
    pub fn find_first_index(&self, haystack: &str) -> Option<usize> {
        self.automaton
            .find(haystack)
            .map(|m| m.pattern().as_usize())
    }

    /// Find the first (leftmost) match in `haystack`, returning the pattern
    /// text as it was supplied at construction (not the matched slice of
    /// `haystack`, which may differ in ASCII case).
    #[inline]
    pub fn find_first(&self, haystack: &str) -> Option<&str> {
        // Go through the checked accessor instead of indexing directly.
        self.find_first_index(haystack)
            .and_then(|idx| self.pattern(idx))
    }

    /// Find the pattern index of every non-overlapping match.
    ///
    /// Results are in order of occurrence in `haystack`; a pattern that occurs
    /// several times contributes one entry per occurrence. Because matches do
    /// not overlap, a pattern that only occurs inside an already reported
    /// match is not listed.
    pub fn find_all_indices(&self, haystack: &str) -> Vec<usize> {
        self.automaton
            .find_iter(haystack)
            .map(|m| m.pattern().as_usize())
            .collect()
    }

    /// Find the pattern text of every non-overlapping match.
    ///
    /// Same ordering and repetition rules as
    /// [`PatternMatcher::find_all_indices`], but yields the pattern strings.
    pub fn find_all(&self, haystack: &str) -> Vec<&str> {
        self.automaton
            .find_iter(haystack)
            // Pattern IDs follow the order of `self.patterns` (the automaton
            // was built from that exact vector), so the index is in bounds.
            .map(|m| self.patterns[m.pattern().as_usize()].as_str())
            .collect()
    }

    /// Get the pattern at a specific index, or `None` if `index` is out of range.
    #[inline]
    pub fn pattern(&self, index: usize) -> Option<&str> {
        self.patterns.get(index).map(String::as_str)
    }

    /// Get the number of patterns in this matcher.
    #[inline]
    pub fn len(&self) -> usize {
        self.patterns.len()
    }

    /// Check if this matcher has no patterns.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.patterns.is_empty()
    }
}

impl std::fmt::Debug for PatternMatcher {
    /// Writes the type name with a single `pattern_count` field; the automaton
    /// and the pattern texts are left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pattern_count = self.patterns.len();
        let mut out = f.debug_struct("PatternMatcher");
        out.field("pattern_count", &pattern_count);
        out.finish()
    }
}

/// Ready-made matchers for common use cases.
/// Each one is built the first time it is requested and reused afterwards.
pub mod prebuilt {
    use super::*;

    /// Return the matcher stored in `cell`, building it from `patterns` on first use.
    fn cached(
        cell: &'static OnceLock<PatternMatcher>,
        patterns: &[&str],
    ) -> &'static PatternMatcher {
        cell.get_or_init(|| PatternMatcher::from_static(patterns))
    }

    /// Matcher for logging function names.
    pub fn logging_functions() -> &'static PatternMatcher {
        const NAMES: &[&str] = &[
            "print",
            "println",
            "eprint",
            "eprintln",
            "write",
            "writeln",
            "log",
            "trace",
            "debug",
            "info",
            "warn",
            "error",
            "emit",
            "record",
            "metric",
            "telemetry",
            "audit",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, NAMES)
    }

    /// Matcher for command-like variable names.
    pub fn command_like() -> &'static PatternMatcher {
        const NAMES: &[&str] = &[
            "cmd", "command", "action", "op", "operation", "verb", "method", "mode", "type",
            "kind", "variant", "arg", "args", "argument", "argv", "flag", "flags", "option",
            "opt", "route", "path", "endpoint", "uri", "url", "resource", "message_type",
            "msg_type", "event_type", "packet_type", "frame_type",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, NAMES)
    }

    /// Matcher for auth-like variable names.
    pub fn auth_like() -> &'static PatternMatcher {
        const NAMES: &[&str] = &[
            "token", "auth_token", "access_token", "bearer", "jwt", "session_token", "password",
            "passwd", "pwd", "pass", "secret", "credential", "credentials", "key", "api_key",
            "apikey", "secret_key", "private_key", "signing_key", "auth", "authorization",
            "authenticate", "hash", "digest", "signature",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, NAMES)
    }

    /// Matcher for input-like variable names.
    pub fn input_like() -> &'static PatternMatcher {
        const NAMES: &[&str] = &[
            "input", "stdin", "request", "req", "query", "param", "params", "body", "payload",
            "data", "content", "message", "msg", "buffer", "buf", "reader", "stream",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, NAMES)
    }

    /// Matcher for auth function names, used for consequence analysis.
    pub fn auth_functions() -> &'static PatternMatcher {
        const NAMES: &[&str] = &[
            "authenticate", "authorize", "login", "logout", "sign_in", "sign_out", "verify",
            "validate", "check_auth", "check_permission", "grant", "revoke", "set_password",
            "change_password", "reset_password",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, NAMES)
    }

    /// Matcher for process control function names.
    pub fn process_functions() -> &'static PatternMatcher {
        const NAMES: &[&str] = &[
            "exit",
            "abort",
            "panic",
            "terminate",
            "kill",
            "spawn",
            "exec",
            "fork",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, NAMES)
    }

    /// Matcher for auth field names, used for assignment detection.
    pub fn auth_fields() -> &'static PatternMatcher {
        const NAMES: &[&str] = &[
            "authenticated", "authorized", "logged_in", "is_admin", "is_authenticated",
            "is_authorized", "has_permission", "access_granted", "valid", "verified",
            "session_valid", "token_valid", "auth_status", "permission", "role",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, NAMES)
    }

    /// Matcher for placeholder markers.
    pub fn placeholders() -> &'static PatternMatcher {
        const MARKERS: &[&str] = &[
            "xxx", "your_", "replace", "changeme", "todo", "example", "placeholder", "dummy",
            "test", "<your", "[your", "${", "{{",
        ];
        static CELL: OnceLock<PatternMatcher> = OnceLock::new();
        cached(&CELL, MARKERS)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `matcher` reports a match for every entry of `haystacks`.
    fn assert_all_match(matcher: &PatternMatcher, haystacks: &[&str]) {
        for haystack in haystacks {
            assert!(
                matcher.is_match(haystack),
                "expected a match in {haystack:?}"
            );
        }
    }

    /// Substring hits (one with an upper-case haystack) and a miss.
    #[test]
    fn test_pattern_matcher_basic() {
        let m = PatternMatcher::from_static(&["foo", "bar", "baz"]);
        assert_all_match(&m, &["contains foo here", "BAR uppercase"]);
        assert!(!m.is_match("no match"));
    }

    /// A lower-case pattern matches upper-, title- and mixed-case haystacks.
    #[test]
    fn test_case_insensitive() {
        let m = PatternMatcher::from_static(&["password"]);
        assert_all_match(&m, &["PASSWORD", "Password", "pAsSwOrD"]);
    }

    /// `find_first` returns the pattern text on a hit and `None` on a miss.
    #[test]
    fn test_find_first() {
        let m = PatternMatcher::from_static(&["alpha", "beta", "gamma"]);
        let cases = [("test beta value", Some("beta")), ("no match", None)];
        for (haystack, expected) in cases {
            assert_eq!(m.find_first(haystack), expected);
        }
    }

    /// `find_all` reports each pattern that occurs in the haystack.
    #[test]
    fn test_find_all() {
        let m = PatternMatcher::from_static(&["a", "b", "c"]);
        let found = m.find_all("a b c a");
        for expected in ["a", "b", "c"] {
            assert!(found.contains(&expected), "missing {expected:?}");
        }
    }

    /// The logging matcher hits identifiers that embed logging names.
    #[test]
    fn test_prebuilt_logging() {
        assert_all_match(
            prebuilt::logging_functions(),
            &["println", "log_error", "debug_info"],
        );
    }

    /// The auth-like matcher hits identifiers that embed credential names.
    #[test]
    fn test_prebuilt_auth_like() {
        assert_all_match(
            prebuilt::auth_like(),
            &["user_password", "api_key_value", "AUTH_TOKEN"],
        );
    }

    /// The command-like matcher hits identifiers that embed command names.
    #[test]
    fn test_prebuilt_command_like() {
        assert_all_match(
            prebuilt::command_like(),
            &["command_type", "request_method", "action_kind"],
        );
    }

    /// A matcher with no patterns is empty and never matches.
    #[test]
    fn test_empty_matcher() {
        let m = PatternMatcher::from_static(&[]);
        assert!(m.is_empty());
        assert!(!m.is_match("anything"));
    }
}