multimatch 0.1.1

Multi-pattern matching engine — Aho-Corasick + regex with optional Hyperscan SIMD acceleration
Documentation
//! # multimatch
//!
//! Multi-pattern matching engine for security scanning.
//!
//! Every security tool needs to search text for many patterns simultaneously:
//! secret scanners match 1000+ credential regexes, vulnerability scanners match
//! response signatures, taint analyzers match sink patterns.
//!
//! This crate provides a unified interface with two backends:
//!
//! - **Aho-Corasick + regex** — always available, no native library required
//! - **Hyperscan SIMD** — 3-5x faster, requires `libhs` (feature `simd`)
//!
//! # Usage
//!
//! ```rust
//! use multimatch::{PatternSet, MatchResult, Scanner};
//!
//! let patterns = PatternSet::builder()
//!     .add_literal("password", 0)
//!     .add_literal("secret", 1)
//!     .add_regex(r"[A-Za-z0-9]{32,}", 2)
//!     .build()
//!     .unwrap();
//!
//! let matches = patterns.scan(b"my password is abc123secretXYZ");
//! assert!(matches.iter().any(|m| m.pattern_id == 0)); // "password"
//! assert!(matches.iter().any(|m| m.pattern_id == 1)); // "secret"
//! ```



#![forbid(unsafe_code)]

#![warn(missing_docs)]

mod engine;
mod pattern;

#[cfg(test)]
mod adversarial_tests;

pub use engine::{MatchEngine, MatchResult};
pub use pattern::{PatternDef, PatternKind, PatternSet, PatternSetBuilder};

/// Trait for types that can scan inputs for multiple patterns.
///
/// The trait exposes three operations: collecting the matches found in an
/// input, a yes/no check for whether anything matches, and a count of the
/// patterns the scanner holds. All methods take `&self` and operate on raw
/// bytes, so inputs do not need to be valid UTF-8.
pub trait Scanner {
    /// Scan a byte slice for all matching patterns.
    ///
    /// Returns the matches as a `Vec` of [`MatchResult`].
    // NOTE(review): ordering of the results and whether overlapping matches
    // are reported is decided by each implementation — not visible here.
    fn scan(&self, input: &[u8]) -> Vec<MatchResult>;
    
    /// Check if ANY pattern matches (short-circuits on first match).
    ///
    /// Returns `true` when at least one pattern matches `input`.
    // NOTE(review): short-circuiting is the intended contract; confirm that
    // every implementor honors it rather than delegating to a full `scan`.
    fn is_match(&self, input: &[u8]) -> bool;
    
    /// Number of compiled patterns.
    fn pattern_count(&self) -> usize;
}

/// Errors from pattern compilation or scanning.
///
/// The `Display` text of each variant is generated by `thiserror` from the
/// `#[error(...)]` attribute attached to it.
#[derive(Debug, thiserror::Error)]
pub enum MatchError {
    /// A regex pattern failed to compile.
    ///
    /// The rendered message contains both the pattern ID and the underlying
    /// regex error text.
    #[error("invalid regex pattern {id}: {source}")]
    InvalidRegex {
        /// Pattern ID.
        id: usize,
        /// The regex error.
        // `thiserror` treats a field named `source` as the error's cause, so
        // it is also what `std::error::Error::source()` returns.
        source: regex::Error,
    },
    /// Aho-Corasick build failed.
    ///
    /// Carries the failure description as a plain `String`.
    #[error("aho-corasick build failed: {0}")]
    AhoCorasick(String),
    /// No patterns provided.
    // NOTE(review): presumably produced when a pattern set is built with no
    // patterns added — confirm in the `pattern` module.
    #[error("no patterns to compile")]
    Empty,
}

/// Builds a [`PatternSet`] made up solely of literal patterns.
///
/// The ID assigned to each literal is its position in `literals`: the first
/// entry gets ID 0, the second ID 1, and so on.
///
/// # Errors
///
/// Returns whatever [`MatchError`] the builder's `build` step reports.
pub fn from_literals(literals: &[&str]) -> Result<PatternSet, MatchError> {
    literals
        .iter()
        .enumerate()
        // Thread the builder through the iteration instead of rebinding a
        // mutable local on every step.
        .fold(PatternSet::builder(), |acc, (index, literal)| {
            acc.add_literal(literal, index)
        })
        .build()
}

/// Builds a [`PatternSet`] made up solely of regex patterns.
///
/// The ID assigned to each regex is its position in `regexes`: the first
/// entry gets ID 0, the second ID 1, and so on.
///
/// # Errors
///
/// Returns whatever [`MatchError`] the builder's `build` step reports.
pub fn from_regexes(regexes: &[&str]) -> Result<PatternSet, MatchError> {
    regexes
        .iter()
        .enumerate()
        // Thread the builder through the iteration instead of rebinding a
        // mutable local on every step.
        .fold(PatternSet::builder(), |acc, (index, expr)| {
            acc.add_regex(expr, index)
        })
        .build()
}

/// Builds a [`PatternSet`] from `(pattern, id)` pairs, using the caller's IDs.
///
/// Each pattern is classified automatically: if it contains at least one
/// regex metacharacter (any of `. * + ? ^ $ { } ( ) | [ ] \`) it is added as
/// a regex, otherwise it is added as a literal.
///
/// # Errors
///
/// Returns whatever [`MatchError`] the builder's `build` step reports.
pub fn from_pairs(pairs: &[(&str, usize)]) -> Result<PatternSet, MatchError> {
    // Any one of these characters makes a pattern count as a regex.
    const REGEX_METACHARS: &str = ".*+?^${}()|[]\\";

    pairs
        .iter()
        .fold(PatternSet::builder(), |acc, &(pattern, id)| {
            let looks_like_regex = pattern.contains(|ch: char| REGEX_METACHARS.contains(ch));
            if looks_like_regex {
                acc.add_regex(pattern, id)
            } else {
                acc.add_literal(pattern, id)
            }
        })
        .build()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::Scanner;

    // Hand-written `Scanner` that ignores its input; it exists only to show
    // that the trait can be implemented outside the built-in pattern set.
    struct MockScanner;

    impl Scanner for MockScanner {
        fn pattern_count(&self) -> usize {
            1
        }

        fn is_match(&self, _input: &[u8]) -> bool {
            true
        }

        fn scan(&self, _input: &[u8]) -> Vec<MatchResult> {
            vec![MatchResult { pattern_id: 42, start: 0, end: 1 }]
        }
    }

    #[test]
    fn from_literals_empty_fails() {
        let outcome = from_literals(&[]);
        assert!(outcome.is_err());
    }

    #[test]
    fn from_literals_convenience() {
        let set = from_literals(&["hello", "world"]).unwrap();
        let hits = set.scan(b"hello world");
        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn from_regexes_convenience() {
        let set = from_regexes(&[r"\d+", r"[a-z]+"]).unwrap();
        let seen_ids: Vec<_> = set
            .scan(b"abc123")
            .iter()
            .map(|hit| hit.pattern_id)
            .collect();
        assert!(seen_ids.contains(&0)); // digits
        assert!(seen_ids.contains(&1)); // letters
    }

    #[test]
    fn custom_scanner_impl() {
        let mock = MockScanner;
        let hits = mock.scan(b"foo");
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].pattern_id, 42);
        assert!(mock.is_match(b"foo"));
        assert_eq!(mock.pattern_count(), 1);
    }
}

/// Convenience re-exports for common usage.
///
/// ```rust
/// use multimatch::prelude::*;
/// ```
pub mod prelude {
    pub use crate::{PatternSet, PatternSetBuilder, PatternDef, PatternKind, MatchResult, MatchError};
    pub use crate::{Scanner, from_literals, from_regexes};
}