//! Crate root of a pattern-matching library.
//!
//! This file wires together the private `engine` and `pattern` modules and
//! re-exports their public types: [`MatchEngine`] and [`MatchResult`] from
//! `engine`, and [`PatternDef`], [`PatternKind`], [`PatternSet`] and
//! [`PatternSetBuilder`] from `pattern`. It also declares the [`Scanner`]
//! trait, the [`MatchError`] type, and the convenience constructors
//! [`from_literals`], [`from_regexes`] and [`from_pairs`].
#![forbid(unsafe_code)]
#![warn(missing_docs)]
// Implementation modules stay private; their public types are re-exported below.
mod engine;
mod pattern;
// Compiled only for test builds.
#[cfg(test)]
mod adversarial_tests;
pub use engine::{MatchEngine, MatchResult};
pub use pattern::{PatternDef, PatternKind, PatternSet, PatternSetBuilder};
/// Interface for types that scan a byte slice and report matches.
///
/// The trait only fixes the signatures below; the exact matching semantics
/// (overlapping vs. non-overlapping matches, result ordering) are defined by
/// each implementor.
pub trait Scanner {
/// Scans `input` and returns the [`MatchResult`]s found in it.
fn scan(&self, input: &[u8]) -> Vec<MatchResult>;
/// Reports whether `input` matches.
///
/// NOTE(review): presumably `true` when at least one pattern matches, i.e.
/// consistent with `scan` returning a non-empty vector; the trait itself
/// does not enforce this, so confirm against the implementors.
fn is_match(&self, input: &[u8]) -> bool;
/// Returns a pattern count for this scanner.
///
/// NOTE(review): presumably the number of patterns the scanner was built
/// with; confirm against the implementors.
fn pattern_count(&self) -> usize;
}
/// Errors surfaced when compiling patterns, as returned by the constructors
/// in this file ([`from_literals`], [`from_regexes`], [`from_pairs`]).
#[derive(Debug, thiserror::Error)]
pub enum MatchError {
/// A regex pattern could not be compiled.
#[error("invalid regex pattern {id}: {source}")]
InvalidRegex {
/// Identifier of the offending pattern, as shown in the error message.
id: usize,
/// The underlying regex compilation error. Because the field is named
/// `source`, `thiserror` also exposes it through `Error::source()`.
source: regex::Error,
},
/// Building the Aho-Corasick automaton failed.
///
/// The payload is the text interpolated into the message; presumably the
/// underlying builder's error rendered as a string (confirm at the
/// construction site).
#[error("aho-corasick build failed: {0}")]
AhoCorasick(String),
/// There were no patterns to compile.
#[error("no patterns to compile")]
Empty,
}
/// Builds a [`PatternSet`] in which every entry of `literals` is added as a
/// literal pattern.
///
/// Each literal is added together with its index in the slice, which is
/// passed to the builder as the pattern id (first entry `0`, second `1`, ...).
///
/// # Errors
///
/// Returns whatever error the builder's `build` step reports. The unit tests
/// in this file expect an empty slice to produce an error.
pub fn from_literals(literals: &[&str]) -> Result<PatternSet, MatchError> {
    literals
        .iter()
        .enumerate()
        .fold(PatternSet::builder(), |acc, (index, literal)| {
            acc.add_literal(literal, index)
        })
        .build()
}
/// Builds a [`PatternSet`] in which every entry of `regexes` is added as a
/// regex pattern.
///
/// Each expression is added together with its index in the slice, which is
/// passed to the builder as the pattern id (first entry `0`, second `1`, ...).
///
/// # Errors
///
/// Returns whatever error the builder's `build` step reports.
pub fn from_regexes(regexes: &[&str]) -> Result<PatternSet, MatchError> {
    regexes
        .iter()
        .enumerate()
        .fold(PatternSet::builder(), |acc, (index, expr)| {
            acc.add_regex(expr, index)
        })
        .build()
}
/// Builds a [`PatternSet`] from `(pattern, id)` pairs, choosing the pattern
/// kind per entry.
///
/// A pattern containing any of the characters `. * + ? ^ $ { } ( ) | [ ] \`
/// is added as a regex; every other pattern is added as a literal. Each
/// pattern is registered under the id supplied in its pair.
///
/// # Errors
///
/// Returns whatever error the builder's `build` step reports.
pub fn from_pairs(pairs: &[(&str, usize)]) -> Result<PatternSet, MatchError> {
    // Characters whose presence makes a pattern count as a regex.
    const REGEX_META: &str = ".*+?^${}()|[]\\";

    pairs
        .iter()
        .fold(PatternSet::builder(), |acc, &(pattern, id)| {
            let looks_like_regex = pattern.chars().any(|ch| REGEX_META.contains(ch));
            if looks_like_regex {
                acc.add_regex(pattern, id)
            } else {
                acc.add_literal(pattern, id)
            }
        })
        .build()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Scanner;

    // --- convenience constructors -------------------------------------

    /// Two literals, both present in the input, give two results.
    #[test]
    fn from_literals_convenience() {
        let set = from_literals(&["hello", "world"]).unwrap();
        let hits = set.scan(b"hello world");
        assert_eq!(hits.len(), 2);
    }

    /// Both regexes (ids 0 and 1) must be reported for mixed input.
    #[test]
    fn from_regexes_convenience() {
        let set = from_regexes(&[r"\d+", r"[a-z]+"]).unwrap();
        let hits = set.scan(b"abc123");
        for expected_id in [0, 1] {
            assert!(hits.iter().any(|hit| hit.pattern_id == expected_id));
        }
    }

    /// An empty literal list is rejected.
    #[test]
    fn from_literals_empty_fails() {
        let outcome = from_literals(&[]);
        assert!(outcome.is_err());
    }

    // --- the trait can be implemented by hand ---------------------------

    /// Hand-written `Scanner` that ignores its input and returns fixed answers.
    struct MockScanner;

    impl Scanner for MockScanner {
        fn scan(&self, _input: &[u8]) -> Vec<MatchResult> {
            let fixed = MatchResult {
                pattern_id: 42,
                start: 0,
                end: 1,
            };
            vec![fixed]
        }

        fn is_match(&self, _input: &[u8]) -> bool {
            true
        }

        fn pattern_count(&self) -> usize {
            1
        }
    }

    /// Exercises every `Scanner` method through the mock implementation.
    #[test]
    fn custom_scanner_impl() {
        let mock = MockScanner;
        assert_eq!(mock.pattern_count(), 1);
        assert!(mock.is_match(b"foo"));

        let hits = mock.scan(b"foo");
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].pattern_id, 42);
    }
}
pub mod prelude {
pub use crate::{PatternSet, PatternSetBuilder, PatternDef, PatternKind, MatchResult, MatchError};
pub use crate::{Scanner, from_literals, from_regexes};
}