use crate::engine::MatchEngine;
use crate::MatchError;
/// A single pattern definition handed to the matching engine.
///
/// Equality and hashing are structural: two definitions are equal when their
/// `id`, `kind` (including the exact pattern text) and `case_insensitive`
/// flag are all equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PatternDef {
    /// Caller-chosen identifier for this pattern. Nothing in this type
    /// requires it to be unique.
    pub id: usize,
    /// What to match: a literal string or a regular-expression source.
    pub kind: PatternKind,
    /// Case-insensitivity flag; how it is honoured is up to the engine that
    /// consumes the definition.
    pub case_insensitive: bool,
}

/// The flavour of a [`PatternDef`], carrying the pattern text.
///
/// Comparison is textual: `Literal("a")` and `Regex("a")` are different
/// values even though they might match the same input.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PatternKind {
    /// Text intended to be matched verbatim.
    Literal(String),
    /// Regular-expression source text. The accepted syntax is defined by the
    /// engine that compiles it, not by this type.
    Regex(String),
}
/// A compiled set of patterns ready for scanning.
///
/// Instances are produced by `PatternSetBuilder::build`; scanning is
/// delegated to the wrapped engine.
pub struct PatternSet {
    /// The engine returned by `MatchEngine::compile` for the supplied patterns.
    engine: MatchEngine,
    /// Number of pattern definitions that were handed to the engine at build
    /// time (recorded before compilation, duplicates included).
    pattern_count: usize,
}
impl crate::Scanner for PatternSet {
    /// Scans `input` with the compiled engine and returns the matches it
    /// reports, unmodified.
    fn scan(&self, input: &[u8]) -> Vec<crate::MatchResult> {
        let Self { engine, .. } = self;
        engine.scan(input)
    }

    /// Asks the compiled engine whether `input` matches, without collecting
    /// match results in this wrapper.
    fn is_match(&self, input: &[u8]) -> bool {
        let Self { engine, .. } = self;
        engine.is_match(input)
    }

    /// Returns how many pattern definitions this set was built from.
    fn pattern_count(&self) -> usize {
        let Self { pattern_count, .. } = self;
        *pattern_count
    }
}
impl PatternSet {
pub fn builder() -> PatternSetBuilder {
PatternSetBuilder::new()
}
pub fn scan_str(&self, input: &str) -> Vec<crate::MatchResult> {
crate::Scanner::scan(self, input.as_bytes())
}
}
/// Collects [`PatternDef`]s and compiles them into a `PatternSet`.
///
/// The builder is `Clone`, so a partially configured builder can be forked
/// and extended independently, and `Debug`, so it can be inspected in logs
/// and test failures.
#[derive(Debug, Clone)]
pub struct PatternSetBuilder {
    /// Definitions in the order they were added.
    patterns: Vec<PatternDef>,
}
impl PatternSetBuilder {
pub fn new() -> Self {
Self {
patterns: Vec::new(),
}
}
pub fn add_literal(mut self, literal: &str, id: usize) -> Self {
self.patterns.push(PatternDef {
id,
kind: PatternKind::Literal(literal.to_string()),
case_insensitive: false,
});
self
}
pub fn add_literal_ci(mut self, literal: &str, id: usize) -> Self {
self.patterns.push(PatternDef {
id,
kind: PatternKind::Literal(literal.to_string()),
case_insensitive: true,
});
self
}
pub fn add_regex(mut self, regex: &str, id: usize) -> Self {
self.patterns.push(PatternDef {
id,
kind: PatternKind::Regex(regex.to_string()),
case_insensitive: false,
});
self
}
pub fn add_regex_ci(mut self, regex: &str, id: usize) -> Self {
self.patterns.push(PatternDef {
id,
kind: PatternKind::Regex(regex.to_string()),
case_insensitive: true,
});
self
}
pub fn add(mut self, pattern: PatternDef) -> Self {
self.patterns.push(pattern);
self
}
pub fn build(self) -> Result<PatternSet, MatchError> {
if self.patterns.is_empty() {
return Err(MatchError::Empty);
}
let count = self.patterns.len();
let engine = MatchEngine::compile(self.patterns)?;
Ok(PatternSet {
engine,
pattern_count: count,
})
}
}
impl Default for PatternSetBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Scanner;

    /// A builder without any pattern must refuse to build.
    #[test]
    fn builder_empty_fails() {
        let outcome = PatternSetBuilder::new().build();
        assert!(outcome.is_err());
    }

    /// One literal yields a set reporting one pattern.
    #[test]
    fn builder_literal() {
        let set = PatternSet::builder().add_literal("hello", 0).build().unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// One regex yields a set reporting one pattern.
    #[test]
    fn builder_regex() {
        let set = PatternSet::builder().add_regex(r"\d+", 0).build().unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// Literals, regexes and case-insensitive variants can be combined.
    #[test]
    fn builder_mixed() {
        let builder = PatternSet::builder()
            .add_literal("token", 0)
            .add_regex(r"[A-Z]{5}", 1)
            .add_literal_ci("SECRET", 2);
        let set = builder.build().unwrap();
        assert_eq!(set.pattern_count(), 3);
    }

    /// An empty literal is accepted; scanning with it only has to complete.
    #[test]
    fn edge_case_empty_literal() {
        let set = PatternSet::builder().add_literal("", 0).build().unwrap();
        assert_eq!(set.pattern_count(), 1);
        // No assertion on the result: this only exercises the scan path.
        let _matches = set.scan_str("test");
    }

    /// A hundred distinct literals all end up in the set.
    #[test]
    fn edge_case_large_pattern_count() {
        let set = (0..100)
            .fold(PatternSet::builder(), |builder, i| {
                builder.add_literal(&format!("literal{}", i), i)
            })
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 100);
    }

    /// Anchors, alternation and repetition compile as a single pattern.
    #[test]
    fn edge_case_complex_regex() {
        let set = PatternSet::builder()
            .add_regex(r"^(abc|def)*[0-9]+$", 99)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// Case-insensitive regex and literal patterns can coexist.
    #[test]
    fn edge_case_regex_ci_mixed() {
        let set = PatternSet::builder()
            .add_regex_ci(r"[a-z]", 1)
            .add_literal_ci("TeSt", 2)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 2);
    }

    /// Two patterns may share an id; both matches carry that id.
    #[test]
    fn edge_case_multiple_same_id() {
        let set = PatternSet::builder()
            .add_literal("foo", 10)
            .add_literal("bar", 10)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 2);

        let hits = set.scan_str("foobar");
        assert_eq!(hits.len(), 2);
        for hit in &hits {
            assert_eq!(hit.pattern_id, 10);
        }
    }

    /// `scan_str`, `scan` and `is_match` agree on the same input.
    #[test]
    fn edge_case_scan_str_vs_bytes() {
        let set = PatternSet::builder().add_literal("rust", 1).build().unwrap();
        let text = "learning rust is fun";
        let raw = text.as_bytes();

        assert_eq!(set.scan_str(text).len(), 1);
        assert_eq!(set.scan(raw).len(), 1);
        assert!(set.is_match(raw));
    }

    /// A hand-built `PatternDef` can be added through `add`.
    #[test]
    fn edge_case_add_raw_pattern_def() {
        let set = PatternSet::builder()
            .add(PatternDef {
                id: 42,
                kind: PatternKind::Literal("raw".to_string()),
                case_insensitive: false,
            })
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// Punctuation in a literal is matched verbatim.
    #[test]
    fn edge_case_special_chars_literal() {
        let set = PatternSet::builder()
            .add_literal("!@#$%^&*()_+", 5)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 1);

        let hits = set.scan_str("here is !@#$%^&*()_+ special");
        assert_eq!(hits.len(), 1);
    }

    /// The same punctuation, escaped, is matched by a regex pattern.
    #[test]
    fn edge_case_special_chars_regex() {
        let set = PatternSet::builder()
            .add_regex(r"\!\@#\$\%\^\&\*\(\)_\+", 5)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 1);

        let hits = set.scan_str("here is !@#$%^&*()_+ special");
        assert_eq!(hits.len(), 1);
    }

    /// A long input that does not contain the literal reports no match.
    #[test]
    fn edge_case_very_long_input_no_match() {
        let set = PatternSet::builder()
            .add_literal("FINDME", 1)
            .build()
            .unwrap();
        let haystack = "x".repeat(100_000);
        assert!(!set.is_match(haystack.as_bytes()));
    }

    /// `Default` gives an empty builder, so building it fails too.
    #[test]
    fn edge_case_default_builder() {
        let outcome = PatternSetBuilder::default().build();
        assert!(outcome.is_err());
    }
}