1use crate::engine::MatchEngine;
4use crate::MatchError;
5
/// Describes one pattern handed to [`PatternSetBuilder`].
///
/// `PartialEq`, `Eq` and `Hash` are derived in addition to `Debug`/`Clone` so
/// that definitions can be compared in assertions, de-duplicated, or used as
/// map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PatternDef {
    /// Caller-chosen identifier; the builder accepts duplicate ids.
    pub id: usize,
    /// Whether the pattern text is a literal or a regular expression.
    pub kind: PatternKind,
    /// Set to `true` by the `*_ci` builder methods and `false` by the others.
    pub case_insensitive: bool,
}

/// The pattern text carried by a [`PatternDef`], tagged with how it was supplied.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PatternKind {
    /// Text registered through `add_literal` / `add_literal_ci`.
    Literal(String),
    /// Text registered through `add_regex` / `add_regex_ci`.
    Regex(String),
}
25
/// A compiled collection of patterns.
///
/// Values are produced by [`PatternSetBuilder::build`], which compiles the
/// collected definitions into a [`MatchEngine`] and records how many there were.
pub struct PatternSet {
    /// Engine that scan and match queries are delegated to.
    engine: MatchEngine,
    /// Number of pattern definitions the engine was compiled from.
    pattern_count: usize,
}
31
32impl crate::Scanner for PatternSet {
33 fn scan(&self, input: &[u8]) -> Vec<crate::MatchResult> {
34 self.engine.scan(input)
35 }
36
37 fn is_match(&self, input: &[u8]) -> bool {
38 self.engine.is_match(input)
39 }
40
41 fn pattern_count(&self) -> usize {
42 self.pattern_count
43 }
44}
45
46impl PatternSet {
47 pub fn builder() -> PatternSetBuilder {
49 PatternSetBuilder::new()
50 }
51
52 pub fn scan_str(&self, input: &str) -> Vec<crate::MatchResult> {
54 crate::Scanner::scan(self, input.as_bytes())
55 }
56}
57
58pub struct PatternSetBuilder {
60 patterns: Vec<PatternDef>,
61}
62
63impl PatternSetBuilder {
64 pub fn new() -> Self {
66 Self {
67 patterns: Vec::new(),
68 }
69 }
70
71 pub fn add_literal(mut self, literal: &str, id: usize) -> Self {
73 self.patterns.push(PatternDef {
74 id,
75 kind: PatternKind::Literal(literal.to_string()),
76 case_insensitive: false,
77 });
78 self
79 }
80
81 pub fn add_literal_ci(mut self, literal: &str, id: usize) -> Self {
83 self.patterns.push(PatternDef {
84 id,
85 kind: PatternKind::Literal(literal.to_string()),
86 case_insensitive: true,
87 });
88 self
89 }
90
91 pub fn add_regex(mut self, regex: &str, id: usize) -> Self {
93 self.patterns.push(PatternDef {
94 id,
95 kind: PatternKind::Regex(regex.to_string()),
96 case_insensitive: false,
97 });
98 self
99 }
100
101 pub fn add_regex_ci(mut self, regex: &str, id: usize) -> Self {
103 self.patterns.push(PatternDef {
104 id,
105 kind: PatternKind::Regex(regex.to_string()),
106 case_insensitive: true,
107 });
108 self
109 }
110
111 pub fn add(mut self, pattern: PatternDef) -> Self {
113 self.patterns.push(pattern);
114 self
115 }
116
117 pub fn build(self) -> Result<PatternSet, MatchError> {
119 if self.patterns.is_empty() {
120 return Err(MatchError::Empty);
121 }
122 let count = self.patterns.len();
123 let engine = MatchEngine::compile(self.patterns)?;
124 Ok(PatternSet {
125 engine,
126 pattern_count: count,
127 })
128 }
129}
130
131impl Default for PatternSetBuilder {
132 fn default() -> Self {
133 Self::new()
134 }
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Scanner;

    /// Haystack shared by the two special-character tests.
    const SPECIAL_HAYSTACK: &str = "here is !@#$%^&*()_+ special";

    /// Building without any pattern is an error.
    #[test]
    fn builder_empty_fails() {
        let outcome = PatternSetBuilder::new().build();
        assert!(outcome.is_err());
    }

    /// A single literal yields a set of size one.
    #[test]
    fn builder_literal() {
        let builder = PatternSet::builder().add_literal("hello", 0);
        let set = builder.build().unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// A single regex yields a set of size one.
    #[test]
    fn builder_regex() {
        let builder = PatternSet::builder().add_regex(r"\d+", 0);
        let set = builder.build().unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// Literals, regexes and case-insensitive literals can share one set.
    #[test]
    fn builder_mixed() {
        let builder = PatternSet::builder()
            .add_literal("token", 0)
            .add_regex(r"[A-Z]{5}", 1)
            .add_literal_ci("SECRET", 2);
        let set = builder.build().unwrap();
        assert_eq!(set.pattern_count(), 3);
    }

    /// An empty literal builds, and scanning with it does not panic.
    #[test]
    fn edge_case_empty_literal() {
        let set = PatternSet::builder().add_literal("", 0).build().unwrap();
        assert_eq!(set.pattern_count(), 1);
        // The result is deliberately ignored: only the absence of a panic matters.
        let _ = set.scan_str("test");
    }

    /// One hundred distinct literals are all counted.
    #[test]
    fn edge_case_large_pattern_count() {
        let builder = (0..100).fold(PatternSet::builder(), |acc, i| {
            acc.add_literal(&format!("literal{}", i), i)
        });
        let set = builder.build().unwrap();
        assert_eq!(set.pattern_count(), 100);
    }

    /// Anchors, alternation and repetition are accepted in a regex.
    #[test]
    fn edge_case_complex_regex() {
        let builder = PatternSet::builder().add_regex(r"^(abc|def)*[0-9]+$", 99);
        let set = builder.build().unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// Case-insensitive regex and literal patterns can be combined.
    #[test]
    fn edge_case_regex_ci_mixed() {
        let builder = PatternSet::builder()
            .add_regex_ci(r"[a-z]", 1)
            .add_literal_ci("TeSt", 2);
        let set = builder.build().unwrap();
        assert_eq!(set.pattern_count(), 2);
    }

    /// Two patterns may share an id; both matches report that id.
    #[test]
    fn edge_case_multiple_same_id() {
        let set = PatternSet::builder()
            .add_literal("foo", 10)
            .add_literal("bar", 10)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 2);

        let hits = set.scan_str("foobar");
        assert_eq!(hits.len(), 2);
        assert!(hits.iter().all(|hit| hit.pattern_id == 10));
    }

    /// `scan_str`, `scan` and `is_match` agree on the same input.
    #[test]
    fn edge_case_scan_str_vs_bytes() {
        let set = PatternSet::builder().add_literal("rust", 1).build().unwrap();
        let text = "learning rust is fun";
        let bytes = text.as_bytes();

        assert_eq!(set.scan_str(text).len(), 1);
        assert_eq!(set.scan(bytes).len(), 1);
        assert!(set.is_match(bytes));
    }

    /// A hand-built `PatternDef` can be added directly.
    #[test]
    fn edge_case_add_raw_pattern_def() {
        let raw = PatternDef {
            id: 42,
            kind: PatternKind::Literal("raw".to_string()),
            case_insensitive: false,
        };
        let set = PatternSet::builder().add(raw).build().unwrap();
        assert_eq!(set.pattern_count(), 1);
    }

    /// Punctuation in a literal is matched verbatim.
    #[test]
    fn edge_case_special_chars_literal() {
        let set = PatternSet::builder()
            .add_literal("!@#$%^&*()_+", 5)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 1);

        let hits = set.scan_str(SPECIAL_HAYSTACK);
        assert_eq!(hits.len(), 1);
    }

    /// The same punctuation, escaped, is matched by a regex.
    #[test]
    fn edge_case_special_chars_regex() {
        let set = PatternSet::builder()
            .add_regex(r"\!\@#\$\%\^\&\*\(\)_\+", 5)
            .build()
            .unwrap();
        assert_eq!(set.pattern_count(), 1);

        let hits = set.scan_str(SPECIAL_HAYSTACK);
        assert_eq!(hits.len(), 1);
    }

    /// A long haystack without the needle reports no match.
    #[test]
    fn edge_case_very_long_input_no_match() {
        let set = PatternSet::builder()
            .add_literal("FINDME", 1)
            .build()
            .unwrap();
        let haystack = vec![b'x'; 100_000];
        assert!(!set.is_match(&haystack));
    }

    /// A default-constructed builder is empty, so building it fails.
    #[test]
    fn edge_case_default_builder() {
        let outcome = PatternSetBuilder::default().build();
        assert!(outcome.is_err());
    }
}
287}