1use super::DetectorSpec;
4use regex_syntax::ast::{self, Ast};
5use serde::Serialize;
6
7const MAX_REGEX_PATTERN_LEN: usize = 4096;
8const MAX_REGEX_AST_NODES: usize = 512;
9const MAX_REGEX_ALTERNATION_BRANCHES: usize = 64;
10const MAX_REGEX_REPEAT_BOUND: u32 = 10_000;
11
12#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
23pub enum QualityIssue {
24 Error(String),
25 Warning(String),
26}
27
28pub fn validate_detector(spec: &DetectorSpec) -> Vec<QualityIssue> {
53 let mut issues = Vec::new();
54 validate_patterns_present(spec, &mut issues);
55 validate_regexes(spec, &mut issues);
56 validate_keywords(spec, &mut issues);
57 validate_pattern_specificity(spec, &mut issues);
58 validate_companion(spec, &mut issues);
59 validate_verify_spec(spec, &mut issues);
60 issues
61}
62
63fn validate_patterns_present(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
64 if spec.patterns.is_empty() {
65 issues.push(QualityIssue::Error("no patterns defined".into()));
66 }
67}
68
69fn validate_regexes(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
70 for (i, pat) in spec.patterns.iter().enumerate() {
71 if pat.regex.len() > MAX_REGEX_PATTERN_LEN {
72 issues.push(QualityIssue::Error(format!(
73 "pattern {} regex is too large ({} bytes > {} byte limit)",
74 i,
75 pat.regex.len(),
76 MAX_REGEX_PATTERN_LEN
77 )));
78 continue;
79 }
80
81 match ast::parse::Parser::new().parse(&pat.regex) {
82 Ok(ast) => validate_regex_complexity(i, &ast, issues),
83 Err(_) => issues.push(QualityIssue::Error(format!(
84 "pattern {} regex does not compile: {}",
85 i, pat.regex
86 ))),
87 }
88 }
89}
90
91fn validate_keywords(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
92 if spec.keywords.is_empty() {
93 issues.push(QualityIssue::Warning(
94 "no keywords defined — pattern may produce false positives".into(),
95 ));
96 }
97}
98
99fn validate_pattern_specificity(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
100 for (i, pat) in spec.patterns.iter().enumerate() {
101 let has_prefix = has_literal_prefix(&pat.regex, 3);
102 let has_group = pat.group.is_some();
103 let is_pure_charclass = is_pure_character_class(&pat.regex);
104
105 if is_pure_charclass && !has_group {
106 issues.push(QualityIssue::Error(format!(
107 "pattern {} is a pure character class ({}) — too broad without context anchoring. \
108 Use a capture group or add a literal prefix.",
109 i, pat.regex
110 )));
111 } else if !has_prefix && !has_group && spec.keywords.is_empty() {
112 issues.push(QualityIssue::Warning(format!(
113 "pattern {} has no literal prefix and no capture group — may false-positive",
114 i
115 )));
116 }
117 }
118}
119
120fn validate_companion(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
121 if let Some(companion) = &spec.companion
122 && companion.name.trim().is_empty()
123 {
124 issues.push(QualityIssue::Error(
125 "companion name must not be empty".into(),
126 ));
127 }
128}
129
130fn validate_verify_spec(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
131 if let Some(ref verify) = spec.verify {
132 if verify.url.is_empty() {
133 issues.push(QualityIssue::Error("verify URL is empty".into()));
134 }
135 if verify.url.starts_with("http://") && !verify.url.contains("localhost") {
136 issues.push(QualityIssue::Warning(
137 "verify URL uses HTTP instead of HTTPS".into(),
138 ));
139 }
140 }
141}
142
143fn has_literal_prefix(pattern: &str, min_len: usize) -> bool {
144 let mut count = 0;
145 for ch in pattern.chars() {
146 match ch {
147 '[' | '(' | '.' | '*' | '+' | '?' | '{' | '\\' | '|' | '^' | '$' => break,
148 _ => count += 1,
149 }
150 }
151 count >= min_len
152}
153
154fn is_pure_character_class(pattern: &str) -> bool {
155 let trimmed = pattern.trim();
156 if !trimmed.starts_with('[') {
157 return false;
158 }
159
160 let Some(close) = trimmed.find(']') else {
161 return false;
162 };
163 let remainder = trimmed[close + 1..].trim();
164 if remainder.is_empty() {
165 return true;
166 }
167 if remainder == "+" || remainder == "*" || remainder == "?" {
168 return true;
169 }
170 if remainder.starts_with('{')
171 && let Some(qclose) = remainder.find('}')
172 {
173 let after_quantifier = remainder[qclose + 1..].trim();
174 return after_quantifier.is_empty();
175 }
176
177 false
178}
179
180fn validate_regex_complexity(index: usize, ast: &Ast, issues: &mut Vec<QualityIssue>) {
181 let mut stats = RegexComplexityStats::default();
182 collect_regex_complexity(ast, &mut stats);
183 collect_redos_risks(ast, &mut stats, false);
184
185 if stats.nodes > MAX_REGEX_AST_NODES {
186 issues.push(QualityIssue::Error(format!(
187 "pattern {} regex is too complex ({} AST nodes > {} limit)",
188 index, stats.nodes, MAX_REGEX_AST_NODES
189 )));
190 }
191
192 if stats.max_alternation_branches > MAX_REGEX_ALTERNATION_BRANCHES {
193 issues.push(QualityIssue::Error(format!(
194 "pattern {} regex has too many alternation branches ({} > {} limit)",
195 index, stats.max_alternation_branches, MAX_REGEX_ALTERNATION_BRANCHES
196 )));
197 }
198
199 if stats.max_repeat_bound > MAX_REGEX_REPEAT_BOUND {
200 issues.push(QualityIssue::Error(format!(
201 "pattern {} regex has an excessive counted repetition bound ({} > {} limit)",
202 index, stats.max_repeat_bound, MAX_REGEX_REPEAT_BOUND
203 )));
204 }
205
206 if stats.has_nested_quantifier {
207 issues.push(QualityIssue::Error(format!(
208 "pattern {} regex contains nested quantifiers that can trigger pathological matching",
209 index
210 )));
211 }
212
213 if stats.has_quantified_overlapping_alternation {
214 issues.push(QualityIssue::Error(format!(
215 "pattern {} regex repeats overlapping alternations; use unambiguous branches instead",
216 index
217 )));
218 }
219}
220
221#[derive(Default)]
222struct RegexComplexityStats {
223 nodes: usize,
224 max_alternation_branches: usize,
225 max_repeat_bound: u32,
226 has_nested_quantifier: bool,
227 has_quantified_overlapping_alternation: bool,
228}
229
230fn collect_regex_complexity(ast: &Ast, stats: &mut RegexComplexityStats) {
231 stats.nodes += 1;
232 match ast {
233 Ast::Repetition(repetition) => {
234 update_repeat_bound(&repetition.op.kind, stats);
235 collect_regex_complexity(&repetition.ast, stats);
236 }
237 Ast::Group(group) => collect_regex_complexity(&group.ast, stats),
238 Ast::Alternation(alternation) => {
239 stats.max_alternation_branches =
240 stats.max_alternation_branches.max(alternation.asts.len());
241 for ast in &alternation.asts {
242 collect_regex_complexity(ast, stats);
243 }
244 }
245 Ast::Concat(concat) => {
246 for ast in &concat.asts {
247 collect_regex_complexity(ast, stats);
248 }
249 }
250 Ast::Empty(_)
251 | Ast::Flags(_)
252 | Ast::Literal(_)
253 | Ast::Dot(_)
254 | Ast::Assertion(_)
255 | Ast::ClassUnicode(_)
256 | Ast::ClassPerl(_)
257 | Ast::ClassBracketed(_) => {}
258 }
259}
260
261fn collect_redos_risks(ast: &Ast, stats: &mut RegexComplexityStats, inside_repetition: bool) {
262 match ast {
263 Ast::Repetition(repetition) => {
264 let this_is_simple_atom = matches!(
278 &*repetition.ast,
279 Ast::Literal(_)
280 | Ast::Dot(_)
281 | Ast::ClassBracketed(_)
282 | Ast::ClassPerl(_)
283 | Ast::ClassUnicode(_)
284 );
285 let this_is_unbounded = matches!(
286 repetition.op.kind,
287 ast::RepetitionKind::ZeroOrMore
288 | ast::RepetitionKind::OneOrMore
289 | ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast { .. })
290 );
291 if inside_repetition && !this_is_simple_atom && this_is_unbounded {
295 stats.has_nested_quantifier = true;
296 }
297 if !inside_repetition
298 && this_is_unbounded
299 && !this_is_simple_atom
300 && ast_contains_repetition(&repetition.ast)
301 {
302 stats.has_nested_quantifier = true;
303 }
304 if alternation_has_overlapping_prefixes(&repetition.ast) {
305 stats.has_quantified_overlapping_alternation = true;
306 }
307 collect_redos_risks(
309 &repetition.ast,
310 stats,
311 inside_repetition || this_is_unbounded,
312 );
313 }
314 Ast::Group(group) => collect_redos_risks(&group.ast, stats, inside_repetition),
315 Ast::Alternation(alternation) => {
316 for ast in &alternation.asts {
317 collect_redos_risks(ast, stats, inside_repetition);
318 }
319 }
320 Ast::Concat(concat) => {
321 for ast in &concat.asts {
322 collect_redos_risks(ast, stats, inside_repetition);
323 }
324 }
325 Ast::Empty(_)
326 | Ast::Flags(_)
327 | Ast::Literal(_)
328 | Ast::Dot(_)
329 | Ast::Assertion(_)
330 | Ast::ClassUnicode(_)
331 | Ast::ClassPerl(_)
332 | Ast::ClassBracketed(_) => {}
333 }
334}
335
336fn ast_contains_repetition(ast: &Ast) -> bool {
337 match ast {
338 Ast::Repetition(_) => true,
339 Ast::Group(group) => ast_contains_repetition(&group.ast),
340 Ast::Alternation(alternation) => alternation.asts.iter().any(ast_contains_repetition),
341 Ast::Concat(concat) => concat.asts.iter().any(ast_contains_repetition),
342 Ast::Empty(_)
343 | Ast::Flags(_)
344 | Ast::Literal(_)
345 | Ast::Dot(_)
346 | Ast::Assertion(_)
347 | Ast::ClassUnicode(_)
348 | Ast::ClassPerl(_)
349 | Ast::ClassBracketed(_) => false,
350 }
351}
352
353fn alternation_has_overlapping_prefixes(ast: &Ast) -> bool {
354 let alternatives = match ast {
355 Ast::Alternation(alternation) => &alternation.asts,
356 Ast::Group(group) => return alternation_has_overlapping_prefixes(&group.ast),
357 _ => return false,
358 };
359
360 let prefixes = alternatives
361 .iter()
362 .filter_map(literalish_prefix)
363 .collect::<Vec<_>>();
364 for (idx, prefix) in prefixes.iter().enumerate() {
365 for other in prefixes.iter().skip(idx + 1) {
366 if prefix.starts_with(other) || other.starts_with(prefix) {
367 return true;
368 }
369 }
370 }
371 false
372}
373
374fn literalish_prefix(ast: &Ast) -> Option<String> {
375 match ast {
376 Ast::Literal(literal) => Some(literal.c.to_string()),
377 Ast::Concat(concat) => {
378 let mut prefix = String::new();
379 for node in &concat.asts {
380 match node {
381 Ast::Literal(literal) => prefix.push(literal.c),
382 Ast::Group(group) => prefix.push_str(&literalish_prefix(&group.ast)?),
383 _ => break,
384 }
385 }
386 (!prefix.is_empty()).then_some(prefix)
387 }
388 Ast::Group(group) => literalish_prefix(&group.ast),
389 _ => None,
390 }
391}
392
393fn update_repeat_bound(kind: &ast::RepetitionKind, stats: &mut RegexComplexityStats) {
394 let bound = match kind {
395 ast::RepetitionKind::ZeroOrOne => 1,
396 ast::RepetitionKind::ZeroOrMore | ast::RepetitionKind::OneOrMore => MAX_REGEX_REPEAT_BOUND,
397 ast::RepetitionKind::Range(range) => match range {
398 ast::RepetitionRange::Exactly(max)
399 | ast::RepetitionRange::AtLeast(max)
400 | ast::RepetitionRange::Bounded(_, max) => *max,
401 },
402 };
403 stats.max_repeat_bound = stats.max_repeat_bound.max(bound);
404}
405
406#[cfg(test)]
407mod tests {
408 use super::*;
409 use crate::Severity;
410
411 fn detector_with_pattern(regex: &str) -> DetectorSpec {
412 DetectorSpec {
413 id: "test-detector".into(),
414 name: "Test Detector".into(),
415 service: "test".into(),
416 severity: Severity::High,
417 keywords: vec!["token".into()],
418 patterns: vec![crate::PatternSpec {
419 regex: regex.into(),
420 description: None,
421 group: None,
422 }],
423 verify: None,
424 companion: None,
425 }
426 }
427
428 #[test]
429 fn rejects_excessive_alternation_fanout() {
430 let regex = (0..65)
431 .map(|i| format!("opt{i}"))
432 .collect::<Vec<_>>()
433 .join("|");
434 let issues = validate_detector(&detector_with_pattern(®ex));
435
436 assert!(issues.iter().any(|issue| matches!(
437 issue,
438 QualityIssue::Error(message) if message.contains("alternation branches")
439 )));
440 }
441
442 #[test]
443 fn rejects_excessive_counted_repetition() {
444 let issues = validate_detector(&detector_with_pattern("token[a-z]{10001}"));
445
446 assert!(issues.iter().any(|issue| matches!(
447 issue,
448 QualityIssue::Error(message) if message.contains("counted repetition bound")
449 )));
450 }
451
452 #[test]
453 fn rejects_nested_quantifiers() {
454 let issues = validate_detector(&detector_with_pattern("(a+)+b"));
455
456 assert!(issues.iter().any(|issue| matches!(
457 issue,
458 QualityIssue::Error(message) if message.contains("nested quantifiers")
459 )));
460 }
461
462 #[test]
463 fn rejects_quantified_overlapping_alternation() {
464 let issues = validate_detector(&detector_with_pattern("(ab|a)+z"));
465
466 assert!(issues.iter().any(|issue| matches!(
467 issue,
468 QualityIssue::Error(message) if message.contains("overlapping alternations")
469 )));
470 }
471}