1use super::{DetectorSpec, VerifySpec};
4use regex_syntax::ast::{self, Ast};
5use serde::Serialize;
6
/// Largest regex source, in bytes, that is parsed; longer patterns are rejected up front.
const MAX_REGEX_PATTERN_LEN: usize = 4096;
/// Largest number of AST nodes a single regex may contain.
const MAX_REGEX_AST_NODES: usize = 512;
/// Largest number of branches allowed in any one alternation.
const MAX_REGEX_ALTERNATION_BRANCHES: usize = 64;
/// Largest counted-repetition bound allowed; also the bound recorded for `*` and `+`.
const MAX_REGEX_REPEAT_BOUND: u32 = 1_000;
11
/// A single finding produced while validating a detector specification.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum QualityIssue {
    /// A finding reported at error severity, carrying a human-readable message.
    Error(String),
    /// A finding reported at warning severity, carrying a human-readable message.
    Warning(String),
}
27
28pub fn validate_detector(spec: &DetectorSpec) -> Vec<QualityIssue> {
53 let mut issues = Vec::new();
54 validate_patterns_present(spec, &mut issues);
55 validate_regexes(spec, &mut issues);
56 validate_keywords(spec, &mut issues);
57 validate_pattern_specificity(spec, &mut issues);
58 validate_companions(spec, &mut issues);
59 validate_verify_spec(spec, &mut issues);
60 issues
61}
62
63fn validate_patterns_present(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
64 if spec.patterns.is_empty() {
65 issues.push(QualityIssue::Error("no patterns defined".into()));
66 }
67}
68
69fn validate_regexes(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
70 for (i, pat) in spec.patterns.iter().enumerate() {
71 validate_regex_definition("pattern", i, &pat.regex, issues);
72 }
73}
74
75fn validate_keywords(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
76 if spec.keywords.is_empty() {
77 issues.push(QualityIssue::Warning(
78 "no keywords defined — pattern may produce false positives".into(),
79 ));
80 }
81}
82
83fn validate_pattern_specificity(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
84 for (i, pat) in spec.patterns.iter().enumerate() {
85 let has_prefix = has_literal_prefix(&pat.regex, 3);
86 let has_group = pat.group.is_some();
87 let is_pure_charclass = is_pure_character_class(&pat.regex);
88
89 if is_pure_charclass && !has_group {
90 issues.push(QualityIssue::Error(format!(
91 "pattern {} is a pure character class ({}) — too broad without context anchoring. \
92 Use a capture group or add a literal prefix.",
93 i, pat.regex
94 )));
95 } else if !has_prefix && !has_group && spec.keywords.is_empty() {
96 issues.push(QualityIssue::Warning(format!(
97 "pattern {} has no literal prefix and no capture group — may false-positive",
98 i
99 )));
100 }
101 }
102}
103
104fn validate_companions(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
105 for (i, companion) in spec.companions.iter().enumerate() {
106 if companion.name.trim().is_empty() {
107 issues.push(QualityIssue::Error(format!(
108 "companion {} name must not be empty",
109 i
110 )));
111 }
112 validate_regex_definition("companion", i, &companion.regex, issues);
113 if is_pure_character_class(&companion.regex) {
119 if companion.within_lines <= TIGHT_COMPANION_RADIUS {
120 issues.push(QualityIssue::Warning(format!(
121 "companion {} regex '{}' is a pure character class; \
122 allowed because within_lines={} ≤ {} (positional anchoring).",
123 i, companion.regex, companion.within_lines, TIGHT_COMPANION_RADIUS
124 )));
125 } else {
126 issues.push(QualityIssue::Error(format!(
127 "companion {} regex '{}' is a pure character class with within_lines={} \
128 (> {}) — the wide search radius needs a literal context anchor",
129 i, companion.regex, companion.within_lines, TIGHT_COMPANION_RADIUS
130 )));
131 }
132 } else if !has_substantial_literal(&companion.regex, 3) {
133 issues.push(QualityIssue::Warning(format!(
134 "companion {} regex '{}' is too broad — may produce false positives. \
135 Add a context anchor like 'KEY_NAME='.",
136 i, companion.regex
137 )));
138 }
139 }
140}
141
/// Largest `within_lines` value at which a pure character-class companion regex is accepted
/// with a warning instead of being rejected with an error.
const TIGHT_COMPANION_RADIUS: usize = 5;
145
146fn validate_regex_definition(
147 kind: &str,
148 index: usize,
149 regex: &str,
150 issues: &mut Vec<QualityIssue>,
151) {
152 if regex.len() > MAX_REGEX_PATTERN_LEN {
153 issues.push(QualityIssue::Error(format!(
154 "{kind} {index} regex is too large ({} bytes > {} byte limit)",
155 regex.len(),
156 MAX_REGEX_PATTERN_LEN
157 )));
158 return;
159 }
160
161 match ast::parse::Parser::new().parse(regex) {
162 Ok(ast) => validate_regex_complexity(kind, index, &ast, issues),
163 Err(error) => issues.push(QualityIssue::Error(format!(
164 "{kind} {index} regex does not compile: {error}"
165 ))),
166 }
167}
168
/// Returns `true` when `pattern` contains a run of at least `min_len` consecutive literal
/// characters.
///
/// This is a lightweight scan of the regex source, not a full parse. Only text that the
/// regex would match verbatim counts towards a run. The following are skipped and also end
/// the current run:
///
/// * the inside of a bracketed class `[...]`, including escaped members such as `\.`;
/// * the inside of a brace body `{...}` (counted repetitions like `{16,32}` and the name in
///   `\p{Greek}`);
/// * group headers introduced by `(?` — flags, `(?:`, and group names such as `(?P<name>`.
///
/// An escaped metacharacter outside those regions (for example `\.`) counts as one literal
/// character; any other escape (for example `\d`) ends the run.
fn has_substantial_literal(pattern: &str, min_len: usize) -> bool {
    /// Which syntactic region the scanner is currently inside.
    #[derive(Clone, Copy, PartialEq)]
    enum Mode {
        Text,
        CharClass,
        BraceBody,
        GroupHeader,
    }

    let mut mode = Mode::Text;
    let mut in_escape = false;
    let mut after_open_paren = false;
    let mut longest_run = 0usize;
    let mut current_run = 0usize;

    for ch in pattern.chars() {
        // `(?` only opens a group header when `?` directly follows an unescaped `(`.
        let follows_open_paren = std::mem::replace(&mut after_open_paren, false);

        if in_escape {
            in_escape = false;
            // Escapes inside a class or brace body are never literal text.
            if mode == Mode::Text {
                if is_escaped_literal(ch) {
                    current_run += 1;
                } else {
                    longest_run = longest_run.max(current_run);
                    current_run = 0;
                }
            }
            continue;
        }
        if ch == '\\' {
            in_escape = true;
            continue;
        }

        match mode {
            Mode::CharClass => {
                if ch == ']' {
                    mode = Mode::Text;
                }
            }
            Mode::BraceBody => {
                if ch == '}' {
                    mode = Mode::Text;
                }
            }
            Mode::GroupHeader => {
                // `(?flags)`, `(?flags:` / `(?:` and `(?P<name>` / `(?<name>` end here.
                if matches!(ch, ':' | ')' | '>') {
                    mode = Mode::Text;
                }
            }
            Mode::Text => match ch {
                // A stray `]` neither extends nor interrupts a literal run.
                ']' => {}
                '[' | '{' | '(' | ')' | '.' | '*' | '+' | '?' | '}' | '|' | '^' | '$' => {
                    longest_run = longest_run.max(current_run);
                    current_run = 0;
                    match ch {
                        '[' => mode = Mode::CharClass,
                        '{' => mode = Mode::BraceBody,
                        '(' => after_open_paren = true,
                        '?' if follows_open_paren => mode = Mode::GroupHeader,
                        _ => {}
                    }
                }
                _ => current_run += 1,
            },
        }
    }

    longest_run.max(current_run) >= min_len
}

/// Returns `true` when `ch`, written after a backslash, denotes that same character
/// literally (an escaped regex metacharacter).
fn is_escaped_literal(ch: char) -> bool {
    matches!(
        ch,
        '[' | ']' | '(' | ')' | '.' | '*' | '+' | '?' | '{' | '}' | '\\' | '|' | '^' | '$'
    )
}
218
219fn validate_verify_spec(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
220 if let Some(ref verify) = spec.verify {
221 if !verify.steps.is_empty() {
223 for step in &verify.steps {
224 validate_url(&step.url, issues);
225 check_url_exfil_risk(&step.url, &verify.allowed_domains, issues);
226 }
227 } else if let Some(ref url) = verify.url {
228 validate_url(url, issues);
229 check_url_exfil_risk(url, &verify.allowed_domains, issues);
230 } else {
231 issues.push(QualityIssue::Error(
232 "verify spec has no steps and no default URL".into(),
233 ));
234 }
235 check_oob_consistency(verify, issues);
236 }
237 check_reserved_companion_names(spec, issues);
238}
239
/// Companion names that detectors may not declare; the validator reports them as reserved
/// for the OOB interpolator.
const RESERVED_COMPANION_NAMES: &[&str] =
    &["__keyhog_oob_url", "__keyhog_oob_host", "__keyhog_oob_id"];
248
249fn check_reserved_companion_names(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
250 for (i, c) in spec.companions.iter().enumerate() {
251 if RESERVED_COMPANION_NAMES.contains(&c.name.as_str()) {
252 issues.push(QualityIssue::Error(format!(
253 "companion {} name '{}' is reserved for the OOB interpolator. \
254 Pick a different name; this collision would corrupt verify templates.",
255 i, c.name,
256 )));
257 }
258 }
259}
260
261fn check_oob_consistency(verify: &VerifySpec, issues: &mut Vec<QualityIssue>) {
274 let mut interactsh_referenced = false;
275 let mut scan = |s: &str| {
276 if s.contains("{{interactsh") {
277 interactsh_referenced = true;
278 }
279 };
280 if let Some(ref url) = verify.url {
281 scan(url);
282 }
283 if let Some(ref body) = verify.body {
284 scan(body);
285 }
286 for h in &verify.headers {
287 scan(&h.value);
288 }
289 for step in &verify.steps {
290 scan(&step.url);
291 if let Some(ref body) = step.body {
292 scan(body);
293 }
294 for h in &step.headers {
295 scan(&h.value);
296 }
297 }
298 let oob_configured = verify.oob.is_some();
299 match (oob_configured, interactsh_referenced) {
300 (true, false) => issues.push(QualityIssue::Error(
301 "verify.oob is set but no `{{interactsh}}` / `{{interactsh.host}}` / \
302 `{{interactsh.url}}` / `{{interactsh.id}}` token appears in any verify \
303 template — the OOB callback URL has nowhere to land, so the wait_for \
304 would always time out. Either embed an interactsh token in the body, \
305 URL, or a header — or remove the [detector.verify.oob] block."
306 .into(),
307 )),
308 (false, true) => issues.push(QualityIssue::Error(
309 "an `{{interactsh*}}` token is referenced in a verify template but no \
310 [detector.verify.oob] block is set — the token will resolve to an empty \
311 string at runtime and ship a malformed request to the service. Either \
312 add a [detector.verify.oob] block or remove the token."
313 .into(),
314 )),
315 _ => {}
316 }
317}
318
319fn check_url_exfil_risk(url: &str, allowed_domains: &[String], issues: &mut Vec<QualityIssue>) {
326 let trimmed = url.trim();
331 let after_scheme = trimmed
332 .strip_prefix("https://")
333 .or_else(|| trimmed.strip_prefix("http://"))
334 .unwrap_or(trimmed);
335 let host_starts_with_template =
336 after_scheme.starts_with("{{") || after_scheme.starts_with("{") || trimmed == "{{match}}";
337 if host_starts_with_template && allowed_domains.is_empty() {
338 issues.push(QualityIssue::Error(
339 "verify URL host is templated and no `allowed_domains` is set — \
340 attacker-controlled interpolation could exfil credentials. \
341 Either hardcode the authoritative host in the URL or set \
342 `allowed_domains` explicitly. See kimi-wave3 §1."
343 .into(),
344 ));
345 }
346 if url.contains('{') && !url.contains("{{") {
349 issues.push(QualityIssue::Error(
350 "verify URL uses single-brace `{var}` template syntax which the \
351 interpolator does NOT honor (only `{{var}}` works); the URL will \
352 be sent to a literal-string host. Use `{{companion.var}}`."
353 .into(),
354 ));
355 }
356}
357
358fn validate_url(url: &str, issues: &mut Vec<QualityIssue>) {
359 if url.is_empty() {
360 issues.push(QualityIssue::Error("verify URL is empty".into()));
361 }
362 if url.starts_with("http://") && !url.contains("localhost") {
363 issues.push(QualityIssue::Warning(
364 "verify URL uses HTTP instead of HTTPS".into(),
365 ));
366 }
367}
368
/// Returns `true` when `pattern` begins with at least `min_len` characters that precede the
/// first regex metacharacter (`[ ( . * + ? { \ | ^ $`).
fn has_literal_prefix(pattern: &str, min_len: usize) -> bool {
    let leading_literals = pattern
        .chars()
        .take_while(|&ch| {
            !matches!(
                ch,
                '[' | '(' | '.' | '*' | '+' | '?' | '{' | '\\' | '|' | '^' | '$'
            )
        })
        .count();
    leading_literals >= min_len
}
379
/// Returns `true` when `pattern` is nothing but a single bracketed character class,
/// optionally followed by one quantifier (`+`, `*`, `?` or `{...}`) and an optional lazy
/// marker `?`.
///
/// Surrounding whitespace is ignored. The end of the class is located by scanning, so
/// escaped brackets (`[\]a]`), a leading literal `]` (`[]a]`, `[^]a]`) and nested or POSIX
/// classes (`[[:alnum:]]`) are handled instead of stopping at the first `]`.
fn is_pure_character_class(pattern: &str) -> bool {
    let trimmed = pattern.trim();
    let Some(class_end) = bracketed_class_end(trimmed) else {
        return false;
    };

    let remainder = trimmed[class_end..].trim();
    let after_quantifier = match remainder.as_bytes().first() {
        None => return true,
        Some(b'+' | b'*' | b'?') => &remainder[1..],
        Some(b'{') => match remainder.find('}') {
            Some(close) => &remainder[close + 1..],
            None => return false,
        },
        Some(_) => return false,
    };

    // Only an optional lazy marker may follow the quantifier.
    let after_quantifier = after_quantifier.trim();
    after_quantifier.is_empty() || after_quantifier == "?"
}

/// Returns the byte offset just past the `]` closing the bracketed class that starts at the
/// first byte of `pattern`, or `None` when `pattern` does not start with `[` or the class is
/// never closed.
fn bracketed_class_end(pattern: &str) -> Option<usize> {
    let bytes = pattern.as_bytes();
    if bytes.first() != Some(&b'[') {
        return None;
    }

    let mut pos = 1;
    if bytes.get(pos) == Some(&b'^') {
        pos += 1;
    }
    // A `]` directly after `[` or `[^` is a literal member, not the terminator.
    if bytes.get(pos) == Some(&b']') {
        pos += 1;
    }

    let mut depth = 1usize;
    while pos < bytes.len() {
        match bytes[pos] {
            // Skip the escaped byte so `\]` and `\[` are not treated as brackets.
            b'\\' => pos += 1,
            b'[' => depth += 1,
            b']' => {
                depth -= 1;
                if depth == 0 {
                    return Some(pos + 1);
                }
            }
            _ => {}
        }
        pos += 1;
    }
    None
}
405
406fn validate_regex_complexity(kind: &str, index: usize, ast: &Ast, issues: &mut Vec<QualityIssue>) {
407 let mut stats = RegexComplexityStats::default();
408 collect_regex_complexity(ast, &mut stats);
409 collect_redos_risks(ast, &mut stats, false);
410
411 if stats.nodes > MAX_REGEX_AST_NODES {
412 issues.push(QualityIssue::Error(format!(
413 "{kind} {index} regex is too complex ({} AST nodes > {} limit)",
414 stats.nodes, MAX_REGEX_AST_NODES
415 )));
416 }
417
418 if stats.max_alternation_branches > MAX_REGEX_ALTERNATION_BRANCHES {
419 issues.push(QualityIssue::Error(format!(
420 "{kind} {index} regex has too many alternation branches ({} > {} limit)",
421 stats.max_alternation_branches, MAX_REGEX_ALTERNATION_BRANCHES
422 )));
423 }
424
425 if stats.max_repeat_bound > MAX_REGEX_REPEAT_BOUND {
426 issues.push(QualityIssue::Error(format!(
427 "{kind} {index} regex has an excessive counted repetition bound ({} > {} limit)",
428 stats.max_repeat_bound, MAX_REGEX_REPEAT_BOUND
429 )));
430 }
431
432 if stats.has_nested_quantifier {
433 issues.push(QualityIssue::Error(format!(
434 "{kind} {index} regex contains nested quantifiers that can trigger pathological matching"
435 )));
436 }
437
438 if stats.has_quantified_overlapping_alternation {
439 issues.push(QualityIssue::Error(format!(
440 "{kind} {index} regex repeats overlapping alternations; use unambiguous branches instead"
441 )));
442 }
443}
444
/// Statistics accumulated while walking a regex AST.
#[derive(Default)]
struct RegexComplexityStats {
    /// Total number of AST nodes visited.
    nodes: usize,
    /// Largest branch count seen in any single alternation.
    max_alternation_branches: usize,
    /// Largest repetition bound seen (see `update_repeat_bound`).
    max_repeat_bound: u32,
    /// Set when a nested-quantifier shape was found by `collect_redos_risks`.
    has_nested_quantifier: bool,
    /// Set when a repetition wraps an alternation whose branches share a literal prefix.
    has_quantified_overlapping_alternation: bool,
}
453
454fn collect_regex_complexity(ast: &Ast, stats: &mut RegexComplexityStats) {
455 stats.nodes += 1;
456 match ast {
457 Ast::Repetition(repetition) => {
458 update_repeat_bound(&repetition.op.kind, stats);
459 collect_regex_complexity(&repetition.ast, stats);
460 }
461 Ast::Group(group) => collect_regex_complexity(&group.ast, stats),
462 Ast::Alternation(alternation) => {
463 stats.max_alternation_branches =
464 stats.max_alternation_branches.max(alternation.asts.len());
465 for ast in &alternation.asts {
466 collect_regex_complexity(ast, stats);
467 }
468 }
469 Ast::Concat(concat) => {
470 for ast in &concat.asts {
471 collect_regex_complexity(ast, stats);
472 }
473 }
474 Ast::Empty(_)
475 | Ast::Flags(_)
476 | Ast::Literal(_)
477 | Ast::Dot(_)
478 | Ast::Assertion(_)
479 | Ast::ClassUnicode(_)
480 | Ast::ClassPerl(_)
481 | Ast::ClassBracketed(_) => {}
482 }
483}
484
485fn collect_redos_risks(ast: &Ast, stats: &mut RegexComplexityStats, inside_repetition: bool) {
486 match ast {
487 Ast::Repetition(repetition) => {
488 let this_is_simple_atom = matches!(
502 &*repetition.ast,
503 Ast::Literal(_)
504 | Ast::Dot(_)
505 | Ast::ClassBracketed(_)
506 | Ast::ClassPerl(_)
507 | Ast::ClassUnicode(_)
508 );
509 let this_is_unbounded = matches!(
510 repetition.op.kind,
511 ast::RepetitionKind::ZeroOrMore
512 | ast::RepetitionKind::OneOrMore
513 | ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast { .. })
514 );
515 if inside_repetition && !this_is_simple_atom && this_is_unbounded {
519 stats.has_nested_quantifier = true;
520 }
521 if !inside_repetition
522 && this_is_unbounded
523 && !this_is_simple_atom
524 && ast_contains_repetition(&repetition.ast)
525 {
526 stats.has_nested_quantifier = true;
527 }
528 if alternation_has_overlapping_prefixes(&repetition.ast) {
529 stats.has_quantified_overlapping_alternation = true;
530 }
531 collect_redos_risks(
533 &repetition.ast,
534 stats,
535 inside_repetition || this_is_unbounded,
536 );
537 }
538 Ast::Group(group) => collect_redos_risks(&group.ast, stats, inside_repetition),
539 Ast::Alternation(alternation) => {
540 for ast in &alternation.asts {
541 collect_redos_risks(ast, stats, inside_repetition);
542 }
543 }
544 Ast::Concat(concat) => {
545 for ast in &concat.asts {
546 collect_redos_risks(ast, stats, inside_repetition);
547 }
548 }
549 Ast::Empty(_)
550 | Ast::Flags(_)
551 | Ast::Literal(_)
552 | Ast::Dot(_)
553 | Ast::Assertion(_)
554 | Ast::ClassUnicode(_)
555 | Ast::ClassPerl(_)
556 | Ast::ClassBracketed(_) => {}
557 }
558}
559
560fn ast_contains_repetition(ast: &Ast) -> bool {
561 match ast {
562 Ast::Repetition(_) => true,
563 Ast::Group(group) => ast_contains_repetition(&group.ast),
564 Ast::Alternation(alternation) => alternation.asts.iter().any(ast_contains_repetition),
565 Ast::Concat(concat) => concat.asts.iter().any(ast_contains_repetition),
566 Ast::Empty(_)
567 | Ast::Flags(_)
568 | Ast::Literal(_)
569 | Ast::Dot(_)
570 | Ast::Assertion(_)
571 | Ast::ClassUnicode(_)
572 | Ast::ClassPerl(_)
573 | Ast::ClassBracketed(_) => false,
574 }
575}
576
577fn alternation_has_overlapping_prefixes(ast: &Ast) -> bool {
578 let alternatives = match ast {
579 Ast::Alternation(alternation) => &alternation.asts,
580 Ast::Group(group) => return alternation_has_overlapping_prefixes(&group.ast),
581 _ => return false,
582 };
583
584 let prefixes = alternatives
585 .iter()
586 .filter_map(literalish_prefix)
587 .collect::<Vec<_>>();
588 for (idx, prefix) in prefixes.iter().enumerate() {
589 for other in prefixes.iter().skip(idx + 1) {
590 if prefix.starts_with(other) || other.starts_with(prefix) {
591 return true;
592 }
593 }
594 }
595 false
596}
597
598fn literalish_prefix(ast: &Ast) -> Option<String> {
599 match ast {
600 Ast::Literal(literal) => Some(literal.c.to_string()),
601 Ast::Concat(concat) => {
602 let mut prefix = String::new();
603 for node in &concat.asts {
604 match node {
605 Ast::Literal(literal) => prefix.push(literal.c),
606 Ast::Group(group) => prefix.push_str(&literalish_prefix(&group.ast)?),
607 _ => break,
608 }
609 }
610 (!prefix.is_empty()).then_some(prefix)
611 }
612 Ast::Group(group) => literalish_prefix(&group.ast),
613 _ => None,
614 }
615}
616
617fn update_repeat_bound(kind: &ast::RepetitionKind, stats: &mut RegexComplexityStats) {
618 let bound = match kind {
619 ast::RepetitionKind::ZeroOrOne => 1,
620 ast::RepetitionKind::ZeroOrMore | ast::RepetitionKind::OneOrMore => MAX_REGEX_REPEAT_BOUND,
621 ast::RepetitionKind::Range(range) => match range {
622 ast::RepetitionRange::Exactly(max)
623 | ast::RepetitionRange::AtLeast(max)
624 | ast::RepetitionRange::Bounded(_, max) => *max,
625 },
626 };
627 stats.max_repeat_bound = stats.max_repeat_bound.max(bound);
628}
629
630#[cfg(test)]
631mod oob_validation_tests {
632 use super::*;
633 use crate::spec::load_detectors_from_str;
634
635 fn errors_for(toml_src: &str) -> Vec<String> {
636 let detectors = load_detectors_from_str(toml_src).expect("toml parses");
637 let mut errs = Vec::new();
638 for d in &detectors {
639 for issue in validate_detector(d) {
640 if let QualityIssue::Error(msg) = issue {
641 errs.push(msg);
642 }
643 }
644 }
645 errs
646 }
647
    // An `oob` block with no `{{interactsh…}}` token in any template must be an error.
    #[test]
    fn oob_block_without_interactsh_token_is_error() {
        let toml_src = r#"
[detector]
id = "oob-no-token"
name = "OOB without token"
service = "github"
severity = "high"
keywords = ["GHTOKEN"]

[[detector.patterns]]
regex = "GHTOKEN_[A-Z0-9]{16}"

[detector.verify]
method = "POST"
url = "https://api.github.com/probe"
body = '{"static":"payload"}'

[detector.verify.oob]
protocol = "http"
"#;
        let errs = errors_for(toml_src);
        assert!(
            errs.iter().any(|e| e.contains("verify.oob is set but no")),
            "expected oob-without-token error; got {errs:?}"
        );
    }
675
    // An `{{interactsh}}` token in the body with no `oob` block must be an error.
    #[test]
    fn interactsh_token_without_oob_block_is_error() {
        let toml_src = r#"
[detector]
id = "token-no-oob"
name = "Token without OOB"
service = "github"
severity = "high"
keywords = ["GHTOKEN"]

[[detector.patterns]]
regex = "GHTOKEN_[A-Z0-9]{16}"

[detector.verify]
method = "POST"
url = "https://api.github.com/probe"
body = '{"target":"https://{{interactsh}}/x"}'
"#;
        let errs = errors_for(toml_src);
        assert!(
            errs.iter().any(
                |e| e.contains("token is referenced") && e.contains("no [detector.verify.oob]")
            ),
            "expected token-without-oob error; got {errs:?}"
        );
    }
702
    // An `oob` block together with a body token must produce no OOB-related errors.
    #[test]
    fn oob_with_interactsh_token_passes() {
        let toml_src = r#"
[detector]
id = "oob-good"
name = "OOB with token"
service = "github"
severity = "high"
keywords = ["GHTOKEN"]

[[detector.patterns]]
regex = "GHTOKEN_[A-Z0-9]{16}"

[detector.verify]
method = "POST"
url = "https://api.github.com/probe"
body = '{"target":"https://{{interactsh}}/x"}'

[detector.verify.oob]
protocol = "http"
"#;
        let errs = errors_for(toml_src);
        // Only errors that mention the OOB feature are relevant to this test.
        let oob_related: Vec<_> = errs
            .iter()
            .filter(|e| e.contains("oob") || e.contains("interactsh"))
            .collect();
        assert!(
            oob_related.is_empty(),
            "unexpected OOB errors: {oob_related:?}"
        );
    }
734
    // A companion named like one of the reserved OOB names must be rejected.
    #[test]
    fn reserved_companion_name_is_error() {
        let toml_src = r#"
[detector]
id = "reserved-name"
name = "Reserved name collision"
service = "github"
severity = "high"
keywords = ["GHTOKEN"]

[[detector.patterns]]
regex = "GHTOKEN_[A-Z0-9]{16}"

[[detector.companions]]
name = "__keyhog_oob_url"
regex = "(?:URL=)([a-z]{4,})"
within_lines = 5
"#;
        let errs = errors_for(toml_src);
        assert!(
            errs.iter()
                .any(|e| e.contains("__keyhog_oob_url") && e.contains("reserved")),
            "expected reserved-name error; got {errs:?}"
        );
    }
760
    // Audits the embedded detectors. A companion that is substituted into a verify template
    // via `{{companion.<name>}}` is reported when its regex has no capture group but does
    // contain an unescaped `=` outside a character class. Files that fail to load are
    // skipped.
    #[test]
    fn audit_companion_substitutions_have_capture_groups() {
        use crate::spec::load_detectors_from_str;
        let mut suspicious = Vec::new();
        for (filename, toml_src) in crate::embedded_detector_tomls() {
            let Ok(detectors) = load_detectors_from_str(toml_src) else {
                continue;
            };
            for d in &detectors {
                let Some(verify) = d.verify.as_ref() else {
                    continue;
                };
                // Names of all companions referenced by any verify template.
                let mut substituted: std::collections::HashSet<String> =
                    std::collections::HashSet::new();
                // Records every `<name>` found in `{{companion.<name>}}` within `s`.
                let mut scan = |s: &str| {
                    let mut rest = s;
                    while let Some(start) = rest.find("{{companion.") {
                        let after = &rest[start + "{{companion.".len()..];
                        if let Some(end) = after.find("}}") {
                            substituted.insert(after[..end].to_string());
                            rest = &after[end + 2..];
                        } else {
                            // Unterminated token: nothing more to extract from this string.
                            break;
                        }
                    }
                };
                if let Some(ref u) = verify.url {
                    scan(u);
                }
                if let Some(ref b) = verify.body {
                    scan(b);
                }
                for h in &verify.headers {
                    scan(&h.value);
                }
                for step in &verify.steps {
                    scan(&step.url);
                    if let Some(ref b) = step.body {
                        scan(b);
                    }
                    for h in &step.headers {
                        scan(&h.value);
                    }
                    if let crate::AuthSpec::Header { template, .. } = &step.auth {
                        scan(template);
                    }
                }
                if let Some(crate::AuthSpec::Header { template, .. }) = &verify.auth {
                    scan(template);
                }

                for c in &d.companions {
                    // Only companions that are actually substituted matter here.
                    if !substituted.contains(&c.name) {
                        continue;
                    }
                    let has_group = regex_has_capture_group(&c.regex);
                    if has_group {
                        continue;
                    }
                    if regex_likely_includes_anchor_prefix(&c.regex) {
                        suspicious.push(format!(
                            "{} (companion {} regex {:?})",
                            filename, c.name, c.regex
                        ));
                    }
                }
            }
        }
        assert!(
            suspicious.is_empty(),
            "companions referenced in verify substitutions but lacking a capture group \
             on a context-anchored regex (would substitute `KEY=value` instead of just \
             `value`):\n  {}",
            suspicious.join("\n  ")
        );
    }
862
863 fn regex_has_capture_group(pattern: &str) -> bool {
870 let bytes = pattern.as_bytes();
871 let mut i = 0;
872 let mut in_class = false;
873 let mut escape = false;
874 while i < bytes.len() {
875 let b = bytes[i];
876 if escape {
877 escape = false;
878 i += 1;
879 continue;
880 }
881 match b {
882 b'\\' => {
883 escape = true;
884 }
885 b'[' if !in_class => {
886 in_class = true;
887 }
888 b']' if in_class => {
889 in_class = false;
890 }
891 b'(' if !in_class => {
892 if i + 1 < bytes.len() && bytes[i + 1] == b'?' {
894 let after = &bytes[i + 2..];
905 if after.starts_with(b"P<") {
906 return true;
907 }
908 if after.starts_with(b"<") {
909 if after.starts_with(b"<=") || after.starts_with(b"<!") {
913 } else {
915 return true;
916 }
917 }
918 } else {
921 return true; }
923 }
924 _ => {}
925 }
926 i += 1;
927 }
928 false
929 }
930
931 fn regex_likely_includes_anchor_prefix(pattern: &str) -> bool {
943 let bytes = pattern.as_bytes();
944 let mut i = 0;
945 let mut in_class = false;
946 let mut escape = false;
947 while i < bytes.len() {
948 let b = bytes[i];
949 if escape {
950 escape = false;
951 i += 1;
952 continue;
953 }
954 match b {
955 b'\\' => {
956 escape = true;
957 }
958 b'[' if !in_class => {
959 in_class = true;
960 }
961 b']' if in_class => {
962 in_class = false;
963 }
964 b'=' if !in_class => return true,
965 _ => {}
966 }
967 i += 1;
968 }
969 false
970 }
971
    // Audits the embedded detectors' field-style auth slots (bearer, basic, query, AWS v4).
    // A slot is reported when it contains `{{`, is the bare string "companion", or names a
    // `companion.<name>` that the detector does not declare. Files that fail to load are
    // skipped.
    #[test]
    fn audit_auth_field_references_resolve() {
        use crate::spec::load_detectors_from_str;
        use crate::AuthSpec;

        let mut errors: Vec<String> = Vec::new();
        for (filename, toml_src) in crate::embedded_detector_tomls() {
            let Ok(detectors) = load_detectors_from_str(toml_src) else {
                continue;
            };
            for d in &detectors {
                let companion_names: std::collections::HashSet<&str> =
                    d.companions.iter().map(|c| c.name.as_str()).collect();

                // Returns a description of the problem with `field`, or `None` if it is fine.
                let check = |label: &str, field: &str| -> Option<String> {
                    if field.contains("{{") {
                        return Some(format!(
                            "{filename}: {label} field {field:?} contains `{{...}}` template — \
                             field-style slots use `match`/`companion.<name>`/literal, NOT `{{...}}`. \
                             It silently resolves to the literal string."
                        ));
                    }
                    if field == "companion" {
                        return Some(format!(
                            "{filename}: {label} field is bare `\"companion\"` with no \
                             `.<name>` — silently resolves to the literal string \"companion\"."
                        ));
                    }
                    if let Some(name) = field.strip_prefix("companion.") {
                        if !companion_names.contains(name) {
                            return Some(format!(
                                "{filename}: {label} field {field:?} references companion \
                                 {name:?} which is not declared on this detector."
                            ));
                        }
                    }
                    None
                };

                if let Some(verify) = d.verify.as_ref() {
                    // Runs `check` on every field-style slot of one auth spec.
                    let mut audit_auth = |auth: &AuthSpec, ctx: &str| {
                        match auth {
                            AuthSpec::Bearer { field } => {
                                if let Some(e) = check(&format!("{ctx} bearer.field"), field) {
                                    errors.push(e);
                                }
                            }
                            AuthSpec::Basic { username, password } => {
                                if let Some(e) = check(&format!("{ctx} basic.username"), username) {
                                    errors.push(e);
                                }
                                if let Some(e) = check(&format!("{ctx} basic.password"), password) {
                                    errors.push(e);
                                }
                            }
                            AuthSpec::Query { field, .. } => {
                                if let Some(e) = check(&format!("{ctx} query.field"), field) {
                                    errors.push(e);
                                }
                            }
                            AuthSpec::AwsV4 {
                                access_key,
                                secret_key,
                                session_token,
                                ..
                            } => {
                                if let Some(e) =
                                    check(&format!("{ctx} awsv4.access_key"), access_key)
                                {
                                    errors.push(e);
                                }
                                if let Some(e) =
                                    check(&format!("{ctx} awsv4.secret_key"), secret_key)
                                {
                                    errors.push(e);
                                }
                                if let Some(tok) = session_token {
                                    if let Some(e) =
                                        check(&format!("{ctx} awsv4.session_token"), tok)
                                    {
                                        errors.push(e);
                                    }
                                }
                            }
                            // These variants are not checked by this audit.
                            AuthSpec::Header { .. } | AuthSpec::None | AuthSpec::Script { .. } => {}
                        }
                    };
                    if let Some(ref auth) = verify.auth {
                        audit_auth(auth, "verify.auth");
                    }
                    for (i, step) in verify.steps.iter().enumerate() {
                        audit_auth(&step.auth, &format!("verify.steps[{i}].auth"));
                    }
                }
            }
        }
        assert!(
            errors.is_empty(),
            "auth field reference audit found broken detectors:\n  {}",
            errors.join("\n  ")
        );
    }
1092
    // A token that appears only in a header value must satisfy the OOB consistency check.
    #[test]
    fn interactsh_token_in_header_value_counts() {
        let toml_src = r#"
[detector]
id = "header-oob"
name = "OOB via header"
service = "github"
severity = "high"
keywords = ["GHTOKEN"]

[[detector.patterns]]
regex = "GHTOKEN_[A-Z0-9]{16}"

[detector.verify]
method = "POST"
url = "https://api.github.com/probe"

[[detector.verify.headers]]
name = "X-Callback"
value = "https://{{interactsh}}/x"

[detector.verify.oob]
protocol = "http"
"#;
        let errs = errors_for(toml_src);
        // Only errors that mention the OOB feature are relevant to this test.
        let oob_related: Vec<_> = errs
            .iter()
            .filter(|e| e.contains("oob") || e.contains("interactsh"))
            .collect();
        assert!(
            oob_related.is_empty(),
            "header-token detection failed: {oob_related:?}"
        );
    }
1129}