1use super::{DetectorSpec, VerifySpec};
4use regex_syntax::ast;
5use serde::Serialize;
6
/// Largest regex source, in bytes, that `validate_regex_definition` will parse;
/// anything longer is reported as an error without being handed to the parser.
const MAX_REGEX_PATTERN_LEN: usize = 4096;
/// A single finding produced while validating a detector specification.
///
/// Each variant carries the human-readable message describing the finding.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum QualityIssue {
    /// A finding reported at error severity.
    Error(String),
    /// A finding reported at warning severity.
    Warning(String),
}
30
31pub fn validate_detector(spec: &DetectorSpec) -> Vec<QualityIssue> {
55 let mut issues = Vec::new();
56 validate_patterns_present(spec, &mut issues);
57 validate_regexes(spec, &mut issues);
58 validate_keywords(spec, &mut issues);
59 validate_pattern_specificity(spec, &mut issues);
60 validate_companions(spec, &mut issues);
61 validate_verify_spec(spec, &mut issues);
62 issues
63}
64
65fn validate_patterns_present(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
66 if spec.patterns.is_empty() {
67 issues.push(QualityIssue::Error("no patterns defined".into()));
68 }
69}
70
71fn validate_regexes(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
72 for (i, pat) in spec.patterns.iter().enumerate() {
73 validate_regex_definition("pattern", i, &pat.regex, issues);
74 }
75}
76
77fn validate_keywords(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
78 if spec.keywords.is_empty() {
79 issues.push(QualityIssue::Warning(
80 "no keywords defined - pattern may produce false positives".into(),
81 ));
82 }
83}
84
85fn validate_pattern_specificity(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
86 for (i, pat) in spec.patterns.iter().enumerate() {
87 let has_prefix = has_literal_prefix(&pat.regex, 3);
88 let has_group = pat.group.is_some();
89 let is_pure_charclass = is_pure_character_class(&pat.regex);
90
91 if is_pure_charclass && !has_group {
92 issues.push(QualityIssue::Error(format!(
93 "pattern {} is a pure character class ({}) - too broad without context anchoring. \
94 Use a capture group or add a literal prefix.",
95 i, pat.regex
96 )));
97 } else if !has_prefix && !has_group && spec.keywords.is_empty() {
98 issues.push(QualityIssue::Warning(format!(
99 "pattern {} has no literal prefix and no capture group - may false-positive",
100 i
101 )));
102 }
103 }
104}
105
106fn validate_companions(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
107 for (i, companion) in spec.companions.iter().enumerate() {
108 if companion.name.trim().is_empty() {
109 issues.push(QualityIssue::Error(format!(
110 "companion {} name must not be empty",
111 i
112 )));
113 }
114 validate_regex_definition("companion", i, &companion.regex, issues);
115 if is_pure_character_class(&companion.regex) {
121 if companion.within_lines <= TIGHT_COMPANION_RADIUS {
122 issues.push(QualityIssue::Warning(format!(
123 "companion {} regex '{}' is a pure character class; \
124 allowed because within_lines={} ≤ {} (positional anchoring).",
125 i, companion.regex, companion.within_lines, TIGHT_COMPANION_RADIUS
126 )));
127 } else {
128 issues.push(QualityIssue::Error(format!(
129 "companion {} regex '{}' is a pure character class with within_lines={} \
130 (> {}) - the wide search radius needs a literal context anchor",
131 i, companion.regex, companion.within_lines, TIGHT_COMPANION_RADIUS
132 )));
133 }
134 } else if !has_substantial_literal(&companion.regex, 3) {
135 issues.push(QualityIssue::Warning(format!(
136 "companion {} regex '{}' is too broad - may produce false positives. \
137 Add a context anchor like 'KEY_NAME='.",
138 i, companion.regex
139 )));
140 }
141 }
142}
143
/// Largest `within_lines` value for which `validate_companions` still accepts
/// (with a warning) a companion regex that is only a character class; a larger
/// search radius turns that finding into an error.
const TIGHT_COMPANION_RADIUS: usize = 5;
147
148fn validate_regex_definition(
149 kind: &str,
150 index: usize,
151 regex: &str,
152 issues: &mut Vec<QualityIssue>,
153) {
154 if regex.len() > MAX_REGEX_PATTERN_LEN {
155 issues.push(QualityIssue::Error(format!(
156 "{kind} {index} regex is too large ({} bytes > {} byte limit)",
157 regex.len(),
158 MAX_REGEX_PATTERN_LEN
159 )));
160 return;
161 }
162
163 match ast::parse::Parser::new().parse(regex) {
164 Ok(ast) => validate_regex_complexity(kind, index, &ast, issues),
165 Err(error) => issues.push(QualityIssue::Error(format!(
166 "{kind} {index} regex does not compile: {error}"
167 ))),
168 }
169}
170
/// Returns `true` when `pattern` contains a run of at least `min_len`
/// consecutive literal characters.
///
/// This is a lightweight lexical scan, not a full regex parse. A run is made of
/// plain characters and escaped metacharacters (see [`is_escaped_literal`]) and
/// is ended by any regex operator, group boundary, class or repetition.
///
/// The following are deliberately NOT counted as literal text, because they
/// never have to appear verbatim in a match:
/// * members of a character class, including escaped ones (`[\.\$]`) and
///   members of nested classes (`[a[b]cde]`);
/// * the payload of a brace construct: counted repetitions (`{32,64}`) and
///   brace-delimited escapes such as `\p{Greek}`;
/// * group headers: flags (`(?i)`, `(?i:`), non-capturing markers (`(?:`) and
///   capture names (`(?P<name>`, `(?<name>`).
///
/// A leading unescaped `]` inside a class is treated as the class terminator.
fn has_substantial_literal(pattern: &str, min_len: usize) -> bool {
    let mut longest = 0;
    let mut current = 0;
    let mut in_escape = false;
    let mut class_depth = 0usize;
    let mut in_braces = false;
    let mut in_group_header = false;

    let mut chars = pattern.chars().peekable();
    while let Some(ch) = chars.next() {
        if in_escape {
            in_escape = false;
            // Inside a class or brace payload the escaped char is not matched text.
            if class_depth > 0 || in_braces {
                continue;
            }
            if is_escaped_literal(ch) {
                current += 1;
            } else {
                // `\d`, `\w`, `\p`, ... match a set of characters, not a literal.
                longest = longest.max(current);
                current = 0;
            }
            continue;
        }
        if ch == '\\' {
            in_escape = true;
            continue;
        }
        if class_depth > 0 {
            match ch {
                '[' => class_depth += 1,
                ']' => class_depth -= 1,
                _ => {}
            }
            continue;
        }
        if in_braces {
            in_braces = ch != '}';
            continue;
        }
        if in_group_header {
            // The header ends at `:` (flags / non-capturing), `)` (flags only)
            // or `>` (capture name).
            in_group_header = !matches!(ch, ':' | ')' | '>');
            continue;
        }

        match ch {
            '[' => {
                longest = longest.max(current);
                current = 0;
                class_depth = 1;
            }
            '{' => {
                longest = longest.max(current);
                current = 0;
                in_braces = true;
            }
            '(' => {
                longest = longest.max(current);
                current = 0;
                in_group_header = chars.peek() == Some(&'?');
            }
            // A stray `]` outside a class neither extends nor ends a run.
            ']' => {}
            ')' | '.' | '*' | '+' | '?' | '}' | '|' | '^' | '$' => {
                longest = longest.max(current);
                current = 0;
            }
            _ => current += 1,
        }
    }

    longest.max(current) >= min_len
}

/// Returns `true` when `ch`, written after a backslash, denotes that very
/// character literally (an escaped regex metacharacter) rather than a
/// character-class shorthand or other escape sequence.
fn is_escaped_literal(ch: char) -> bool {
    matches!(
        ch,
        '[' | ']' | '(' | ')' | '.' | '*' | '+' | '?' | '{' | '}' | '\\' | '|' | '^' | '$'
    )
}
220
221fn validate_verify_spec(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
222 if let Some(ref verify) = spec.verify {
223 if !verify.steps.is_empty() {
225 for step in &verify.steps {
226 validate_url(&step.url, issues);
227 check_url_exfil_risk(&step.url, &verify.allowed_domains, issues);
228 }
229 } else if let Some(ref url) = verify.url {
230 validate_url(url, issues);
231 check_url_exfil_risk(url, &verify.allowed_domains, issues);
232 } else {
233 issues.push(QualityIssue::Error(
234 "verify spec has no steps and no default URL".into(),
235 ));
236 }
237 check_oob_consistency(verify, issues);
238 }
239 check_reserved_companion_names(spec, issues);
240}
241
/// Companion names a detector may not declare: `check_reserved_companion_names`
/// reports any companion using one of them as an error because the names are
/// reserved for the OOB interpolator.
const RESERVED_COMPANION_NAMES: &[&str] =
    &["__keyhog_oob_url", "__keyhog_oob_host", "__keyhog_oob_id"];
250
251fn check_reserved_companion_names(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
252 for (i, c) in spec.companions.iter().enumerate() {
253 if RESERVED_COMPANION_NAMES.contains(&c.name.as_str()) {
254 issues.push(QualityIssue::Error(format!(
255 "companion {} name '{}' is reserved for the OOB interpolator. \
256 Pick a different name; this collision would corrupt verify templates.",
257 i, c.name,
258 )));
259 }
260 }
261}
262
263fn check_oob_consistency(verify: &VerifySpec, issues: &mut Vec<QualityIssue>) {
276 let mut interactsh_referenced = false;
277 let mut scan = |s: &str| {
278 if s.contains("{{interactsh") {
279 interactsh_referenced = true;
280 }
281 };
282 if let Some(ref url) = verify.url {
283 scan(url);
284 }
285 if let Some(ref body) = verify.body {
286 scan(body);
287 }
288 for h in &verify.headers {
289 scan(&h.value);
290 }
291 for step in &verify.steps {
292 scan(&step.url);
293 if let Some(ref body) = step.body {
294 scan(body);
295 }
296 for h in &step.headers {
297 scan(&h.value);
298 }
299 }
300 let oob_configured = verify.oob.is_some();
301 match (oob_configured, interactsh_referenced) {
302 (true, false) => issues.push(QualityIssue::Error(
303 "verify.oob is set but no `{{interactsh}}` / `{{interactsh.host}}` / \
304 `{{interactsh.url}}` / `{{interactsh.id}}` token appears in any verify \
305 template - the OOB callback URL has nowhere to land, so the wait_for \
306 would always time out. Either embed an interactsh token in the body, \
307 URL, or a header - or remove the [detector.verify.oob] block."
308 .into(),
309 )),
310 (false, true) => issues.push(QualityIssue::Error(
311 "an `{{interactsh*}}` token is referenced in a verify template but no \
312 [detector.verify.oob] block is set - the token will resolve to an empty \
313 string at runtime and ship a malformed request to the service. Either \
314 add a [detector.verify.oob] block or remove the token."
315 .into(),
316 )),
317 _ => {}
318 }
319}
320
321fn check_url_exfil_risk(url: &str, allowed_domains: &[String], issues: &mut Vec<QualityIssue>) {
328 let trimmed = url.trim();
333 let after_scheme = trimmed
334 .strip_prefix("https://")
335 .or_else(|| trimmed.strip_prefix("http://"))
336 .unwrap_or(trimmed);
337 let host_starts_with_template =
338 after_scheme.starts_with("{{") || after_scheme.starts_with("{") || trimmed == "{{match}}";
339 if host_starts_with_template && allowed_domains.is_empty() {
340 issues.push(QualityIssue::Error(
341 "verify URL host is templated and no `allowed_domains` is set - \
342 attacker-controlled interpolation could exfil credentials. \
343 Either hardcode the authoritative host in the URL or set \
344 `allowed_domains` explicitly. See kimi-wave3 §1."
345 .into(),
346 ));
347 }
348 if url.contains('{') && !url.contains("{{") {
351 issues.push(QualityIssue::Error(
352 "verify URL uses single-brace `{var}` template syntax which the \
353 interpolator does NOT honor (only `{{var}}` works); the URL will \
354 be sent to a literal-string host. Use `{{companion.var}}`."
355 .into(),
356 ));
357 }
358}
359
360fn validate_url(url: &str, issues: &mut Vec<QualityIssue>) {
361 if url.is_empty() {
362 issues.push(QualityIssue::Error("verify URL is empty".into()));
363 }
364 if url.starts_with("http://") && !url.contains("localhost") {
365 issues.push(QualityIssue::Warning(
366 "verify URL uses HTTP instead of HTTPS".into(),
367 ));
368 }
369}
370
/// Returns `true` when `pattern` starts with at least `min_len` characters
/// before the first occurrence of any of
/// `[`, `(`, `.`, `*`, `+`, `?`, `{`, `\`, `|`, `^` or `$`.
fn has_literal_prefix(pattern: &str, min_len: usize) -> bool {
    let prefix_len = pattern
        .chars()
        .take_while(|ch| {
            !matches!(
                ch,
                '[' | '(' | '.' | '*' | '+' | '?' | '{' | '\\' | '|' | '^' | '$'
            )
        })
        .count();
    prefix_len >= min_len
}
381
/// Returns `true` when `pattern` is nothing but a single character class,
/// optionally followed by one quantifier.
///
/// Accepted shapes (surrounding whitespace is ignored): `[…]`, `[…]+`, `[…]*`,
/// `[…]?`, `[…]{…}`, each optionally followed by the lazy marker `?`.
///
/// The end of the class is located by [`character_class_end`], so an escaped
/// bracket (`[^\]]+`), a leading literal `]` (`[]a]+`) and nested or POSIX
/// classes (`[[:alnum:]]{40}`) are all recognised as one class rather than
/// being cut short at the first `]`.
fn is_pure_character_class(pattern: &str) -> bool {
    let trimmed = pattern.trim();
    if !trimmed.starts_with('[') {
        return false;
    }

    let Some(close) = character_class_end(trimmed) else {
        return false;
    };
    let remainder = trimmed[close + 1..].trim();

    // Strip at most one quantifier following the class.
    let after_quantifier =
        if let Some(rest) = remainder.strip_prefix(|c: char| matches!(c, '+' | '*' | '?')) {
            rest
        } else if remainder.starts_with('{') {
            match remainder.find('}') {
                Some(brace_close) => &remainder[brace_close + 1..],
                // Unterminated `{`: not a well-formed quantifier.
                None => return false,
            }
        } else {
            remainder
        };

    // A `?` directly after the quantifier only makes it lazy.
    after_quantifier
        .strip_prefix('?')
        .unwrap_or(after_quantifier)
        .trim()
        .is_empty()
}

/// Returns the byte index of the `]` that closes the character class opening
/// at the start of `pattern`, or `None` when the class is never closed.
///
/// Expects `pattern` to begin with `[`. Backslash escapes are skipped, every
/// unescaped `[` opens a nested level, and a `]` directly after `[` or `[^` is
/// treated as a literal member rather than a terminator.
fn character_class_end(pattern: &str) -> Option<usize> {
    #[derive(Clone, Copy)]
    enum Pos {
        /// Directly after an opening `[`.
        Opened,
        /// Directly after the `^` that follows an opening `[`.
        Negated,
        /// Anywhere else.
        Body,
    }

    let mut depth = 0usize;
    let mut escaped = false;
    let mut pos = Pos::Body;

    for (idx, ch) in pattern.char_indices() {
        if escaped {
            escaped = false;
            pos = Pos::Body;
            continue;
        }
        pos = match (ch, pos) {
            ('\\', _) => {
                escaped = true;
                Pos::Body
            }
            ('[', _) => {
                depth += 1;
                Pos::Opened
            }
            ('^', Pos::Opened) => Pos::Negated,
            (']', Pos::Body) => {
                if depth <= 1 {
                    return Some(idx);
                }
                depth -= 1;
                Pos::Body
            }
            // Includes a `]` right after `[` / `[^`, which is a literal member.
            _ => Pos::Body,
        };
    }
    None
}
407
408#[path = "validate_regex.rs"]
409mod validate_regex;
410use validate_regex::validate_regex_complexity;