1use super::{DetectorSpec, VerifySpec};
4use regex_syntax::ast;
5use serde::Serialize;
6
/// Upper bound, in bytes, on the length of a regex source string accepted by
/// `validate_regex_definition`; longer patterns are reported as an error
/// without being parsed.
const MAX_REGEX_PATTERN_LEN: usize = 4096;
/// A single finding produced while validating a detector specification.
///
/// Each variant carries the human-readable message describing the finding.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub enum QualityIssue {
    /// A finding reported at error severity.
    Error(String),
    /// A finding reported at warning severity.
    Warning(String),
}
30
31pub fn validate_detector(spec: &DetectorSpec) -> Vec<QualityIssue> {
56 let mut issues = Vec::new();
57 validate_patterns_present(spec, &mut issues);
58 validate_regexes(spec, &mut issues);
59 validate_keywords(spec, &mut issues);
60 validate_pattern_specificity(spec, &mut issues);
61 validate_companions(spec, &mut issues);
62 validate_verify_spec(spec, &mut issues);
63 issues
64}
65
66fn validate_patterns_present(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
67 if spec.patterns.is_empty() {
68 issues.push(QualityIssue::Error("no patterns defined".into()));
69 }
70}
71
72fn validate_regexes(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
73 for (i, pat) in spec.patterns.iter().enumerate() {
74 validate_regex_definition("pattern", i, &pat.regex, issues);
75 }
76}
77
78fn validate_keywords(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
79 if spec.keywords.is_empty() {
80 issues.push(QualityIssue::Warning(
81 "no keywords defined - pattern may produce false positives".into(),
82 ));
83 }
84}
85
86fn validate_pattern_specificity(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
87 for (i, pat) in spec.patterns.iter().enumerate() {
88 let has_prefix = has_literal_prefix(&pat.regex, 3);
89 let has_group = pat.group.is_some();
90 let is_pure_charclass = is_pure_character_class(&pat.regex);
91
92 if is_pure_charclass && !has_group {
93 issues.push(QualityIssue::Error(format!(
94 "pattern {} is a pure character class ({}) - too broad without context anchoring. \
95 Use a capture group or add a literal prefix.",
96 i, pat.regex
97 )));
98 } else if !has_prefix && !has_group && spec.keywords.is_empty() {
99 issues.push(QualityIssue::Warning(format!(
100 "pattern {} has no literal prefix and no capture group - may false-positive",
101 i
102 )));
103 }
104 }
105}
106
107fn validate_companions(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
108 for (i, companion) in spec.companions.iter().enumerate() {
109 if companion.name.trim().is_empty() {
110 issues.push(QualityIssue::Error(format!(
111 "companion {} name must not be empty",
112 i
113 )));
114 }
115 validate_regex_definition("companion", i, &companion.regex, issues);
116 if is_pure_character_class(&companion.regex) {
122 if companion.within_lines <= TIGHT_COMPANION_RADIUS {
123 issues.push(QualityIssue::Warning(format!(
124 "companion {} regex '{}' is a pure character class; \
125 allowed because within_lines={} ≤ {} (positional anchoring).",
126 i, companion.regex, companion.within_lines, TIGHT_COMPANION_RADIUS
127 )));
128 } else {
129 issues.push(QualityIssue::Error(format!(
130 "companion {} regex '{}' is a pure character class with within_lines={} \
131 (> {}) - the wide search radius needs a literal context anchor",
132 i, companion.regex, companion.within_lines, TIGHT_COMPANION_RADIUS
133 )));
134 }
135 } else if !has_substantial_literal(&companion.regex, 3) {
136 issues.push(QualityIssue::Warning(format!(
137 "companion {} regex '{}' is too broad - may produce false positives. \
138 Add a context anchor like 'KEY_NAME='.",
139 i, companion.regex
140 )));
141 }
142 }
143}
144
/// Largest `within_lines` value for which `validate_companions` reports a
/// pure-character-class companion regex as a warning rather than an error.
const TIGHT_COMPANION_RADIUS: usize = 5;
148
149fn validate_regex_definition(
150 kind: &str,
151 index: usize,
152 regex: &str,
153 issues: &mut Vec<QualityIssue>,
154) {
155 if regex.len() > MAX_REGEX_PATTERN_LEN {
156 issues.push(QualityIssue::Error(format!(
157 "{kind} {index} regex is too large ({} bytes > {} byte limit)",
158 regex.len(),
159 MAX_REGEX_PATTERN_LEN
160 )));
161 return;
162 }
163
164 match ast::parse::Parser::new().parse(regex) {
165 Ok(ast) => validate_regex_complexity(kind, index, &ast, issues),
166 Err(error) => issues.push(QualityIssue::Error(format!(
167 "{kind} {index} regex does not compile: {error}"
168 ))),
169 }
170}
171
172fn has_substantial_literal(pattern: &str, min_len: usize) -> bool {
173 let mut max_literal_len = 0;
174 let mut current_literal_len = 0;
175 let mut in_escape = false;
176 let mut in_char_class = false;
177
178 for ch in pattern.chars() {
179 if in_escape {
180 if is_escaped_literal(ch) {
181 current_literal_len += 1;
182 } else {
183 max_literal_len = max_literal_len.max(current_literal_len);
184 current_literal_len = 0;
185 }
186 in_escape = false;
187 continue;
188 }
189
190 match ch {
191 '\\' => in_escape = true,
192 '[' => {
193 max_literal_len = max_literal_len.max(current_literal_len);
194 current_literal_len = 0;
195 in_char_class = true;
196 }
197 ']' => {
198 in_char_class = false;
199 }
200 '(' | ')' | '.' | '*' | '+' | '?' | '{' | '}' | '|' | '^' | '$' => {
201 max_literal_len = max_literal_len.max(current_literal_len);
202 current_literal_len = 0;
203 }
204 _ => {
205 if !in_char_class {
206 current_literal_len += 1;
207 }
208 }
209 }
210 }
211 max_literal_len = max_literal_len.max(current_literal_len);
212 max_literal_len >= min_len
213}
214
/// Returns `true` when `ch` is one of the regex metacharacters
/// `[ ] ( ) . * + ? { } \ | ^ $`.
///
/// The caller uses this for the character that follows a backslash.
fn is_escaped_literal(ch: char) -> bool {
    const METACHARACTERS: &str = "[]().*+?{}\\|^$";
    METACHARACTERS.contains(ch)
}
221
222fn validate_verify_spec(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
223 if let Some(ref verify) = spec.verify {
224 if !verify.steps.is_empty() {
226 for step in &verify.steps {
227 validate_url(&step.url, issues);
228 check_url_exfil_risk(&step.url, &verify.allowed_domains, issues);
229 }
230 } else if let Some(ref url) = verify.url {
231 validate_url(url, issues);
232 check_url_exfil_risk(url, &verify.allowed_domains, issues);
233 } else {
234 issues.push(QualityIssue::Error(
235 "verify spec has no steps and no default URL".into(),
236 ));
237 }
238 check_oob_consistency(verify, issues);
239 }
240 check_reserved_companion_names(spec, issues);
241}
242
/// Companion names that detectors may not use; `check_reserved_companion_names`
/// reports an error for any companion whose name equals one of these exactly.
const RESERVED_COMPANION_NAMES: &[&str] =
    &["__keyhog_oob_url", "__keyhog_oob_host", "__keyhog_oob_id"];
251
252fn check_reserved_companion_names(spec: &DetectorSpec, issues: &mut Vec<QualityIssue>) {
253 for (i, c) in spec.companions.iter().enumerate() {
254 if RESERVED_COMPANION_NAMES.contains(&c.name.as_str()) {
255 issues.push(QualityIssue::Error(format!(
256 "companion {} name '{}' is reserved for the OOB interpolator. \
257 Pick a different name; this collision would corrupt verify templates.",
258 i, c.name,
259 )));
260 }
261 }
262}
263
264fn check_oob_consistency(verify: &VerifySpec, issues: &mut Vec<QualityIssue>) {
277 let mut interactsh_referenced = false;
278 let mut scan = |s: &str| {
279 if s.contains("{{interactsh") {
280 interactsh_referenced = true;
281 }
282 };
283 if let Some(ref url) = verify.url {
284 scan(url);
285 }
286 if let Some(ref body) = verify.body {
287 scan(body);
288 }
289 for h in &verify.headers {
290 scan(&h.value);
291 }
292 for step in &verify.steps {
293 scan(&step.url);
294 if let Some(ref body) = step.body {
295 scan(body);
296 }
297 for h in &step.headers {
298 scan(&h.value);
299 }
300 }
301 let oob_configured = verify.oob.is_some();
302 match (oob_configured, interactsh_referenced) {
303 (true, false) => issues.push(QualityIssue::Error(
304 "verify.oob is set but no `{{interactsh}}` / `{{interactsh.host}}` / \
305 `{{interactsh.url}}` / `{{interactsh.id}}` token appears in any verify \
306 template - the OOB callback URL has nowhere to land, so the wait_for \
307 would always time out. Either embed an interactsh token in the body, \
308 URL, or a header - or remove the [detector.verify.oob] block."
309 .into(),
310 )),
311 (false, true) => issues.push(QualityIssue::Error(
312 "an `{{interactsh*}}` token is referenced in a verify template but no \
313 [detector.verify.oob] block is set - the token will resolve to an empty \
314 string at runtime and ship a malformed request to the service. Either \
315 add a [detector.verify.oob] block or remove the token."
316 .into(),
317 )),
318 _ => {}
319 }
320}
321
322fn check_url_exfil_risk(url: &str, allowed_domains: &[String], issues: &mut Vec<QualityIssue>) {
329 let trimmed = url.trim();
334 let after_scheme = trimmed
335 .strip_prefix("https://")
336 .or_else(|| trimmed.strip_prefix("http://"))
337 .unwrap_or(trimmed);
338 let host_starts_with_template =
339 after_scheme.starts_with("{{") || after_scheme.starts_with("{") || trimmed == "{{match}}";
340 if host_starts_with_template && allowed_domains.is_empty() {
341 issues.push(QualityIssue::Error(
342 "verify URL host is templated and no `allowed_domains` is set - \
343 attacker-controlled interpolation could exfil credentials. \
344 Either hardcode the authoritative host in the URL or set \
345 `allowed_domains` explicitly. See kimi-wave3 §1."
346 .into(),
347 ));
348 }
349 if url.contains('{') && !url.contains("{{") {
352 issues.push(QualityIssue::Error(
353 "verify URL uses single-brace `{var}` template syntax which the \
354 interpolator does NOT honor (only `{{var}}` works); the URL will \
355 be sent to a literal-string host. Use `{{companion.var}}`."
356 .into(),
357 ));
358 }
359}
360
361fn validate_url(url: &str, issues: &mut Vec<QualityIssue>) {
362 if url.is_empty() {
363 issues.push(QualityIssue::Error("verify URL is empty".into()));
364 }
365 if url.starts_with("http://") && !url.contains("localhost") {
366 issues.push(QualityIssue::Warning(
367 "verify URL uses HTTP instead of HTTPS".into(),
368 ));
369 }
370}
371
/// Returns `true` when `pattern` begins with at least `min_len` characters
/// that precede the first of `[ ( . * + ? { \ | ^ $`.
fn has_literal_prefix(pattern: &str, min_len: usize) -> bool {
    let prefix_len = pattern
        .chars()
        .take_while(|&ch| {
            !matches!(
                ch,
                '[' | '(' | '.' | '*' | '+' | '?' | '{' | '\\' | '|' | '^' | '$'
            )
        })
        .count();
    prefix_len >= min_len
}
382
/// Returns `true` when `pattern` is nothing but a single bracket class,
/// optionally followed by one quantifier (`+`, `*`, `?` or `{...}`).
///
/// Surrounding whitespace, and whitespace between the class and its
/// quantifier, is ignored. Anything else before or after the class makes the
/// result `false`, as does a class that is never closed.
///
/// The closing bracket is located by scanning the class body rather than by
/// taking the first `]` in the string, so that these forms are recognised:
/// * an escaped bracket such as `[a-z\]]`;
/// * a nested or POSIX class such as `[[:alnum:]]`;
/// * a literal `]` in first position, as in `[]a-z]` or `[^]]`.
fn is_pure_character_class(pattern: &str) -> bool {
    /// Byte offset, within `body` (the text after the opening `[`), of the
    /// `]` that closes the class, or `None` when the class is unclosed.
    fn closing_bracket(body: &str) -> Option<usize> {
        // Skip an optional negation marker, then an optional leading `]`,
        // which is a literal member rather than the end of the class.
        let mut start = 0;
        if body[start..].starts_with('^') {
            start += 1;
        }
        if body[start..].starts_with(']') {
            start += 1;
        }

        let mut depth = 1usize;
        let mut escaped = false;
        for (offset, ch) in body[start..].char_indices() {
            if escaped {
                escaped = false;
                continue;
            }
            match ch {
                '\\' => escaped = true,
                '[' => depth += 1,
                ']' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(start + offset);
                    }
                }
                _ => {}
            }
        }
        None
    }

    let Some(body) = pattern.trim().strip_prefix('[') else {
        return false;
    };
    let Some(close) = closing_bracket(body) else {
        return false;
    };

    let remainder = body[close + 1..].trim();
    if matches!(remainder, "" | "+" | "*" | "?") {
        return true;
    }

    // A counted repetition is accepted only when nothing follows its `}`.
    match remainder
        .strip_prefix('{')
        .and_then(|rest| rest.split_once('}'))
    {
        Some((_, tail)) => tail.trim().is_empty(),
        None => false,
    }
}
408
409#[path = "validate_regex.rs"]
410mod validate_regex;
411use validate_regex::validate_regex_complexity;