keyhog_core/spec.rs
1//! Detector specification: TOML-based pattern definitions with regex, keywords,
2//! verification endpoints, and companion patterns.
3
4mod load;
5mod validate;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10pub use load::{
11 load_detector_cache, load_detectors, load_detectors_from_str, load_detectors_with_gate,
12 save_detector_cache,
13};
14pub use validate::{validate_detector, QualityIssue};
15
16/// Metadata field specification for verification results.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct MetadataSpec {
19 /// Field name in the finding metadata map.
20 pub name: String,
21 /// GJSON path to extract from the verification response body.
22 pub json_path: String,
23}
24
25/// A complete detector definition loaded from a TOML file.
26#[derive(Debug, Clone, Serialize, Deserialize, Default)]
27pub struct DetectorSpec {
28 /// Unique stable identifier (e.g. \`aws-access-key\`).
29 pub id: String,
30 /// Human-readable name.
31 pub name: String,
32 /// Target service (e.g. \`aws\`, \`stripe\`).
33 pub service: String,
34 /// Default severity for findings.
35 pub severity: Severity,
36 /// List of regex patterns to match.
37 pub patterns: Vec<PatternSpec>,
38 /// Secondary patterns required to confirm a match.
39 #[serde(default)]
40 pub companions: Vec<CompanionSpec>,
41 /// Live verification configuration.
42 pub verify: Option<VerifySpec>,
43 /// High-performance pre-filtering keywords.
44 #[serde(default)]
45 pub keywords: Vec<String>,
46}
47
48/// A regex pattern with optional capture group and description.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct PatternSpec {
51 /// Regular expression string (Rust flavor).
52 pub regex: String,
53 /// Optional context description.
54 pub description: Option<String>,
55 /// Optional capture group index containing the secret.
56 pub group: Option<usize>,
57}
58
59/// Secondary pattern used to confirm a primary match or provide extra context.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct CompanionSpec {
62 /// Field name used in verification templates (e.g. \`{{companion.secret_key}}\`).
63 pub name: String,
64 /// Regex to find the companion value nearby.
65 pub regex: String,
66 /// Maximum line distance from the primary match.
67 pub within_lines: usize,
68 /// Whether this companion must be found to report the finding.
69 #[serde(default)]
70 pub required: bool,
71}
72
73/// Live verification configuration for a detector.
74#[derive(Debug, Clone, Default, Serialize, Deserialize)]
75pub struct VerifySpec {
76 /// Target service identifier (defaults to detector's service if omitted).
77 #[serde(default)]
78 pub service: String,
79 /// HTTP method (default: GET).
80 pub method: Option<HttpMethod>,
81 /// Endpoint URL with optional \`{{match}}\` or \`{{companion.<name>}}\` placeholders.
82 pub url: Option<String>,
83 /// Authentication scheme.
84 pub auth: Option<AuthSpec>,
85 /// Custom HTTP headers.
86 #[serde(default)]
87 pub headers: Vec<HeaderSpec>,
88 /// Optional request body template.
89 pub body: Option<String>,
90 /// Criteria for a successful verification.
91 pub success: Option<SuccessSpec>,
92 /// Metadata to extract from the response.
93 #[serde(default)]
94 pub metadata: Vec<MetadataSpec>,
95 /// Optional request timeout override.
96 pub timeout_ms: Option<u64>,
97 /// Multi-step verification flow.
98 #[serde(default)]
99 pub steps: Vec<StepSpec>,
100 /// Domain allowlist for the verify URL after interpolation. If non-empty,
101 /// the resolved host of the (interpolated) URL — and of every step's URL —
102 /// MUST equal one of these entries (or be a subdomain of one). When empty,
103 /// the verifier falls back to a hardcoded service allowlist if the
104 /// `service` field maps to a known provider; otherwise the verifier
105 /// REFUSES to send the request. This blocks malicious detector TOMLs
106 /// that set `url = "{{match}}"` (or interpolate an attacker-controlled
107 /// companion) from exfiltrating credentials. See kimi-wave1 audit
108 /// finding 4.1 + wave3 §1.
109 #[serde(default)]
110 pub allowed_domains: Vec<String>,
111 /// Optional out-of-band verification probe. When set, the verifier mints a
112 /// per-finding correlation URL via the configured interactsh server,
113 /// substitutes `{{interactsh}}` (and `{{interactsh.host}}` /
114 /// `{{interactsh.url}}`) into the request template, and waits for the
115 /// service to call back. OOB verification proves a leaked credential is
116 /// **exfil-capable**, not just live: a webhook URL that returns 200 OK to
117 /// every probe still has to actually fetch our collector to confirm it
118 /// will deliver attacker-controlled traffic.
119 ///
120 /// Gated behind the runtime `--verify-oob` flag — never default. When the
121 /// flag is off, `oob` is ignored and verification falls back to the
122 /// HTTP success criteria alone.
123 pub oob: Option<OobSpec>,
124}
125
126/// Out-of-band callback verification configuration.
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct OobSpec {
129 /// Callback protocol the verifier waits for. The service may also touch
130 /// other protocols on the same correlation id; only the listed ones count
131 /// toward `Verified`.
132 pub protocol: OobProtocol,
133 /// How long to wait for the callback after the HTTP request returns.
134 /// Defaults to 30 seconds when omitted; capped at the engine's
135 /// `oob_timeout_max` to bound scan time.
136 #[serde(default)]
137 pub timeout_secs: Option<u64>,
138 /// Verification policy:
139 /// - `OobAndHttp` (default): both HTTP success criteria *and* OOB
140 /// callback must hold. This is the strict mode for webhook-style
141 /// detectors where 200 OK is necessary but not sufficient.
142 /// - `OobOnly`: ignore HTTP success, trust the OOB callback. For
143 /// detectors where the API has no useful HTTP response shape but
144 /// provably triggers an outbound request (e.g., one-way push tokens).
145 /// - `OobOptional`: HTTP success alone verifies; OOB just enriches
146 /// metadata with `oob_observed=true|false` for the report.
147 #[serde(default)]
148 pub policy: OobPolicy,
149}
150
151/// Out-of-band callback protocol expected from a successful exfil.
152#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
153#[serde(rename_all = "lowercase")]
154pub enum OobProtocol {
155 /// Any DNS resolution against `{{interactsh}}.host`. Cheapest signal —
156 /// many services resolve a webhook URL even before fetching it.
157 Dns,
158 /// HTTP or HTTPS request to the interactsh URL. The strongest signal;
159 /// proves the service made an outbound HTTP request with the credential.
160 Http,
161 /// SMTP delivery attempt to `<random>@{{interactsh.host}}`. For mail
162 /// detectors (Mailgun, SendGrid, …) where exfil = sending mail.
163 Smtp,
164 /// Any of the above. Use sparingly — a chatty CDN doing DNS prefetch
165 /// can cause false positives.
166 Any,
167}
168
169/// How OOB observation combines with HTTP success criteria.
170#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
171#[serde(rename_all = "snake_case")]
172pub enum OobPolicy {
173 #[default]
174 OobAndHttp,
175 OobOnly,
176 OobOptional,
177}
178
179/// A single step in a multi-step verification flow.
180#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct StepSpec {
182 pub name: String,
183 pub method: HttpMethod,
184 pub url: String,
185 pub auth: AuthSpec,
186 #[serde(default)]
187 pub headers: Vec<HeaderSpec>,
188 pub body: Option<String>,
189 pub success: SuccessSpec,
190 #[serde(default)]
191 pub extract: Vec<MetadataSpec>,
192}
193
194/// Custom HTTP header specification.
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct HeaderSpec {
197 pub name: String,
198 pub value: String,
199}
200
201/// Authentication scheme for verification requests.
202#[derive(Debug, Clone, Serialize, Deserialize)]
203#[serde(tag = "type", rename_all = "snake_case")]
204pub enum AuthSpec {
205 None,
206 Bearer {
207 field: String,
208 },
209 Basic {
210 username: String,
211 password: String,
212 },
213 Header {
214 name: String,
215 template: String,
216 },
217 Query {
218 param: String,
219 field: String,
220 },
221 #[serde(rename = "aws_v4")]
222 AwsV4 {
223 access_key: String,
224 secret_key: String,
225 region: String,
226 service: String,
227 session_token: Option<String>,
228 },
229 Script {
230 engine: String,
231 code: String,
232 },
233}
234
235impl AuthSpec {
236 pub fn service_name(&self) -> Option<&str> {
237 match self {
238 AuthSpec::AwsV4 { service, .. } => Some(service),
239 _ => None,
240 }
241 }
242}
243
244/// Criteria for a successful verification response.
245#[derive(Debug, Clone, Serialize, Deserialize, Default)]
246pub struct SuccessSpec {
247 #[serde(default)]
248 /// Required HTTP status code.
249 pub status: Option<u16>,
250 #[serde(default)]
251 /// Reject if this status code is returned.
252 pub status_not: Option<u16>,
253 #[serde(default)]
254 /// Response body must contain this substring.
255 pub body_contains: Option<String>,
256 #[serde(default)]
257 /// Response body must NOT contain this substring.
258 pub body_not_contains: Option<String>,
259 #[serde(default)]
260 /// GJSON path to check in response body.
261 pub json_path: Option<String>,
262 #[serde(default)]
263 /// Expected value at \`json_path\`.
264 pub equals: Option<String>,
265}
266
267/// Severity level for a finding.
268#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default)]
269#[serde(rename_all = "lowercase")]
270pub enum Severity {
271 #[default]
272 Info,
273 Low,
274 Medium,
275 High,
276 Critical,
277}
278
279impl Severity {
280 pub fn to_severity(&self) -> Self {
281 *self
282 }
283
284 /// Step the severity down one tier (Critical → High, High → Medium, …).
285 /// `Info` stays at `Info` (no lower bucket).
286 ///
287 /// Used by diff-aware scoring: a credential that only appears in non-HEAD
288 /// git history is still a leak (commit history is public if the repo is)
289 /// but is meaningfully less urgent than a credential live in HEAD that an
290 /// attacker can grep right now. One tier of downgrade communicates that
291 /// without hiding the finding entirely.
292 pub fn downgrade_one(self) -> Self {
293 match self {
294 Severity::Critical => Severity::High,
295 Severity::High => Severity::Medium,
296 Severity::Medium => Severity::Low,
297 Severity::Low => Severity::Info,
298 Severity::Info => Severity::Info,
299 }
300 }
301}
302
303/// HTTP method for verification requests.
304#[derive(Debug, Clone, Serialize, Deserialize)]
305pub enum HttpMethod {
306 #[serde(rename = "GET")]
307 Get,
308 #[serde(rename = "POST")]
309 Post,
310 #[serde(rename = "PUT")]
311 Put,
312 #[serde(rename = "DELETE")]
313 Delete,
314 #[serde(rename = "PATCH")]
315 Patch,
316 #[serde(rename = "HEAD")]
317 Head,
318}
319
320/// Wrapping struct for a detector TOML file.
321#[derive(Debug, Clone, Serialize, Deserialize)]
322pub struct DetectorFile {
323 pub detector: DetectorSpec,
324}
325
326/// Errors returned while loading or validating detector specifications.
327#[derive(Debug, Error)]
328#[allow(clippy::result_large_err)] // SpecError variants include 128-byte toml::de::Error; boxing would be a breaking API change.
329pub enum SpecError {
330 #[error(
331 "failed to read detector file {path}: {source}. Fix: check the detector path exists and that the file is readable TOML"
332 )]
333 ReadFile {
334 path: String,
335 source: std::io::Error,
336 },
337 #[error("invalid TOML in detector {path}: {source}. Fix: repair the TOML syntax in the detector file")]
338 InvalidToml {
339 path: std::path::PathBuf,
340 source: toml::de::Error,
341 },
342}
343
#[cfg(test)]
mod tests {
    use super::Severity;

    /// Every tier above `Info` maps to the tier directly beneath it.
    #[test]
    fn severity_downgrade_walks_one_step() {
        let expected_steps = [
            (Severity::Critical, Severity::High),
            (Severity::High, Severity::Medium),
            (Severity::Medium, Severity::Low),
            (Severity::Low, Severity::Info),
        ];
        for (before, after) in expected_steps {
            assert_eq!(before.downgrade_one(), after);
        }
    }

    /// `Info` is the floor: downgrading it yields `Info` again.
    #[test]
    fn severity_downgrade_floors_at_info() {
        let floor = Severity::Info;
        assert_eq!(floor.downgrade_one(), floor);
    }

    /// Repeated downgrades never loop or skip — each step is ≤ the previous
    /// one — and ten of them starting from `Critical` settle on `Info`.
    #[test]
    fn severity_downgrade_is_monotonic() {
        let settled = (0..10).fold(Severity::Critical, |current, _| {
            let lowered = current.downgrade_one();
            assert!(lowered <= current);
            lowered
        });
        assert_eq!(settled, Severity::Info);
    }
}
372}