Skip to main content

keyhog_core/
spec.rs

1//! Detector specification: TOML-based pattern definitions with regex, keywords,
2//! verification endpoints, and companion patterns.
3
4mod load;
5mod validate;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10pub use load::{
11    load_detector_cache, load_detectors, load_detectors_from_str, load_detectors_with_gate,
12    save_detector_cache,
13};
14pub use validate::{validate_detector, QualityIssue};
15
16/// Metadata field specification for verification results.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct MetadataSpec {
19    /// Field name in the finding metadata map.
20    pub name: String,
21    /// GJSON path to extract from the verification response body.
22    pub json_path: String,
23}
24
25/// A complete detector definition loaded from a TOML file.
26#[derive(Debug, Clone, Serialize, Deserialize, Default)]
27pub struct DetectorSpec {
28    /// Unique stable identifier (e.g. \`aws-access-key\`).
29    pub id: String,
30    /// Human-readable name.
31    pub name: String,
32    /// Target service (e.g. \`aws\`, \`stripe\`).
33    pub service: String,
34    /// Default severity for findings.
35    pub severity: Severity,
36    /// List of regex patterns to match.
37    pub patterns: Vec<PatternSpec>,
38    /// Secondary patterns required to confirm a match.
39    #[serde(default)]
40    pub companions: Vec<CompanionSpec>,
41    /// Live verification configuration.
42    pub verify: Option<VerifySpec>,
43    /// High-performance pre-filtering keywords.
44    #[serde(default)]
45    pub keywords: Vec<String>,
46}
47
48/// A regex pattern with optional capture group and description.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct PatternSpec {
51    /// Regular expression string (Rust flavor).
52    pub regex: String,
53    /// Optional context description.
54    pub description: Option<String>,
55    /// Optional capture group index containing the secret.
56    pub group: Option<usize>,
57}
58
59/// Secondary pattern used to confirm a primary match or provide extra context.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct CompanionSpec {
62    /// Field name used in verification templates (e.g. \`{{companion.secret_key}}\`).
63    pub name: String,
64    /// Regex to find the companion value nearby.
65    pub regex: String,
66    /// Maximum line distance from the primary match.
67    pub within_lines: usize,
68    /// Whether this companion must be found to report the finding.
69    #[serde(default)]
70    pub required: bool,
71}
72
73/// Live verification configuration for a detector.
74#[derive(Debug, Clone, Default, Serialize, Deserialize)]
75pub struct VerifySpec {
76    /// Target service identifier (defaults to detector's service if omitted).
77    #[serde(default)]
78    pub service: String,
79    /// HTTP method (default: GET).
80    pub method: Option<HttpMethod>,
81    /// Endpoint URL with optional \`{{match}}\` or \`{{companion.<name>}}\` placeholders.
82    pub url: Option<String>,
83    /// Authentication scheme.
84    pub auth: Option<AuthSpec>,
85    /// Custom HTTP headers.
86    #[serde(default)]
87    pub headers: Vec<HeaderSpec>,
88    /// Optional request body template.
89    pub body: Option<String>,
90    /// Criteria for a successful verification.
91    pub success: Option<SuccessSpec>,
92    /// Metadata to extract from the response.
93    #[serde(default)]
94    pub metadata: Vec<MetadataSpec>,
95    /// Optional request timeout override.
96    pub timeout_ms: Option<u64>,
97    /// Multi-step verification flow.
98    #[serde(default)]
99    pub steps: Vec<StepSpec>,
100    /// Domain allowlist for the verify URL after interpolation. If non-empty,
101    /// the resolved host of the (interpolated) URL — and of every step's URL —
102    /// MUST equal one of these entries (or be a subdomain of one). When empty,
103    /// the verifier falls back to a hardcoded service allowlist if the
104    /// `service` field maps to a known provider; otherwise the verifier
105    /// REFUSES to send the request. This blocks malicious detector TOMLs
106    /// that set `url = "{{match}}"` (or interpolate an attacker-controlled
107    /// companion) from exfiltrating credentials. See kimi-wave1 audit
108    /// finding 4.1 + wave3 §1.
109    #[serde(default)]
110    pub allowed_domains: Vec<String>,
111    /// Optional out-of-band verification probe. When set, the verifier mints a
112    /// per-finding correlation URL via the configured interactsh server,
113    /// substitutes `{{interactsh}}` (and `{{interactsh.host}}` /
114    /// `{{interactsh.url}}`) into the request template, and waits for the
115    /// service to call back. OOB verification proves a leaked credential is
116    /// **exfil-capable**, not just live: a webhook URL that returns 200 OK to
117    /// every probe still has to actually fetch our collector to confirm it
118    /// will deliver attacker-controlled traffic.
119    ///
120    /// Gated behind the runtime `--verify-oob` flag — never default. When the
121    /// flag is off, `oob` is ignored and verification falls back to the
122    /// HTTP success criteria alone.
123    pub oob: Option<OobSpec>,
124}
125
126/// Out-of-band callback verification configuration.
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct OobSpec {
129    /// Callback protocol the verifier waits for. The service may also touch
130    /// other protocols on the same correlation id; only the listed ones count
131    /// toward `Verified`.
132    pub protocol: OobProtocol,
133    /// How long to wait for the callback after the HTTP request returns.
134    /// Defaults to 30 seconds when omitted; capped at the engine's
135    /// `oob_timeout_max` to bound scan time.
136    #[serde(default)]
137    pub timeout_secs: Option<u64>,
138    /// Verification policy:
139    /// - `OobAndHttp` (default): both HTTP success criteria *and* OOB
140    ///   callback must hold. This is the strict mode for webhook-style
141    ///   detectors where 200 OK is necessary but not sufficient.
142    /// - `OobOnly`: ignore HTTP success, trust the OOB callback. For
143    ///   detectors where the API has no useful HTTP response shape but
144    ///   provably triggers an outbound request (e.g., one-way push tokens).
145    /// - `OobOptional`: HTTP success alone verifies; OOB just enriches
146    ///   metadata with `oob_observed=true|false` for the report.
147    #[serde(default)]
148    pub policy: OobPolicy,
149}
150
151/// Out-of-band callback protocol expected from a successful exfil.
152#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
153#[serde(rename_all = "lowercase")]
154pub enum OobProtocol {
155    /// Any DNS resolution against `{{interactsh}}.host`. Cheapest signal —
156    /// many services resolve a webhook URL even before fetching it.
157    Dns,
158    /// HTTP or HTTPS request to the interactsh URL. The strongest signal;
159    /// proves the service made an outbound HTTP request with the credential.
160    Http,
161    /// SMTP delivery attempt to `<random>@{{interactsh.host}}`. For mail
162    /// detectors (Mailgun, SendGrid, …) where exfil = sending mail.
163    Smtp,
164    /// Any of the above. Use sparingly — a chatty CDN doing DNS prefetch
165    /// can cause false positives.
166    Any,
167}
168
169/// How OOB observation combines with HTTP success criteria.
170#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
171#[serde(rename_all = "snake_case")]
172pub enum OobPolicy {
173    #[default]
174    OobAndHttp,
175    OobOnly,
176    OobOptional,
177}
178
179/// A single step in a multi-step verification flow.
180#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct StepSpec {
182    pub name: String,
183    pub method: HttpMethod,
184    pub url: String,
185    pub auth: AuthSpec,
186    #[serde(default)]
187    pub headers: Vec<HeaderSpec>,
188    pub body: Option<String>,
189    pub success: SuccessSpec,
190    #[serde(default)]
191    pub extract: Vec<MetadataSpec>,
192}
193
194/// Custom HTTP header specification.
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct HeaderSpec {
197    pub name: String,
198    pub value: String,
199}
200
201/// Authentication scheme for verification requests.
202#[derive(Debug, Clone, Serialize, Deserialize)]
203#[serde(tag = "type", rename_all = "snake_case")]
204pub enum AuthSpec {
205    None,
206    Bearer {
207        field: String,
208    },
209    Basic {
210        username: String,
211        password: String,
212    },
213    Header {
214        name: String,
215        template: String,
216    },
217    Query {
218        param: String,
219        field: String,
220    },
221    #[serde(rename = "aws_v4")]
222    AwsV4 {
223        access_key: String,
224        secret_key: String,
225        region: String,
226        service: String,
227        session_token: Option<String>,
228    },
229    Script {
230        engine: String,
231        code: String,
232    },
233}
234
235impl AuthSpec {
236    pub fn service_name(&self) -> Option<&str> {
237        match self {
238            AuthSpec::AwsV4 { service, .. } => Some(service),
239            _ => None,
240        }
241    }
242}
243
244/// Criteria for a successful verification response.
245#[derive(Debug, Clone, Serialize, Deserialize, Default)]
246pub struct SuccessSpec {
247    #[serde(default)]
248    /// Required HTTP status code.
249    pub status: Option<u16>,
250    #[serde(default)]
251    /// Reject if this status code is returned.
252    pub status_not: Option<u16>,
253    #[serde(default)]
254    /// Response body must contain this substring.
255    pub body_contains: Option<String>,
256    #[serde(default)]
257    /// Response body must NOT contain this substring.
258    pub body_not_contains: Option<String>,
259    #[serde(default)]
260    /// GJSON path to check in response body.
261    pub json_path: Option<String>,
262    #[serde(default)]
263    /// Expected value at \`json_path\`.
264    pub equals: Option<String>,
265}
266
267/// Severity level for a finding.
268#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default)]
269#[serde(rename_all = "lowercase")]
270pub enum Severity {
271    #[default]
272    Info,
273    Low,
274    Medium,
275    High,
276    Critical,
277}
278
279impl Severity {
280    pub fn to_severity(&self) -> Self {
281        *self
282    }
283
284    /// Step the severity down one tier (Critical → High, High → Medium, …).
285    /// `Info` stays at `Info` (no lower bucket).
286    ///
287    /// Used by diff-aware scoring: a credential that only appears in non-HEAD
288    /// git history is still a leak (commit history is public if the repo is)
289    /// but is meaningfully less urgent than a credential live in HEAD that an
290    /// attacker can grep right now. One tier of downgrade communicates that
291    /// without hiding the finding entirely.
292    pub fn downgrade_one(self) -> Self {
293        match self {
294            Severity::Critical => Severity::High,
295            Severity::High => Severity::Medium,
296            Severity::Medium => Severity::Low,
297            Severity::Low => Severity::Info,
298            Severity::Info => Severity::Info,
299        }
300    }
301}
302
303/// HTTP method for verification requests.
304#[derive(Debug, Clone, Serialize, Deserialize)]
305pub enum HttpMethod {
306    #[serde(rename = "GET")]
307    Get,
308    #[serde(rename = "POST")]
309    Post,
310    #[serde(rename = "PUT")]
311    Put,
312    #[serde(rename = "DELETE")]
313    Delete,
314    #[serde(rename = "PATCH")]
315    Patch,
316    #[serde(rename = "HEAD")]
317    Head,
318}
319
320/// Wrapping struct for a detector TOML file.
321#[derive(Debug, Clone, Serialize, Deserialize)]
322pub struct DetectorFile {
323    pub detector: DetectorSpec,
324}
325
326/// Errors returned while loading or validating detector specifications.
327#[derive(Debug, Error)]
328#[allow(clippy::result_large_err)] // SpecError variants include 128-byte toml::de::Error; boxing would be a breaking API change.
329pub enum SpecError {
330    #[error(
331        "failed to read detector file {path}: {source}. Fix: check the detector path exists and that the file is readable TOML"
332    )]
333    ReadFile {
334        path: String,
335        source: std::io::Error,
336    },
337    #[error("invalid TOML in detector {path}: {source}. Fix: repair the TOML syntax in the detector file")]
338    InvalidToml {
339        path: std::path::PathBuf,
340        source: toml::de::Error,
341    },
342}
343
#[cfg(test)]
mod tests {
    use super::Severity;

    /// Every tier above `Info` drops to the tier directly beneath it.
    #[test]
    fn severity_downgrade_walks_one_step() {
        let expected_steps = [
            (Severity::Critical, Severity::High),
            (Severity::High, Severity::Medium),
            (Severity::Medium, Severity::Low),
            (Severity::Low, Severity::Info),
        ];
        for (before, after) in expected_steps {
            assert_eq!(before.downgrade_one(), after);
        }
    }

    /// `Info` is the floor: downgrading it yields `Info` again.
    #[test]
    fn severity_downgrade_floors_at_info() {
        let floor = Severity::Info;
        assert_eq!(floor.downgrade_one(), Severity::Info);
    }

    /// Downgrading repeatedly never loops or skips upward: each result is
    /// ≤ the value it came from, and ten steps from `Critical` end at `Info`.
    #[test]
    fn severity_downgrade_is_monotonic() {
        let last = (0..10).fold(Severity::Critical, |current, _| {
            let lowered = current.downgrade_one();
            assert!(lowered <= current);
            lowered
        });
        assert_eq!(last, Severity::Info);
    }
}
372}