Skip to main content

keyhog_core/
spec.rs

//! Detector specification: TOML-based pattern definitions with regex, keywords,
//! verification endpoints, and companion patterns.
3
4mod load;
5mod validate;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10pub use load::{
11    load_detector_cache, load_detectors, load_detectors_from_str, load_detectors_with_gate,
12    save_detector_cache,
13};
14pub use validate::{validate_detector, QualityIssue};
15
16/// Metadata field specification for verification results.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct MetadataSpec {
19    /// Field name in the finding metadata map.
20    pub name: String,
21    /// GJSON path to extract from the verification response body.
22    pub json_path: String,
23}
24
25/// A complete detector definition loaded from a TOML file.
26#[derive(Debug, Clone, Serialize, Deserialize, Default)]
27pub struct DetectorSpec {
28    /// Unique stable identifier (e.g. \`aws-access-key\`).
29    pub id: String,
30    /// Human-readable name.
31    pub name: String,
32    /// Target service (e.g. \`aws\`, \`stripe\`).
33    pub service: String,
34    /// Default severity for findings.
35    pub severity: Severity,
36    /// List of regex patterns to match.
37    pub patterns: Vec<PatternSpec>,
38    /// Secondary patterns required to confirm a match.
39    #[serde(default)]
40    pub companions: Vec<CompanionSpec>,
41    /// Live verification configuration.
42    pub verify: Option<VerifySpec>,
43    /// High-performance pre-filtering keywords.
44    #[serde(default)]
45    pub keywords: Vec<String>,
46}
47
48/// A regex pattern with optional capture group and description.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct PatternSpec {
51    /// Regular expression string (Rust flavor).
52    pub regex: String,
53    /// Optional context description.
54    pub description: Option<String>,
55    /// Optional capture group index containing the secret.
56    pub group: Option<usize>,
57}
58
59/// Secondary pattern used to confirm a primary match or provide extra context.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct CompanionSpec {
62    /// Field name used in verification templates (e.g. \`{{companion.secret_key}}\`).
63    pub name: String,
64    /// Regex to find the companion value nearby.
65    pub regex: String,
66    /// Maximum line distance from the primary match.
67    pub within_lines: usize,
68    /// Whether this companion must be found to report the finding.
69    #[serde(default)]
70    pub required: bool,
71}
72
73/// Live verification configuration for a detector.
74#[derive(Debug, Clone, Default, Serialize, Deserialize)]
75pub struct VerifySpec {
76    /// Target service identifier (defaults to detector's service if omitted).
77    #[serde(default)]
78    pub service: String,
79    /// HTTP method (default: GET).
80    pub method: Option<HttpMethod>,
81    /// Endpoint URL with optional \`{{match}}\` or \`{{companion.<name>}}\` placeholders.
82    pub url: Option<String>,
83    /// Authentication scheme.
84    pub auth: Option<AuthSpec>,
85    /// Custom HTTP headers.
86    #[serde(default)]
87    pub headers: Vec<HeaderSpec>,
88    /// Optional request body template.
89    pub body: Option<String>,
90    /// Criteria for a successful verification.
91    pub success: Option<SuccessSpec>,
92    /// Metadata to extract from the response.
93    #[serde(default)]
94    pub metadata: Vec<MetadataSpec>,
95    /// Optional request timeout override.
96    pub timeout_ms: Option<u64>,
97    /// Multi-step verification flow.
98    #[serde(default)]
99    pub steps: Vec<StepSpec>,
100    /// Domain allowlist for the verify URL after interpolation. If non-empty,
101    /// the resolved host of the (interpolated) URL — and of every step's URL —
102    /// MUST equal one of these entries (or be a subdomain of one). When empty,
103    /// the verifier falls back to a hardcoded service allowlist if the
104    /// `service` field maps to a known provider; otherwise the verifier
105    /// REFUSES to send the request. This blocks malicious detector TOMLs
106    /// that set `url = "{{match}}"` (or interpolate an attacker-controlled
107    /// companion) from exfiltrating credentials. See kimi-wave1 audit
108    /// finding 4.1 + wave3 §1.
109    #[serde(default)]
110    pub allowed_domains: Vec<String>,
111}
112
113/// A single step in a multi-step verification flow.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct StepSpec {
116    pub name: String,
117    pub method: HttpMethod,
118    pub url: String,
119    pub auth: AuthSpec,
120    #[serde(default)]
121    pub headers: Vec<HeaderSpec>,
122    pub body: Option<String>,
123    pub success: SuccessSpec,
124    #[serde(default)]
125    pub extract: Vec<MetadataSpec>,
126}
127
128/// Custom HTTP header specification.
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct HeaderSpec {
131    pub name: String,
132    pub value: String,
133}
134
135/// Authentication scheme for verification requests.
136#[derive(Debug, Clone, Serialize, Deserialize)]
137#[serde(tag = "type", rename_all = "snake_case")]
138pub enum AuthSpec {
139    None,
140    Bearer {
141        field: String,
142    },
143    Basic {
144        username: String,
145        password: String,
146    },
147    Header {
148        name: String,
149        template: String,
150    },
151    Query {
152        param: String,
153        field: String,
154    },
155    #[serde(rename = "aws_v4")]
156    AwsV4 {
157        access_key: String,
158        secret_key: String,
159        region: String,
160        service: String,
161        session_token: Option<String>,
162    },
163    Script {
164        engine: String,
165        code: String,
166    },
167}
168
169impl AuthSpec {
170    pub fn service_name(&self) -> Option<&str> {
171        match self {
172            AuthSpec::AwsV4 { service, .. } => Some(service),
173            _ => None,
174        }
175    }
176}
177
178/// Criteria for a successful verification response.
179#[derive(Debug, Clone, Serialize, Deserialize, Default)]
180pub struct SuccessSpec {
181    #[serde(default)]
182    /// Required HTTP status code.
183    pub status: Option<u16>,
184    #[serde(default)]
185    /// Reject if this status code is returned.
186    pub status_not: Option<u16>,
187    #[serde(default)]
188    /// Response body must contain this substring.
189    pub body_contains: Option<String>,
190    #[serde(default)]
191    /// Response body must NOT contain this substring.
192    pub body_not_contains: Option<String>,
193    #[serde(default)]
194    /// GJSON path to check in response body.
195    pub json_path: Option<String>,
196    #[serde(default)]
197    /// Expected value at \`json_path\`.
198    pub equals: Option<String>,
199}
200
201/// Severity level for a finding.
202#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Default)]
203#[serde(rename_all = "lowercase")]
204pub enum Severity {
205    #[default]
206    Info,
207    Low,
208    Medium,
209    High,
210    Critical,
211}
212
213impl Severity {
214    pub fn to_severity(&self) -> Self {
215        *self
216    }
217
218    /// Step the severity down one tier (Critical → High, High → Medium, …).
219    /// `Info` stays at `Info` (no lower bucket).
220    ///
221    /// Used by diff-aware scoring: a credential that only appears in non-HEAD
222    /// git history is still a leak (commit history is public if the repo is)
223    /// but is meaningfully less urgent than a credential live in HEAD that an
224    /// attacker can grep right now. One tier of downgrade communicates that
225    /// without hiding the finding entirely.
226    pub fn downgrade_one(self) -> Self {
227        match self {
228            Severity::Critical => Severity::High,
229            Severity::High => Severity::Medium,
230            Severity::Medium => Severity::Low,
231            Severity::Low => Severity::Info,
232            Severity::Info => Severity::Info,
233        }
234    }
235}
236
237/// HTTP method for verification requests.
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub enum HttpMethod {
240    #[serde(rename = "GET")]
241    Get,
242    #[serde(rename = "POST")]
243    Post,
244    #[serde(rename = "PUT")]
245    Put,
246    #[serde(rename = "DELETE")]
247    Delete,
248    #[serde(rename = "PATCH")]
249    Patch,
250    #[serde(rename = "HEAD")]
251    Head,
252}
253
254/// Wrapping struct for a detector TOML file.
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct DetectorFile {
257    pub detector: DetectorSpec,
258}
259
260/// Errors returned while loading or validating detector specifications.
261#[derive(Debug, Error)]
262pub enum SpecError {
263    #[error(
264        "failed to read detector file {path}: {source}. Fix: check the detector path exists and that the file is readable TOML"
265    )]
266    ReadFile {
267        path: String,
268        source: std::io::Error,
269    },
270    #[error("invalid TOML in detector {path}: {source}. Fix: repair the TOML syntax in the detector file")]
271    InvalidToml {
272        path: std::path::PathBuf,
273        source: toml::de::Error,
274    },
275}
276
#[cfg(test)]
mod tests {
    use super::Severity;

    /// Every tier, from most to least severe.
    const TIERS_DESCENDING: [Severity; 5] = [
        Severity::Critical,
        Severity::High,
        Severity::Medium,
        Severity::Low,
        Severity::Info,
    ];

    #[test]
    fn severity_downgrade_walks_one_step() {
        // Each tier must downgrade to exactly the tier listed after it.
        for pair in TIERS_DESCENDING.windows(2) {
            assert_eq!(pair[0].downgrade_one(), pair[1]);
        }
    }

    #[test]
    fn severity_downgrade_floors_at_info() {
        assert_eq!(Severity::Info.downgrade_one(), Severity::Info);
    }

    #[test]
    fn severity_downgrade_is_monotonic() {
        // Repeated downgrade must not loop or skip: every step must be ≤ the
        // previous one and at most one tier below it.
        let mut current = Severity::Critical;
        for _ in 0..10 {
            let next = current.downgrade_one();
            assert!(next <= current);
            // `next <= current` alone would accept a jump such as
            // Critical → Low; the discriminant gap catches skipped tiers.
            let tiers_dropped = current as u8 - next as u8;
            assert!(tiers_dropped <= 1, "downgrade skipped a tier");
            current = next;
        }
        assert_eq!(current, Severity::Info);
    }
}
305}