keyhog_verifier/lib.rs
1//! Live credential verification: confirms whether detected secrets are actually
2//! active by making HTTP requests to the service's API endpoint as specified in
3//! each detector's `[detector.verify]` configuration.
4
5#![allow(clippy::too_many_arguments)]
6#![allow(clippy::type_complexity)]
7
/// Local HTTP compatibility shim backed by reqwest.
///
/// Glob re-exports the public items of the `reqwest` crate so the rest of
/// this crate can name them through one module path (for example
/// `crate::reqwest::Client`).
pub mod reqwest {
    pub use reqwest::*;
}
12
13pub mod bogon;
14/// Shared in-memory verification cache.
15pub mod cache;
16pub mod domain_allowlist;
17pub mod interpolate;
18pub mod oob;
19pub mod rate_limit;
20pub mod ssrf;
21mod verify;
22
23use std::collections::HashMap;
24use std::sync::Arc;
25use std::time::Duration;
26
27use dashmap::DashMap;
28use keyhog_core::{redact, DedupedMatch, DetectorSpec, VerificationResult, VerifiedFinding};
29
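// The `pub use keyhog_core::{dedup_matches, DedupScope}` below re-exports the
// dedup API from core so existing consumers (e.g. `use keyhog_verifier::dedup_matches`)
// continue to work without source changes.
// NOTE(review): this comment originally cited `keyhog_verifier::DedupedMatch`, but
// `DedupedMatch` is only imported privately above - confirm whether it should be `pub use` too.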
32use crate::reqwest::{Client, Error as ReqwestError};
33pub use keyhog_core::{dedup_matches, DedupScope};
34use thiserror::Error;
35use tokio::sync::{Notify, Semaphore};
36
/// Errors returned while constructing or executing live verification.
///
/// Every variant's display message ends with a `Fix:` hint that tells the
/// operator what to check or change.
#[derive(Debug, Error)]
pub enum VerifyError {
    /// An HTTP request could not be sent. Carries the underlying reqwest
    /// error; the `#[from]` attribute lets `?` convert a reqwest error into
    /// this variant automatically.
    #[error(
        "failed to send HTTP request: {0}. Fix: check network access, proxy settings, and the verification endpoint"
    )]
    Http(#[from] ReqwestError),
    /// The configured HTTP client could not be built. Wraps the same reqwest
    /// error type as [`VerifyError::Http`] but has no `#[from]` conversion,
    /// so it must be constructed explicitly.
    #[error(
        "failed to build configured HTTP client: {0}. Fix: use a valid timeout and supported TLS/network configuration"
    )]
    ClientBuild(ReqwestError),
    /// The verifier proxy setting was rejected. The payload is a
    /// human-readable description of the problem.
    #[error(
        "invalid verifier proxy configuration: {0}. Fix: use a valid http://, https://, or socks5:// URL, or set 'off' to disable proxying entirely"
    )]
    ProxyConfig(String),
    /// A field referenced by a detector's verification spec could not be
    /// resolved. The payload is interpolated into the message as-is
    /// (presumably the offending field reference - confirm at the call sites).
    #[error(
        "failed to resolve verification field: {0}. Fix: use `match` or `companion.<name>` fields that exist in the detector spec"
    )]
    FieldResolution(String),
}
57
/// Live-verification engine with shared client, cache, and concurrency limits.
pub struct VerificationEngine {
    /// Base HTTP client. Built with
    /// `danger_accept_invalid_certs(insecure_tls)` (see `insecure_tls` below).
    client: Client,
    /// Detector specs behind a shared, immutable map.
    /// NOTE(review): keys are presumably detector ids - confirm against the
    /// constructor.
    detectors: Arc<HashMap<Arc<str>, DetectorSpec>>,
    /// Per-service concurrency limit to avoid hammering APIs.
    service_semaphores: Arc<HashMap<Arc<str>, Arc<Semaphore>>>,
    /// Global concurrency limit.
    global_semaphore: Arc<Semaphore>,
    /// Verification timeout.
    /// NOTE(review): presumably copied from `VerifyConfig::timeout` - confirm
    /// against the constructor.
    timeout: Duration,
    /// Response cache to avoid re-verifying the same credential.
    cache: Arc<cache::VerificationCache>,
    /// One in-flight request per (detector_id, credential). DashMap (per-shard
    /// locking) replaces the previous parking_lot::Mutex<HashMap> which was an
    /// async anti-pattern - see audits/legendary-2026-04-26.
    pub(crate) inflight: Arc<DashMap<(Arc<str>, Arc<str>), Arc<Notify>>>,
    /// NOTE(review): presumably mirrors `VerifyConfig::max_inflight_keys`
    /// (upper bound for distinct keys in `inflight`) - confirm.
    pub(crate) max_inflight_keys: usize,
    /// NOTE(review): presumably mirrors `VerifyConfig::danger_allow_private_ips`
    /// (skip SSRF protection for private IP addresses) - confirm.
    pub(crate) danger_allow_private_ips: bool,
    /// NOTE(review): presumably mirrors `VerifyConfig::danger_allow_http`
    /// (allow plaintext HTTP verification URLs) - confirm.
    pub(crate) danger_allow_http: bool,
    /// Mirrors `VerifyConfig.insecure_tls`. The base `client` is built
    /// with `danger_accept_invalid_certs(insecure_tls)`, but the
    /// per-request DNS-pinning rebuild path needs the bool itself so
    /// it can match the base client's posture. See
    /// `verify/request.rs::resolved_client_for_url`.
    pub(crate) insecure_tls: bool,
    /// Snapshot of "was the base client built with a proxy" - propagated
    /// to per-request rebuild paths so they skip the rebuild (which would
    /// strip the proxy). See `verify/request.rs::resolved_client_for_url`.
    pub(crate) proxy_in_use: bool,
    /// Optional OOB session. When `Some`, detectors with `[detector.verify.oob]`
    /// receive a per-finding callback URL and the engine waits for the
    /// service to call back. When `None`, those detectors fall through to
    /// HTTP-only success criteria. Set via [`VerificationEngine::enable_oob`].
    pub(crate) oob_session: Option<Arc<oob::OobSession>>,
}
92
/// Runtime configuration for live verification.
///
/// Config-surface boundary: `VerifyConfig` is an **orthogonal subsystem**
/// config, NOT part of the detection/bench config surface. Only
/// `ScanConfig` + `ScannerConfig` (+ nested `MultilineConfig`) influence
/// detection accuracy and are exercised by the benchmark. `VerifyConfig`
/// governs live HTTP verification (network I/O, concurrency, proxy, TLS)
/// and is constructed only on the `--verify` path
/// (`cli/src/orchestrator/postprocess.rs`); the bench runs with
/// `--no-verification` and never touches it. The sibling orthogonal configs
/// are `OobConfig` (verifier/src/oob/session.rs, `--verify-oob` only),
/// `HttpClientConfig` (sources/src/http.rs, per-source network I/O),
/// `MegakernelSessionConfig` (scanner GPU slot geometry), and
/// `AwsSigV4Config` (S3 request signing). Do NOT fold any of these into the
/// canonical scan config: they are legitimately separate axes.
///
/// The [`Default`] implementation yields the values noted on each field.
pub struct VerifyConfig {
    /// End-to-end timeout for one verification attempt. Default: 5 seconds.
    pub timeout: Duration,
    /// Maximum concurrent requests allowed per service. Default: 5.
    pub max_concurrent_per_service: usize,
    /// Maximum concurrent verification tasks overall. Default: 20.
    pub max_concurrent_global: usize,
    /// Upper bound for distinct in-flight deduplication keys. Default: 10 000.
    pub max_inflight_keys: usize,
    /// Whether to skip SSRF protection for private IP addresses.
    /// Default `false`.
    pub danger_allow_private_ips: bool,
    /// Whether to allow plaintext HTTP verification URLs. Default `false`:
    /// production paths must use HTTPS so credentials are never sent in the
    /// clear. Test fixtures (mock HTTP servers, in-memory listeners) opt in.
    pub danger_allow_http: bool,
    /// Explicit upstream proxy URL applied to every verifier request and OOB
    /// poll. `None` (the default) falls back to the `KEYHOG_PROXY` env var;
    /// the literals `"off"`, `"none"`, and the empty string disable proxying
    /// entirely. Until this was added, `--proxy` only
    /// reached the WebSource scanner - verification traffic and interactsh
    /// polls bypassed it silently, surprising operators who pointed Burp at
    /// keyhog and saw only half the traffic.
    pub proxy: Option<String>,
    /// Accept invalid / self-signed TLS certs for verifier + OOB traffic.
    /// Off by default. Required when intercepting through a MITM proxy
    /// (Burp, mitmproxy) that re-signs HTTPS with its own CA.
    pub insecure_tls: bool,
}
135
136impl Default for VerifyConfig {
137 fn default() -> Self {
138 Self {
139 timeout: Duration::from_secs(5),
140 max_concurrent_per_service: 5,
141 max_concurrent_global: 20,
142 max_inflight_keys: 10_000,
143 danger_allow_private_ips: false,
144 danger_allow_http: false,
145 proxy: None,
146 insecure_tls: false,
147 }
148 }
149}
150
151/// Resolve a proxy spec into an applied `reqwest::ClientBuilder`. Handles
152/// the literal `"off"` sentinel (disables proxying inc. env-var inheritance)
153/// and the `KEYHOG_PROXY` env-var fallback when no explicit value is set.
154/// Extracted so the verifier client and OOB client share one resolver and
155/// the same env-var contract.
156pub(crate) fn apply_proxy_config(
157 builder: reqwest::ClientBuilder,
158 explicit: Option<&str>,
159) -> Result<reqwest::ClientBuilder, String> {
160 let resolved = if let Some(p) = explicit {
161 Some(p.to_string())
162 } else {
163 std::env::var("KEYHOG_PROXY").ok().filter(|s| !s.is_empty())
164 };
165 match resolved.as_deref() {
166 Some("off") | Some("none") | Some("") => Ok(builder.no_proxy()),
167 Some(url) => {
168 let proxy = reqwest::Proxy::all(url)
169 .map_err(|e| format!("invalid verifier proxy URL {url:?}: {e}"))?;
170 Ok(builder.proxy(proxy))
171 }
172 None => Ok(builder),
173 }
174}
175
/// Reports whether the resolved proxy policy actually routes traffic through
/// a proxy. Follows the same resolution order as [`apply_proxy_config`]:
///
/// 1. An explicit value wins: `"off"`, `"none"`, or `""` → `false`; any
///    other explicit string → `true`.
/// 2. With no explicit value, a non-empty `KEYHOG_PROXY` is interpreted the
///    same way (`off` / `none` → `false`, anything else → `true`). An empty
///    `KEYHOG_PROXY` counts as unset and falls through to step 3.
/// 3. Otherwise reqwest's standard env-proxy vars are consulted
///    (`HTTPS_PROXY`, `HTTP_PROXY`, `ALL_PROXY`, and their lowercase forms):
///    any one holding a non-blank value → `true`. `NO_PROXY` alone does not
///    make a proxy active. Blank values count as unset, matching reqwest's
///    own builder behavior.
///
/// The value is not validated as a URL here; only the sentinels are
/// recognised, and they are matched case-sensitively.
///
/// Issue #2: before the fix, `proxy_in_use` was derived from
/// `KEYHOG_PROXY.is_some()` alone, so `KEYHOG_PROXY=off` (the documented
/// "disable" sentinel) also set the flag. That disabled DNS pinning in
/// `resolved_client_for_url()` even though no proxy was active, and
/// operators using `KEYHOG_PROXY=off` for direct-connect verification lost
/// DNS-rebinding protection.
///
/// Issue #3: before the fix, the check ignored reqwest's standard
/// `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` env vars even though the shared
/// client honored them via reqwest defaults. A user with
/// `HTTPS_PROXY=http://burp:8080` got `proxy_in_use == false`, so the
/// verifier rebuilt the pinned client from scratch and dropped the
/// env-proxy; the pinned path then connected direct, bypassing the
/// operator's interception/audit layer. Checking the reqwest env vars
/// closes that gap.
pub fn proxy_is_active(explicit: Option<&str>) -> bool {
    /// Env vars reqwest reads for proxy discovery, in the order checked.
    const STANDARD_PROXY_VARS: [&str; 6] = [
        "HTTPS_PROXY",
        "https_proxy",
        "HTTP_PROXY",
        "http_proxy",
        "ALL_PROXY",
        "all_proxy",
    ];

    // Owns the env value (if any) so `setting` can borrow from it.
    let from_env;
    let setting: Option<&str> = match explicit {
        Some(value) => Some(value),
        None => {
            from_env = std::env::var("KEYHOG_PROXY")
                .ok()
                .filter(|value| !value.is_empty());
            from_env.as_deref()
        }
    };

    match setting {
        // Disable sentinels: proxying is explicitly turned off.
        Some("off" | "none" | "") => false,
        // Any other configured value is treated as a proxy URL.
        Some(_) => true,
        // Nothing keyhog-specific: fall back to the standard env vars.
        None => STANDARD_PROXY_VARS
            .iter()
            .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty())),
    }
}
227
228/// Convert a [`DedupedMatch`] into a [`VerifiedFinding`] with the given verification result.
229pub(crate) fn into_finding(
230 group: DedupedMatch,
231 verification: VerificationResult,
232 metadata: HashMap<String, String>,
233) -> VerifiedFinding {
234 VerifiedFinding {
235 detector_id: group.detector_id,
236 detector_name: group.detector_name,
237 service: group.service,
238 severity: group.severity,
239 credential_redacted: redact(&group.credential),
240 credential_hash: group.credential_hash,
241 location: group.primary_location,
242 verification,
243 metadata,
244 additional_locations: group.additional_locations,
245 confidence: group.confidence,
246 }
247}
248
249/// Hidden hooks for integration tests. Not covered by semver.
250#[doc(hidden)]
251pub mod testing {
252 pub use crate::bogon::ip_addr_is_bogon;
253 pub use crate::interpolate::sanitize_oob_value;
254 pub use crate::interpolate::sanitize_raw_value;
255 pub use crate::oob::redact_interactsh_error;
256 pub use crate::verify::format_sigv4_timestamps;
257}