Skip to main content

keyhog_verifier/
lib.rs

1//! Live credential verification: confirms whether detected secrets are actually
2//! active by making HTTP requests to the service's API endpoint as specified in
3//! each detector's `[detector.verify]` configuration.
4
5#![allow(clippy::too_many_arguments)]
6#![allow(clippy::type_complexity)]
7
8/// Local HTTP compatibility shim backed by reqwest..
9pub mod reqwest {
10    pub use reqwest::*;
11}
12
13pub mod bogon;
14/// Shared in-memory verification cache.
15pub mod cache;
16pub mod domain_allowlist;
17pub mod interpolate;
18pub mod oob;
19pub mod rate_limit;
20pub mod ssrf;
21mod verify;
22
23use std::collections::HashMap;
24use std::sync::Arc;
25use std::time::Duration;
26
27use dashmap::DashMap;
28use keyhog_core::{redact, DedupedMatch, DetectorSpec, VerificationResult, VerifiedFinding};
29
// Re-export the dedup helpers from core (the `pub use keyhog_core::{...}` line
// below) so existing consumers (e.g. `use keyhog_verifier::dedup_matches`)
// continue to work without source changes.
32use crate::reqwest::{Client, Error as ReqwestError};
33pub use keyhog_core::{dedup_matches, DedupScope};
34use thiserror::Error;
35use tokio::sync::{Notify, Semaphore};
36
37/// Errors returned while constructing or executing live verification.
38#[derive(Debug, Error)]
39pub enum VerifyError {
40    #[error(
41        "failed to send HTTP request: {0}. Fix: check network access, proxy settings, and the verification endpoint"
42    )]
43    Http(#[from] ReqwestError),
44    #[error(
45        "failed to build configured HTTP client: {0}. Fix: use a valid timeout and supported TLS/network configuration"
46    )]
47    ClientBuild(ReqwestError),
48    #[error(
49        "invalid verifier proxy configuration: {0}. Fix: use a valid http://, https://, or socks5:// URL, or set 'off' to disable proxying entirely"
50    )]
51    ProxyConfig(String),
52    #[error(
53        "failed to resolve verification field: {0}. Fix: use `match` or `companion.<name>` fields that exist in the detector spec"
54    )]
55    FieldResolution(String),
56}
57
58/// Live-verification engine with shared client, cache, and concurrency limits.
59pub struct VerificationEngine {
60    client: Client,
61    detectors: Arc<HashMap<Arc<str>, DetectorSpec>>,
62    /// Per-service concurrency limit to avoid hammering APIs.
63    service_semaphores: Arc<HashMap<Arc<str>, Arc<Semaphore>>>,
64    /// Global concurrency limit.
65    global_semaphore: Arc<Semaphore>,
66    timeout: Duration,
67    /// Response cache to avoid re-verifying the same credential.
68    cache: Arc<cache::VerificationCache>,
69    /// One in-flight request per (detector_id, credential). DashMap (per-shard
70    /// locking) replaces the previous parking_lot::Mutex<HashMap> which was an
71    /// async anti-pattern - see audits/legendary-2026-04-26.
72    pub(crate) inflight: Arc<DashMap<(Arc<str>, Arc<str>), Arc<Notify>>>,
73    pub(crate) max_inflight_keys: usize,
74    pub(crate) danger_allow_private_ips: bool,
75    pub(crate) danger_allow_http: bool,
76    /// Mirrors `VerifyConfig.insecure_tls`. The base `client` is built
77    /// with `danger_accept_invalid_certs(insecure_tls)`, but the
78    /// per-request DNS-pinning rebuild path needs the bool itself so
79    /// it can match the base client's posture. See
80    /// `verify/request.rs::resolved_client_for_url`.
81    pub(crate) insecure_tls: bool,
82    /// Snapshot of "was the base client built with a proxy" - propagated
83    /// to per-request rebuild paths so they skip the rebuild (which would
84    /// strip the proxy). See `verify/request.rs::resolved_client_for_url`.
85    pub(crate) proxy_in_use: bool,
86    /// Optional OOB session. When `Some`, detectors with `[detector.verify.oob]`
87    /// receive a per-finding callback URL and the engine waits for the
88    /// service to call back. When `None`, those detectors fall through to
89    /// HTTP-only success criteria. Set via [`VerificationEngine::enable_oob`].
90    pub(crate) oob_session: Option<Arc<oob::OobSession>>,
91}
92
/// Settings that control live verification at runtime.
///
/// Config-surface boundary: `VerifyConfig` belongs to an **orthogonal
/// subsystem** and is NOT part of the detection/bench config surface.
/// Detection accuracy is influenced only by `ScanConfig` + `ScannerConfig`
/// (+ nested `MultilineConfig`), and those are what the benchmark exercises.
/// `VerifyConfig` covers live HTTP verification (network I/O, concurrency,
/// proxy, TLS) and is built only on the `--verify` path
/// (`cli/src/orchestrator/postprocess.rs`); the bench runs with
/// `--no-verification` and never touches it. Sibling orthogonal configs:
/// `OobConfig` (verifier/src/oob/session.rs, `--verify-oob` only),
/// `HttpClientConfig` (sources/src/http.rs, per-source network I/O),
/// `MegakernelSessionConfig` (scanner GPU slot geometry), and
/// `AwsSigV4Config` (S3 request signing). Do NOT fold any of these into the
/// canonical scan config: they are legitimately separate axes.
pub struct VerifyConfig {
    /// Timeout covering a single verification attempt end to end.
    pub timeout: Duration,
    /// Cap on simultaneous requests to any one service.
    pub max_concurrent_per_service: usize,
    /// Cap on simultaneous verification tasks across all services.
    pub max_concurrent_global: usize,
    /// Cap on the number of distinct in-flight deduplication keys.
    pub max_inflight_keys: usize,
    /// When `true`, SSRF protection for private IP addresses is skipped.
    pub danger_allow_private_ips: bool,
    /// When `true`, plaintext HTTP verification URLs are permitted.
    /// Defaults to `false`: production paths must use HTTPS so credentials
    /// are never sent in the clear. Test fixtures (mock HTTP servers,
    /// in-memory listeners) opt in.
    pub danger_allow_http: bool,
    /// Explicit upstream proxy URL used for every verifier request and OOB
    /// poll. `None` defers to the `KEYHOG_PROXY` env var; the literal
    /// `"off"` turns proxying off entirely. Before this field existed,
    /// `--proxy` reached only the WebSource scanner - verification traffic
    /// and interactsh polls silently bypassed it, which surprised operators
    /// who pointed Burp at keyhog and saw only half the traffic.
    pub proxy: Option<String>,
    /// When `true`, invalid / self-signed TLS certificates are accepted for
    /// verifier + OOB traffic. Off by default. Needed when intercepting
    /// through a MITM proxy (Burp, mitmproxy) that re-signs HTTPS with its
    /// own CA.
    pub insecure_tls: bool,
}

impl Default for VerifyConfig {
    /// Conservative defaults: 5 s timeout, 5 requests per service, 20
    /// overall, 10 000 in-flight keys, every `danger_*` / insecure switch
    /// off, and no explicit proxy.
    fn default() -> Self {
        const TIMEOUT: Duration = Duration::from_secs(5);
        const PER_SERVICE_LIMIT: usize = 5;
        const GLOBAL_LIMIT: usize = 20;
        const INFLIGHT_KEY_LIMIT: usize = 10_000;

        VerifyConfig {
            timeout: TIMEOUT,
            max_concurrent_per_service: PER_SERVICE_LIMIT,
            max_concurrent_global: GLOBAL_LIMIT,
            max_inflight_keys: INFLIGHT_KEY_LIMIT,
            danger_allow_private_ips: false,
            danger_allow_http: false,
            proxy: None,
            insecure_tls: false,
        }
    }
}
150
151/// Resolve a proxy spec into an applied `reqwest::ClientBuilder`. Handles
152/// the literal `"off"` sentinel (disables proxying inc. env-var inheritance)
153/// and the `KEYHOG_PROXY` env-var fallback when no explicit value is set.
154/// Extracted so the verifier client and OOB client share one resolver and
155/// the same env-var contract.
156pub(crate) fn apply_proxy_config(
157    builder: reqwest::ClientBuilder,
158    explicit: Option<&str>,
159) -> Result<reqwest::ClientBuilder, String> {
160    let resolved = if let Some(p) = explicit {
161        Some(p.to_string())
162    } else {
163        std::env::var("KEYHOG_PROXY").ok().filter(|s| !s.is_empty())
164    };
165    match resolved.as_deref() {
166        Some("off") | Some("none") | Some("") => Ok(builder.no_proxy()),
167        Some(url) => {
168            let proxy = reqwest::Proxy::all(url)
169                .map_err(|e| format!("invalid verifier proxy URL {url:?}: {e}"))?;
170            Ok(builder.proxy(proxy))
171        }
172        None => Ok(builder),
173    }
174}
175
/// Reports whether the resolved proxy policy actually sends traffic through
/// a proxy. The mode resolution mirrors [`apply_proxy_config`]:
///   - explicit `Some(url)` or `KEYHOG_PROXY=<url>` → `true`
///   - explicit `Some("off"|"none"|"")` or `KEYHOG_PROXY=off|none|""` → `false`
///   - neither set → falls back to reqwest's standard env-proxy vars
///     (`HTTPS_PROXY`, `HTTP_PROXY`, `ALL_PROXY`, upper- and lower-case).
///     `NO_PROXY` alone does not make a proxy active. Blank values count as
///     unset, matching reqwest's own builder behavior.
///
/// Issue #2: before the fix, `proxy_in_use` was derived from
/// `KEYHOG_PROXY.is_some()` alone, so `KEYHOG_PROXY=off` (the documented
/// "disable" sentinel) ALSO set the flag. That disabled DNS pinning in
/// `resolved_client_for_url()` although no proxy was active, and operators
/// using `KEYHOG_PROXY=off` for direct-connect verification lost
/// DNS-rebinding protection.
///
/// Issue #3: before the fix, the check ignored reqwest's standard
/// `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` env vars even though the
/// shared client honored them via reqwest defaults. With
/// `HTTPS_PROXY=http://burp:8080` the result was `proxy_in_use == false`,
/// so the verifier rebuilt the pinned client from scratch and dropped the
/// env-proxy. The pinned path then connected direct, bypassing the
/// operator's interception/audit layer. Consulting the reqwest env vars
/// closes that gap.
pub fn proxy_is_active(explicit: Option<&str>) -> bool {
    const STANDARD_PROXY_VARS: [&str; 6] = [
        "HTTPS_PROXY",
        "https_proxy",
        "HTTP_PROXY",
        "http_proxy",
        "ALL_PROXY",
        "all_proxy",
    ];

    // Declared up front so the borrowed spec can outlive the match arm.
    let keyhog_env;
    let spec = match explicit {
        Some(value) => Some(value),
        None => {
            keyhog_env = std::env::var("KEYHOG_PROXY").ok().filter(|s| !s.is_empty());
            keyhog_env.as_deref()
        }
    };

    // An explicit or KEYHOG_PROXY value decides the outcome on its own:
    // the disable sentinels mean "no proxy", anything else means "proxy".
    if let Some(value) = spec {
        return !matches!(value, "off" | "none" | "");
    }

    // No keyhog-level setting: a proxy is active iff one of the standard
    // env vars holds a non-blank value.
    STANDARD_PROXY_VARS
        .iter()
        .any(|name| std::env::var(name).is_ok_and(|v| !v.trim().is_empty()))
}
227
228/// Convert a [`DedupedMatch`] into a [`VerifiedFinding`] with the given verification result.
229pub(crate) fn into_finding(
230    group: DedupedMatch,
231    verification: VerificationResult,
232    metadata: HashMap<String, String>,
233) -> VerifiedFinding {
234    VerifiedFinding {
235        detector_id: group.detector_id,
236        detector_name: group.detector_name,
237        service: group.service,
238        severity: group.severity,
239        credential_redacted: redact(&group.credential),
240        credential_hash: group.credential_hash,
241        location: group.primary_location,
242        verification,
243        metadata,
244        additional_locations: group.additional_locations,
245        confidence: group.confidence,
246    }
247}
248
249/// Hidden hooks for integration tests. Not covered by semver.
250#[doc(hidden)]
251pub mod testing {
252    pub use crate::bogon::ip_addr_is_bogon;
253    pub use crate::interpolate::sanitize_oob_value;
254    pub use crate::interpolate::sanitize_raw_value;
255    pub use crate::oob::redact_interactsh_error;
256    pub use crate::verify::format_sigv4_timestamps;
257}