keyhog-verifier 0.5.37

keyhog-verifier: parallel async credential verification framework
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
//! Low-level interactsh protocol client.
//!
//! A thin async wrapper around the projectdiscovery/interactsh-server register/
//! poll/deregister endpoints. Stateless aside from the RSA keypair, secret,
//! correlation id, and HTTP client - `OobSession` (in `session.rs`) layers
//! the per-finding subscription, polling loop, and notification fan-out on top.
//!
//! ## Crypto invariants
//!
//! - RSA-2048, OAEP padding, SHA-256 hash and MGF - interactsh-server speaks
//!   exactly this combination; `RSA_PKCS1_OAEP_PADDING` with SHA-256 in their
//!   Go code. Other parameters won't decrypt.
//! - AES-256-CFB with a 16-byte IV prepended to ciphertext. Each interaction
//!   carries an independent IV; the AES key is per-poll-batch.
//! - We never log credentials, public keys, or decrypted payloads. Errors
//!   carry stable strings - useful for support, opaque to leaks.

use std::time::Duration;

use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use rand::distributions::Alphanumeric;
use rand::{rngs::OsRng, Rng};
use reqwest::Client;
use rsa::pkcs8::{EncodePublicKey, LineEnding};
use rsa::{Oaep, RsaPrivateKey, RsaPublicKey};
use serde::{Deserialize, Serialize};
use sha2::Sha256;
use thiserror::Error;
use tracing::{debug, warn};

/// Stable bucket name for the global rate limiter.
///
/// `register`, `poll`, and `deregister` each wait on this bucket (via
/// `crate::rate_limit::get_rate_limiter().wait(..)`) before sending their
/// request, so every OOB call across every detector shares one budget and
/// the aggregate request rate to the upstream collector never exceeds the
/// configured `--verify-rate`. Using the literal string `"oob.interactsh"`
/// (not the server URL) means the budget covers all configured collectors
/// collectively - the limit is about our own machine not blasting traffic,
/// not about per-host fairness.
const OOB_SERVICE: &str = "oob.interactsh";

/// All errors that can arise from the OOB client.
///
/// NOTE(review): there is no dedicated `Transient` variant. Presumably
/// `Transport` and `Timeout` are the cases a caller may want to retry
/// (network blip, rate-limit) and everything else is final - confirm against
/// the caller's retry policy.
#[derive(Debug, Error)]
pub enum InteractshError {
    /// RSA key generation failed, or the blocking keygen task could not be
    /// joined.
    #[error("interactsh keypair generation failed: {0}")]
    KeyGen(String),
    /// The RSA public key could not be PEM-encoded for registration.
    #[error("interactsh public-key encoding failed: {0}")]
    KeyEncode(String),
    /// The collector answered `/register` with a non-success status. `body`
    /// holds at most the first 256 characters of the response.
    #[error("interactsh register failed (HTTP {status}): {body}")]
    Register { status: u16, body: String },
    /// The collector answered `/poll` with a non-success status. `body`
    /// holds at most the first 256 characters of the response.
    #[error("interactsh poll failed (HTTP {status}): {body}")]
    Poll { status: u16, body: String },
    /// The response could not be used: body over the byte cap, JSON that
    /// does not parse, or `data` present without an `aes_key`.
    #[error("interactsh response shape unexpected: {0}")]
    BadResponse(String),
    /// The wrapped AES key failed base64 decoding or RSA-OAEP decryption, or
    /// did not come out as 32 bytes.
    #[error("interactsh AES key unwrap failed: {0}")]
    AesUnwrap(String),
    /// A single interaction entry could not be decrypted. Not constructed in
    /// this file; presumably produced by `super::decrypt` - confirm.
    #[error("interactsh interaction decrypt failed: {0}")]
    Decrypt(String),
    /// Any `reqwest` failure; converted automatically by `?`.
    #[error("interactsh transport error: {0}")]
    Transport(#[from] reqwest::Error),
    /// A request exceeded the given deadline. Not constructed in this file;
    /// presumably raised by the session layer - confirm.
    #[error("interactsh request timed out after {0:?}")]
    Timeout(Duration),
}

/// Protocol category of a received interaction.
///
/// Produced by `InteractionProtocol::parse` from the protocol label the
/// collector reports; any label that is not recognised maps to `Other`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InteractionProtocol {
    /// Label `"dns"`.
    Dns,
    /// Label `"http"`.
    Http,
    /// Label `"smtp"` or `"smtp-mail"`.
    Smtp,
    /// Any other label.
    Other,
}

impl InteractionProtocol {
    /// Classify a protocol label reported by the collector.
    ///
    /// Matching ignores ASCII case. `"dns"` and `"http"` map to their own
    /// variants, `"smtp"` and `"smtp-mail"` both map to `Smtp`, and every
    /// other label falls through to `Other`.
    pub(super) fn parse(s: &str) -> Self {
        let is = |label: &str| s.eq_ignore_ascii_case(label);
        if is("dns") {
            Self::Dns
        } else if is("http") {
            Self::Http
        } else if is("smtp") || is("smtp-mail") {
            Self::Smtp
        } else {
            Self::Other
        }
    }
}

/// One decrypted interaction returned by the collector.
///
/// `InteractshClient::poll` returns these; the values themselves are built
/// by `super::decrypt::decrypt_entry`, which is outside this file.
#[derive(Debug, Clone)]
pub struct Interaction {
    /// Full 33-char unique id (correlation-id || 13-char suffix). This is
    /// what we match against per-finding URLs we minted.
    pub unique_id: String,
    /// Protocol category of the callback.
    pub protocol: InteractionProtocol,
    /// Address of the peer that contacted the collector, as reported by the
    /// server. NOTE(review): exact format (IP only vs. IP:port) is not
    /// visible here - confirm before parsing.
    pub remote_address: String,
    /// Timestamp string as reported by the server. NOTE(review): format is
    /// not established in this file - confirm before parsing.
    pub timestamp: String,
    /// Raw protocol payload (HTTP request line + headers, DNS query, etc.).
    /// Sized - interactsh truncates server-side, and the intent is a 16 KiB
    /// cap on our side as a defense-in-depth budget against memory abuse
    /// from a hostile server. NOTE(review): that cap is not enforced in this
    /// file; presumably it is applied where the entry is decrypted - confirm.
    pub raw_payload: String,
}

/// One interactsh registration: the HTTP client, the normalised collector
/// URL, and the identity (correlation id, secret, RSA private key) that was
/// registered with it.
///
/// All fields are owned values because the session pins this for the
/// lifetime of the engine. NOTE(review): earlier wording called this "cheap
/// to clone", but no `Clone` impl is derived here; presumably callers share
/// it behind an `Arc` - confirm.
pub struct InteractshClient {
    /// HTTP client used for every register/poll/deregister request.
    http: Client,
    /// Collector base URL as returned by `normalize_server`.
    server: String,
    /// Identifier the collector uses to group this session's interactions;
    /// also the prefix of every minted unique id.
    correlation_id: String,
    /// Secret sent alongside the correlation id on poll and deregister.
    secret_key: String,
    /// RSA private key used to unwrap the per-poll AES key.
    private_key: RsaPrivateKey,
    /// Length of the per-URL suffix (interactsh uses 13 to bring total ID to
    /// 33 chars). Both constructors in this file set it to 13.
    suffix_len: usize,
}

impl InteractshClient {
    /// Build a client for tests without contacting any collector.
    ///
    /// The identity is fixed (`correlation_id` and `secret_key` are constant
    /// strings) and the RSA key is only 1024 bits, so this must not be used
    /// for real sessions. `server` still goes through `normalize_server`.
    ///
    /// # Errors
    ///
    /// Returns `InteractshError::KeyGen` if RSA key generation fails. That
    /// is not expected on a healthy platform, but returning the error keeps
    /// this constructor panic-free and in line with the rest of the
    /// `InteractshError` surface; tests can `.unwrap()` at the call site.
    pub fn for_test(server: &str) -> Result<Self, InteractshError> {
        match RsaPrivateKey::new(&mut OsRng, 1024) {
            Ok(private_key) => Ok(Self {
                http: Client::new(),
                server: normalize_server(server),
                correlation_id: String::from("abcdefghijklmnopqrst"),
                secret_key: String::from("test-secret"),
                private_key,
                suffix_len: 13,
            }),
            Err(e) => Err(InteractshError::KeyGen(e.to_string())),
        }
    }
}

/// JSON body sent to the collector's `/register` endpoint.
///
/// This is the first of the JSON shapes exchanged with interactsh-server.
/// Field names match the upstream Go definitions (`pkg/server/types.go`),
/// hence the hyphenated `serde(rename)` keys. The response-side shapes use
/// `serde(default)` to stay forward-compatible with future fields; this
/// request type is serialize-only and borrows all of its fields.
#[derive(Serialize)]
struct RegisterRequest<'a> {
    /// Base64 of the PEM-encoded RSA public key.
    #[serde(rename = "public-key")]
    public_key: &'a str,
    /// Secret the client later presents on poll and deregister.
    #[serde(rename = "secret-key")]
    secret_key: &'a str,
    /// Correlation id this registration is filed under.
    #[serde(rename = "correlation-id")]
    correlation_id: &'a str,
}

/// JSON body returned by the collector's `/poll` endpoint.
///
/// `serde(default)` on the struct means any missing field deserializes to
/// its empty/`None` default instead of failing the whole response.
#[derive(Deserialize, Default)]
#[serde(default)]
struct PollResponse {
    /// Each entry is base64( AES-256-CFB( IV[16] || ciphertext ) ).
    data: Vec<String>,
    /// Auxiliary metadata; ignored.
    #[allow(dead_code)]
    extra: Vec<String>,
    /// Base64( RSA-OAEP-SHA256( 32-byte AES key ) ). Server omits when there
    /// are no interactions; in that case `data` is also empty.
    aes_key: Option<String>,
}

// NOTE(review): the note below describes a decrypted-interaction type that
// is not defined in this file (presumably it lives next to
// `super::decrypt::decrypt_entry` - confirm). It is kept as a plain comment
// so it no longer renders as documentation for the constant that follows.
//
// Decrypted interaction shape. `serde(default)` because interactsh-server
// sometimes ships partial events (failed protocol parse, etc.) and we'd
// rather degrade gracefully than 500.

/// Hard cap on the body of a `/poll` response. Protects the process from a
/// hostile or misbehaving collector returning a multi-gigabyte JSON that
/// would force `serde_json::from_slice` to allocate the whole thing
/// in-memory before we can validate it. 4 MiB comfortably fits any
/// reasonable poll batch - see the rationale at the call site.
const MAX_POLL_BODY_BYTES: usize = 4 * 1024 * 1024;

/// Cap (64 KiB) on error/diagnostic bodies. We only display the first 256
/// chars in the error message anyway, but the cap prevents a server
/// returning a 500 with a 1 GiB body from spiking memory. The same cap is
/// used when draining the (discarded) body of a successful `/register`.
const ERROR_BODY_CAP: usize = 64 * 1024;

/// Collect a response body chunk by chunk, refusing to buffer more than
/// `cap` bytes.
///
/// # Errors
///
/// - `InteractshError::Transport` if reading a chunk fails.
/// - `InteractshError::BadResponse` as soon as the next chunk would push
///   the total past `cap`; the read is abandoned rather than trusting the
///   server's framing.
async fn read_capped_bytes(
    resp: reqwest::Response,
    cap: usize,
) -> Result<Vec<u8>, InteractshError> {
    use futures_util::StreamExt;
    let mut collected: Vec<u8> = Vec::new();
    let mut chunks = resp.bytes_stream();
    loop {
        // End of stream: everything fitted under the cap.
        let Some(next) = chunks.next().await else {
            return Ok(collected);
        };
        // `?` converts `reqwest::Error` into `InteractshError::Transport`.
        let piece = next?;
        match collected.len().checked_add(piece.len()) {
            Some(total) if total <= cap => collected.extend_from_slice(&piece),
            _ => {
                return Err(InteractshError::BadResponse(format!(
                    "response body exceeds {cap}-byte cap"
                )));
            }
        }
    }
}

/// Like `read_capped_bytes` but for diagnostic error messages - never
/// returns `Err`.
///
/// Reads the body chunk by chunk and returns at most `cap` bytes of it,
/// decoded lossily as UTF-8:
///
/// - on a stream failure, whatever was buffered so far is returned;
/// - on a cap breach, the part of the overflowing chunk that still fits is
///   kept and the rest of the body is left unread. Keeping that prefix
///   matters when the very first chunk is already larger than `cap`:
///   dropping it whole would leave the error message with no body at all.
async fn read_capped_text(resp: reqwest::Response, cap: usize) -> String {
    use futures_util::StreamExt;
    let mut stream = resp.bytes_stream();
    let mut buf: Vec<u8> = Vec::new();
    while let Some(chunk) = stream.next().await {
        let Ok(chunk) = chunk else { break };
        // `buf.len()` never exceeds `cap`, so this is the space left.
        let room = cap.saturating_sub(buf.len());
        if chunk.len() > room {
            // Top the buffer up to exactly `cap` bytes and stop reading. A
            // multi-byte character split by the cut is handled by the lossy
            // decode below.
            buf.extend_from_slice(&chunk[..room]);
            break;
        }
        buf.extend_from_slice(&chunk);
    }
    String::from_utf8_lossy(&buf).into_owned()
}

impl InteractshClient {
    /// Build, generate keys, and register with the collector. The returned
    /// client is ready to mint URLs and be polled.
    ///
    /// `http` is the client used for this and every later request. `server`
    /// is passed through `normalize_server` before use.
    ///
    /// # Errors
    ///
    /// - `KeyGen` if RSA generation fails or the blocking task cannot be
    ///   joined.
    /// - `KeyEncode` if the public key cannot be PEM-encoded.
    /// - `Transport` if the HTTP request itself fails.
    /// - `Register` if the collector answers with a non-success status; the
    ///   response body is truncated to 256 characters.
    pub async fn register(http: Client, server: &str) -> Result<Self, InteractshError> {
        // RSA-2048 keygen happens on a blocking thread - it's CPU-bound for
        // ~100ms and would otherwise stall the runtime.
        let private_key = tokio::task::spawn_blocking(|| {
            RsaPrivateKey::new(&mut OsRng, 2048).map_err(|e| InteractshError::KeyGen(e.to_string()))
        })
        .await
        .map_err(|e| InteractshError::KeyGen(format!("join error: {e}")))??;

        // The collector expects the PEM text of the public key, base64-encoded.
        let public_key = RsaPublicKey::from(&private_key);
        let pem = public_key
            .to_public_key_pem(LineEnding::LF)
            .map_err(|e| InteractshError::KeyEncode(e.to_string()))?;
        let public_key_b64 = B64.encode(pem.as_bytes());

        // Correlation id is 20 lowercase alphanumerics - interactsh-server
        // matches incoming subdomains by this prefix, so the ID space must
        // be wide enough that collisions are statistically impossible across
        // every concurrent scanner sharing the collector. 36^20 ≈ 1.3e31.
        let correlation_id: String = OsRng
            .sample_iter(&Alphanumeric)
            .take(20)
            .map(|b| (b as char).to_ascii_lowercase())
            .collect();
        let secret_key = uuid::Uuid::new_v4().to_string();

        let server = normalize_server(server);

        let body = RegisterRequest {
            public_key: &public_key_b64,
            secret_key: &secret_key,
            correlation_id: &correlation_id,
        };
        // SECURITY/POLITENESS: kimi verifier audit LOW finding. Every OOB
        // request - register, poll, deregister - shares the same upstream
        // interactsh collector. Without rate limiting, a scan that fires
        // 200 detector-verify subscriptions in parallel would hammer the
        // collector with 200 register calls in flight at once, get IP-banned,
        // and silently lose all OOB observability for the rest of the run.
        // We bucket every OOB call under a single service id so the global
        // limiter (default 5 rps) governs the aggregate.
        crate::rate_limit::get_rate_limiter()
            .wait(OOB_SERVICE)
            .await;
        let resp = http
            .post(format!("{server}/register"))
            .json(&body)
            .send()
            .await?;
        let status = resp.status();
        if !status.is_success() {
            let body = read_capped_text(resp, ERROR_BODY_CAP).await;
            return Err(InteractshError::Register {
                status: status.as_u16(),
                body: body.chars().take(256).collect(),
            });
        }
        // Drain (and discard) the register success body under a cap. Some
        // interactsh deployments echo registration metadata; we don't need
        // it but must not let the connection sit half-read indefinitely.
        let _ = read_capped_bytes(resp, ERROR_BODY_CAP).await;
        debug!(target: "keyhog::oob", correlation_id = %correlation_id, server = %server, "registered with interactsh collector");

        Ok(Self {
            http,
            server,
            correlation_id,
            secret_key,
            private_key,
            suffix_len: 13,
        })
    }

    /// Mint a fresh callback URL bound to this session. The full 33-char
    /// subdomain is returned (unique-id) plus the host the service should
    /// hit. Caller is responsible for embedding it where the credential's
    /// API will follow.
    ///
    /// The unique id is the correlation id followed by `suffix_len` random
    /// lowercase alphanumerics; no network traffic is involved.
    pub fn mint_url(&self) -> MintedUrl {
        let suffix: String = OsRng
            .sample_iter(&Alphanumeric)
            .take(self.suffix_len)
            .map(|b| (b as char).to_ascii_lowercase())
            .collect();
        let unique_id = format!("{}{}", self.correlation_id, suffix);
        let host = format!("{}.{}", unique_id, self.server_host());
        let url = format!("https://{host}");
        MintedUrl {
            unique_id,
            host,
            url,
        }
    }

    /// Poll once. Returns every interaction the collector has buffered for
    /// this correlation id since the last poll.
    ///
    /// Entries that fail to decrypt are logged and skipped, and entries that
    /// decrypt but do not parse are skipped silently, so one bad entry never
    /// fails the whole batch.
    ///
    /// # Errors
    ///
    /// - `Transport` if the HTTP request or body read fails.
    /// - `Poll` if the collector answers with a non-success status; the
    ///   response body is truncated to 256 characters.
    /// - `BadResponse` if the body exceeds `MAX_POLL_BODY_BYTES`, is not
    ///   valid JSON, or carries `data` without an `aes_key`.
    /// - `AesUnwrap` if the AES key cannot be unwrapped or is not 32 bytes.
    pub async fn poll(&self) -> Result<Vec<Interaction>, InteractshError> {
        // See `register` for the rate-limiter rationale - same bucket so all
        // OOB traffic to the collector aggregates under one budget.
        crate::rate_limit::get_rate_limiter()
            .wait(OOB_SERVICE)
            .await;
        let resp = self
            .http
            .get(format!("{}/poll", self.server))
            .query(&[("id", &self.correlation_id), ("secret", &self.secret_key)])
            .send()
            .await?;
        let status = resp.status();
        if !status.is_success() {
            let body = read_capped_text(resp, ERROR_BODY_CAP).await;
            return Err(InteractshError::Poll {
                status: status.as_u16(),
                body: body.chars().take(256).collect(),
            });
        }
        // Bound the response body before deserialization. A malicious or
        // misbehaving collector could otherwise blow process memory by
        // returning a multi-gigabyte JSON. 4 MiB comfortably fits even a
        // dense poll batch (≤100 interactions × ~16 KiB raw_payload each
        // base64-expanded ≈ 2 MiB) with headroom.
        let body = read_capped_bytes(resp, MAX_POLL_BODY_BYTES).await?;
        let parsed: PollResponse = serde_json::from_slice(&body)
            .map_err(|e| InteractshError::BadResponse(e.to_string()))?;
        // No interactions: the server omits the AES key too, so stop before
        // requiring one.
        if parsed.data.is_empty() {
            return Ok(Vec::new());
        }
        let aes_key_b64 = parsed.aes_key.ok_or_else(|| {
            InteractshError::BadResponse("data present but aes_key missing".into())
        })?;
        let aes_key = self.unwrap_aes_key(&aes_key_b64)?;
        if aes_key.len() != 32 {
            return Err(InteractshError::AesUnwrap(format!(
                "expected 32-byte AES-256 key, got {}",
                aes_key.len()
            )));
        }

        let mut out = Vec::with_capacity(parsed.data.len());
        for entry in parsed.data {
            match super::decrypt::decrypt_entry(&aes_key, &entry) {
                Ok(Some(interaction)) => out.push(interaction),
                Ok(None) => {} // unparseable JSON - skip, don't fail the batch
                Err(e) => {
                    warn!(target: "keyhog::oob", error = %e, "interactsh entry decrypt failed; skipping")
                }
            }
        }
        Ok(out)
    }

    /// Tear down the registration. Idempotent on the server side; a failure
    /// to deregister is non-fatal - the server prunes inactive sessions
    /// after its retention window.
    ///
    /// A non-success HTTP status is logged at `warn` level but still
    /// returns `Ok(())`, so teardown stays best-effort while a rejected
    /// deregistration no longer goes unnoticed.
    ///
    /// # Errors
    ///
    /// `Transport` if the HTTP request itself fails.
    pub async fn deregister(&self) -> Result<(), InteractshError> {
        #[derive(Serialize)]
        struct DeregisterRequest<'a> {
            #[serde(rename = "correlation-id")]
            correlation_id: &'a str,
            #[serde(rename = "secret-key")]
            secret_key: &'a str,
        }
        // See `register` for the rate-limiter rationale.
        crate::rate_limit::get_rate_limiter()
            .wait(OOB_SERVICE)
            .await;
        let resp = self
            .http
            .post(format!("{}/deregister", self.server))
            .json(&DeregisterRequest {
                correlation_id: &self.correlation_id,
                secret_key: &self.secret_key,
            })
            .send()
            .await?;
        let status = resp.status();
        if !status.is_success() {
            // Only the status code is logged; the body is not read.
            warn!(target: "keyhog::oob", status = status.as_u16(), "interactsh deregister returned a non-success status; relying on server-side expiry");
        }
        Ok(())
    }

    /// The correlation id this client registered (or was constructed) with.
    pub fn correlation_id(&self) -> &str {
        &self.correlation_id
    }

    /// `oast.fun` from `https://oast.fun/`.
    ///
    /// Strips the scheme and anything from the first `/` onwards. The result
    /// becomes the parent domain of every minted hostname, so a path
    /// component in the configured server URL must never leak into it. A
    /// port, if present, is kept as-is.
    fn server_host(&self) -> &str {
        let authority = self
            .server
            .split_once("://")
            .map_or(self.server.as_str(), |(_, rest)| rest);
        // `split` always yields at least one item, so the fallback is only
        // there to avoid an unwrap.
        authority.split('/').next().unwrap_or(authority)
    }

    /// Decode and RSA-OAEP(SHA-256)-decrypt the wrapped AES key from a poll
    /// response.
    ///
    /// The returned length is not checked here; `poll` verifies it is 32
    /// bytes.
    ///
    /// # Errors
    ///
    /// `AesUnwrap` if `b64` is not valid base64 or decryption fails.
    fn unwrap_aes_key(&self, b64: &str) -> Result<Vec<u8>, InteractshError> {
        let wrapped = B64
            .decode(b64.as_bytes())
            .map_err(|e| InteractshError::AesUnwrap(format!("base64: {e}")))?;
        let padding = Oaep::new::<Sha256>();
        self.private_key
            .decrypt(padding, &wrapped)
            .map_err(|e| InteractshError::AesUnwrap(format!("rsa-oaep: {e}")))
    }
}

/// One per-finding callback URL, returned from `InteractshClient::mint_url`.
///
/// The three fields are different views of the same value: `host` is
/// `unique_id` prefixed onto the collector host, and `url` is `host` with an
/// `https://` scheme.
#[derive(Debug, Clone)]
pub struct MintedUrl {
    /// Full 33-char id (correlation id followed by the random suffix); the
    /// value the service will reflect in DNS/HTTP host.
    pub unique_id: String,
    /// `<unique_id>.<server-host>` - bare host without scheme.
    pub host: String,
    /// `https://<host>` - convenience for HTTP-shaped probes.
    pub url: String,
}

/// Canonicalise a collector address into `https://<host>` form.
///
/// Accepts `oast.fun`, `oast.fun/`, `https://oast.fun`, `https://oast.fun/`
/// and the `http://` equivalents. Surrounding whitespace and trailing
/// slashes are dropped, and the result always starts with `https://` and
/// has no trailing slash.
///
/// An explicit `http://` scheme is force-upgraded to `https://` rather than
/// rejected: we never speak plaintext to a collector, because the wrapped
/// AES key flowing back must travel TLS-wrapped.
///
/// Scheme detection ignores ASCII case (URI schemes are case-insensitive),
/// so `HTTPS://oast.fun` normalises to `https://oast.fun` instead of the
/// broken `https://HTTPS://oast.fun`.
fn normalize_server(s: &str) -> String {
    /// Strip `prefix` from the front of `s`, ignoring ASCII case.
    fn strip_prefix_ignore_case<'a>(s: &'a str, prefix: &str) -> Option<&'a str> {
        // `get` yields `None` when `s` is shorter than `prefix` or the cut
        // would land inside a multi-byte character; neither can be a match.
        let head = s.get(..prefix.len())?;
        let tail = s.get(prefix.len()..)?;
        head.eq_ignore_ascii_case(prefix).then_some(tail)
    }

    let s = s.trim().trim_end_matches('/');
    // Whatever scheme was supplied (or none), the output scheme is https.
    let rest = strip_prefix_ignore_case(s, "https://")
        .or_else(|| strip_prefix_ignore_case(s, "http://"))
        .unwrap_or(s);
    format!("https://{rest}")
}