Skip to main content

keyhog_verifier/
verify.rs

//! Verification execution logic.
//!
//! Verification is explicitly opt-in via the `--verify` CLI flag.
//! Security invariants for this module:
//! - Credentials are never stored permanently. They are only used in-memory for the current run.
//! - HTTPS only. TLS certificate validation stays enabled for every request.
//! - Private IPs and private DNS resolutions are blocked to reduce SSRF risk.
//! - Redirects are not followed.
//! - Response bodies are capped at 1 MB.
10
11use std::collections::HashMap;
12use std::net::{IpAddr, SocketAddr};
13use std::sync::Arc;
14use std::time::Duration;
15
16use dashmap::DashMap;
17use keyhog_core::{
18    AuthSpec, DetectorSpec, HttpMethod, MetadataSpec, SuccessSpec, VerificationResult,
19    VerifiedFinding,
20};
21use reqwest::Client;
22use tokio::sync::Notify;
23use tokio::task::JoinSet;
24
25use crate::interpolate::{interpolate, resolve_field};
26use crate::ssrf::{is_private_ip, is_private_ipv4, is_private_url, parse_numeric_ipv4_host};
27use crate::{DedupedMatch, VerificationEngine, VerifyConfig, VerifyError, cache};
28
29#[cfg(test)]
30use crate::dedup_matches;
31#[cfg(test)]
32use crate::ssrf::parse_url_host;
33#[cfg(test)]
34use keyhog_core::{MatchLocation, RawMatch};
35use tokio::sync::Semaphore;
36
/// Permit count for the fallback semaphore created when a match's service has
/// no pre-built entry in the per-service semaphore map.
const DEFAULT_SERVICE_CONCURRENCY: usize = 5;
/// Upper bound on verification attempts per credential (first try included).
const MAX_VERIFY_ATTEMPTS: usize = 3;
/// Base retry delay in milliseconds; the retry loop multiplies it by the
/// 1-based attempt number.
const RETRY_DELAY_MS: u64 = 500;
/// Maximum response body size to read during verification (1 MB).
/// Prevents OOM from malicious endpoints returning unbounded data.
const MAX_RESPONSE_BODY_BYTES: usize = 1024 * 1024;
/// Fixed error text for a failed body read.
// NOTE(review): no use of this constant is visible in this part of the file.
const BODY_ERROR_MESSAGE: &str = "body read failed";
/// Fixed error text reported when a response exceeds `MAX_RESPONSE_BODY_BYTES`.
const BODY_TOO_LARGE_ERROR: &str = "response body exceeds 1MB limit";
/// Catch-all error text; used instead of the underlying error's own message.
const GENERIC_REQUEST_ERROR: &str = "request failed";
/// Error text for connect-level failures and failed DNS lookups.
const CONNECTION_FAILED_ERROR: &str = "connection failed";
/// Error text for request errors classified as redirect errors.
const TOO_MANY_REDIRECTS_ERROR: &str = "too many redirects";
/// Error text for request or DNS lookup timeouts.
const TIMEOUT_ERROR: &str = "timeout";
/// Error text returned when the target is (or resolves to) a private address.
const PRIVATE_URL_ERROR: &str = "blocked: private URL";
/// Error text returned when the target URL scheme is not `https`.
const HTTPS_ONLY_ERROR: &str = "blocked: HTTPS only";
/// Error text returned if the retry loop finishes without producing a result.
const MAX_RETRIES_EXCEEDED_ERROR: &str = "max retries exceeded";
/// Access key ID prefixes accepted by `valid_aws_format`.
const AWS_VALID_ACCESS_KEY_PREFIXES: &[&str] = &["AKIA", "ASIA", "AROA", "AIDA", "AGPA"];
/// Exact length required of an AWS access key ID.
const AWS_ACCESS_KEY_LEN: usize = 20;
/// Minimum length required of an AWS secret access key.
const AWS_MIN_SECRET_KEY_LEN: usize = 40;
55
56impl VerificationEngine {
57    /// Create a verifier with shared HTTP client, cache, and concurrency controls.
58    ///
59    /// # Examples
60    ///
61    /// ```rust
62    /// use keyhog_core::{DetectorSpec, PatternSpec, Severity};
63    /// use keyhog_verifier::{VerificationEngine, VerifyConfig};
64    ///
65    /// let engine = VerificationEngine::new(
66    ///     &[DetectorSpec {
67    ///         id: "demo-token".into(),
68    ///         name: "Demo Token".into(),
69    ///         service: "demo".into(),
70    ///         severity: Severity::High,
71    ///         patterns: vec![PatternSpec {
72    ///             regex: "demo_[A-Z0-9]{8}".into(),
73    ///             description: None,
74    ///             group: None,
75    ///         }],
76    ///         companion: None,
77    ///         verify: None,
78    ///         keywords: vec!["demo_".into()],
79    ///     }],
80    ///     VerifyConfig::default(),
81    /// )
82    /// .unwrap();
83    ///
84    /// let _ = engine;
85    /// ```
86    pub fn new(detectors: &[DetectorSpec], config: VerifyConfig) -> Result<Self, VerifyError> {
87        let client = Client::builder()
88            .timeout(config.timeout)
89            // SAFETY: verification traffic must keep certificate validation on.
90            .danger_accept_invalid_certs(false)
91            .redirect(reqwest::redirect::Policy::none())
92            .build()
93            .map_err(VerifyError::ClientBuild)?;
94
95        let detector_map: HashMap<String, DetectorSpec> = detectors
96            .iter()
97            .cloned()
98            .map(|d| (d.id.clone(), d))
99            .collect();
100
101        let mut service_semaphores = HashMap::new();
102        for d in detectors {
103            service_semaphores
104                .entry(d.service.clone())
105                .or_insert_with(|| Arc::new(Semaphore::new(config.max_concurrent_per_service)));
106        }
107
108        Ok(Self {
109            client,
110            detectors: detector_map,
111            service_semaphores,
112            global_semaphore: Arc::new(Semaphore::new(config.max_concurrent_global)),
113            timeout: config.timeout,
114            cache: Arc::new(cache::VerificationCache::default_ttl()),
115            inflight: Arc::new(DashMap::new()),
116            max_inflight_keys: config.max_inflight_keys,
117        })
118    }
119
120    /// Verify a batch of deduplicated raw matches in parallel.
121    /// Returns one `VerifiedFinding` per unique (detector_id, credential).
122    ///
123    /// # Examples
124    ///
125    /// ```rust,no_run
126    /// use keyhog_core::{DetectorSpec, MatchLocation, PatternSpec, RawMatch, Severity};
127    /// use keyhog_verifier::{VerificationEngine, VerifyConfig, dedup_matches};
128    ///
129    /// # async fn demo() {
130    /// let detector = DetectorSpec {
131    ///     id: "demo-token".into(),
132    ///     name: "Demo Token".into(),
133    ///     service: "demo".into(),
134    ///     severity: Severity::High,
135    ///     patterns: vec![PatternSpec {
136    ///         regex: "demo_[A-Z0-9]{8}".into(),
137    ///         description: None,
138    ///         group: None,
139    ///     }],
140    ///     companion: None,
141    ///     verify: None,
142    ///     keywords: vec!["demo_".into()],
143    /// };
144    /// let engine = VerificationEngine::new(&[detector], VerifyConfig::default()).unwrap();
145    /// let findings = engine
146    ///     .verify_all(dedup_matches(vec![RawMatch {
147    ///         detector_id: "demo-token".into(),
148    ///         detector_name: "Demo Token".into(),
149    ///         service: "demo".into(),
150    ///         severity: Severity::High,
151    ///         credential: "demo_ABC12345".into(),
152    ///         companion: None,
153    ///         location: MatchLocation {
154    ///             source: "filesystem".into(),
155    ///             file_path: Some(".env".into()),
156    ///             line: Some(1),
157    ///             offset: 0,
158    ///             commit: None,
159    ///             author: None,
160    ///             date: None,
161    ///         },
162    ///         entropy: None,
163    ///         confidence: None,
164    ///     }]))
165    ///     .await;
166    /// assert_eq!(findings.len(), 1);
167    /// # }
168    /// ```
169    pub async fn verify_all(&self, groups: Vec<DedupedMatch>) -> Vec<VerifiedFinding> {
170        let max_active = self.global_semaphore.available_permits().max(1);
171        let total = groups.len();
172        let shared = VerifyTaskShared {
173            global_semaphore: self.global_semaphore.clone(),
174            service_semaphores: self.service_semaphores.clone(),
175            client: self.client.clone(),
176            detectors: self.detectors.clone(),
177            timeout: self.timeout,
178            cache: self.cache.clone(),
179            inflight: self.inflight.clone(),
180            max_inflight_keys: self.max_inflight_keys,
181        };
182        let mut pending = groups.into_iter();
183        let mut join_set = JoinSet::new();
184
185        while join_set.len() < max_active {
186            let Some(group) = pending.next() else {
187                break;
188            };
189            join_set.spawn(verify_group_task(shared.clone(), group));
190        }
191
192        let mut findings = Vec::with_capacity(total);
193        while let Some(result) = join_set.join_next().await {
194            match result {
195                Ok(finding) => findings.push(finding),
196                Err(e) => tracing::error!("verification task panicked: {}", e),
197            }
198
199            if let Some(group) = pending.next() {
200                join_set.spawn(verify_group_task(shared.clone(), group));
201            }
202        }
203        findings
204    }
205}
206
/// Engine state handed to every spawned verification task.
///
/// `verify_all` builds one instance from the engine's fields and clones it
/// for each task it spawns.
#[derive(Clone)]
struct VerifyTaskShared {
    /// Semaphore acquired first by every task.
    global_semaphore: Arc<Semaphore>,
    /// Per-service semaphores, looked up by the match's `service`.
    service_semaphores: HashMap<String, Arc<Semaphore>>,
    /// Base HTTP client passed to the verification request path.
    client: Client,
    /// Detector specs, looked up by the match's `detector_id`.
    detectors: HashMap<String, DetectorSpec>,
    /// Default request timeout; a verify spec's `timeout_ms` takes precedence.
    timeout: Duration,
    /// Result cache queried and written with (credential, detector id).
    cache: Arc<cache::VerificationCache>,
    /// Keys currently being verified, mapped to the `Notify` waiters park on.
    inflight: Arc<DashMap<(String, String), Arc<Notify>>>,
    /// Once `inflight` holds this many keys, tasks skip in-flight coordination.
    max_inflight_keys: usize,
}
218
/// Verify one deduplicated match and turn it into a `VerifiedFinding`.
///
/// Steps, in order:
/// 1. Acquire the global permit, then the per-service permit (a fallback
///    semaphore with `DEFAULT_SERVICE_CONCURRENCY` permits is created when the
///    service has no entry).
/// 2. Return the cached result for (credential, detector id) if there is one.
/// 3. Unless the in-flight map already holds `max_inflight_keys` entries,
///    coordinate with other tasks on the same (detector id, credential) key:
///    wait while another task owns the key, re-checking the cache after each
///    wake-up, or claim the key via an `InflightGuard`.
/// 4. Run the detector's verify spec with retries; a missing detector or a
///    detector without a verify spec yields `Unverifiable`.
/// 5. Store the outcome in the cache and build the finding.
///
/// A closed semaphore produces an `Error` finding instead of a panic.
async fn verify_group_task(shared: VerifyTaskShared, group: DedupedMatch) -> VerifiedFinding {
    let global = shared.global_semaphore;
    let service_sem = shared
        .service_semaphores
        .get(&group.service)
        .cloned()
        .unwrap_or_else(|| Arc::new(Semaphore::new(DEFAULT_SERVICE_CONCURRENCY)));
    let client = shared.client;
    let detector = shared.detectors.get(&group.detector_id).cloned();
    let timeout = shared.timeout;

    let cache = shared.cache;
    let inflight = shared.inflight;
    let max_inflight_keys = shared.max_inflight_keys;

    // Permits are held (via the `_`-prefixed bindings) until the function returns.
    let Ok(_global_permit) = global.acquire().await else {
        return group.into_finding(
            VerificationResult::Error("semaphore closed".into()),
            HashMap::new(),
        );
    };
    let Ok(_service_permit) = service_sem.acquire().await else {
        return group.into_finding(
            VerificationResult::Error("service semaphore closed".into()),
            HashMap::new(),
        );
    };

    // Fast path: a previous verification of the same pair is still cached.
    if let Some((cached_result, cached_meta)) = cache.get(&group.credential, &group.detector_id) {
        return group.into_finding(cached_result, cached_meta);
    }

    // When the in-flight map is at capacity, skip coordination and verify
    // directly (no guard is created in that case).
    let inflight_guard = if inflight.len() >= max_inflight_keys {
        None
    } else {
        let inflight_key = (group.detector_id.clone(), group.credential.clone());
        loop {
            // Re-check on every iteration: the task we waited on may have
            // populated the cache before waking us.
            if let Some((cached_result, cached_meta)) =
                cache.get(&group.credential, &group.detector_id)
            {
                return group.into_finding(cached_result, cached_meta);
            }

            match inflight.entry(inflight_key.clone()) {
                dashmap::mapref::entry::Entry::Occupied(entry) => {
                    let notify = entry.get().clone();
                    // SAFETY: lock ordering is one-way: task permits
                    // (global, then service) are acquired before touching
                    // inflight, and the DashMap entry guard is dropped before
                    // await.
                    // CRITICAL: We MUST create the `Notified` future before dropping `entry`.
                    // This registers our interest synchronously. If we drop `entry` first,
                    // the verifying task could remove it and call `notify_waiters()` before
                    // we create the future, causing a permanent hang (lost wake-up).
                    let fut = notify.notified();
                    drop(entry);
                    fut.await;
                }
                dashmap::mapref::entry::Entry::Vacant(entry) => {
                    // No other task owns this key: claim it and verify ourselves.
                    let notify = Arc::new(Notify::new());
                    entry.insert(notify.clone());
                    break Some(InflightGuard {
                        key: inflight_key,
                        inflight: inflight.clone(),
                        notify,
                    });
                }
            }
        }
    };
    // Keep the guard alive until return; its `Drop` releases the key and wakes waiters.
    let _inflight_guard = inflight_guard;

    let (verification, metadata) = match &detector {
        Some(det) => match &det.verify {
            Some(verify_spec) => {
                verify_with_retry(
                    &client,
                    verify_spec,
                    &group.credential,
                    group.companion.as_deref(),
                    timeout,
                )
                .await
            }
            None => (VerificationResult::Unverifiable, HashMap::new()),
        },
        None => (VerificationResult::Unverifiable, HashMap::new()),
    };

    // The cache is written before the in-flight guard drops, so woken waiters
    // find the result on their next cache check.
    cache.put(
        &group.credential,
        &group.detector_id,
        verification.clone(),
        metadata.clone(),
    );

    group.into_finding(verification, metadata)
}
317
/// Ownership marker for one key in the in-flight map.
///
/// Created by the task that inserts the key; dropping it removes the key and
/// wakes every task waiting on `notify`.
struct InflightGuard {
    /// The (detector id, credential) pair this guard owns.
    key: (String, String),
    /// Shared in-flight map the key is removed from on drop.
    inflight: Arc<DashMap<(String, String), Arc<Notify>>>,
    /// The `Notify` stored under `key`; waiters park on it.
    notify: Arc<Notify>,
}
323
impl Drop for InflightGuard {
    /// Release the in-flight key, then wake all tasks waiting on it.
    fn drop(&mut self) {
        // SAFETY: cleanup follows the same ordering guarantee as
        // `verify_group_task`: remove the inflight marker without holding any
        // other map guard, then notify waiters. There is no second lock
        // acquired while this guard is dropped, so the owner cannot deadlock
        // with waiting tasks.
        // The key is removed BEFORE notifying so that a woken waiter which
        // misses the cache sees a vacant entry and can claim the key itself.
        self.inflight.remove(&self.key);
        self.notify.notify_waiters();
    }
}
334
335/// Perform verification with retry logic for transient failures.
336async fn verify_with_retry(
337    client: &Client,
338    spec: &keyhog_core::VerifySpec,
339    credential: &str,
340    companion: Option<&str>,
341    timeout: Duration,
342) -> (VerificationResult, HashMap<String, String>) {
343    for attempt in 0..MAX_VERIFY_ATTEMPTS {
344        let VerificationAttempt {
345            result,
346            metadata,
347            transient,
348        } = verify_credential(client, spec, credential, companion, timeout).await;
349        if transient && attempt + 1 < MAX_VERIFY_ATTEMPTS {
350            let delay_ms = RETRY_DELAY_MS * (attempt as u64 + 1);
351            tokio::time::sleep(Duration::from_millis(delay_ms)).await;
352            continue;
353        }
354        return (result, metadata);
355    }
356    (
357        VerificationResult::Error(MAX_RETRIES_EXCEEDED_ERROR.into()),
358        HashMap::new(),
359    )
360}
361
/// Outcome of a single verification attempt.
struct VerificationAttempt {
    /// Verdict for this attempt.
    result: VerificationResult,
    /// Key/value metadata gathered for this attempt (may be empty).
    metadata: HashMap<String, String>,
    /// `true` when the failure is considered retryable by the retry loop.
    transient: bool,
}
367
/// A vetted request target: the parsed URL plus the client to send it with.
#[derive(Debug)]
struct ResolvedTarget {
    /// Either a clone of the shared client (IP-literal hosts) or a dedicated
    /// client whose DNS resolution for the host is pinned to vetted addresses.
    client: Client,
    /// The parsed target URL.
    url: reqwest::Url,
}
373
374/// Perform one verification HTTP call for a credential.
375async fn verify_credential(
376    client: &Client,
377    spec: &keyhog_core::VerifySpec,
378    credential: &str,
379    companion: Option<&str>,
380    timeout: Duration,
381) -> VerificationAttempt {
382    let timeout = verification_timeout(spec, timeout);
383    let raw_url = interpolate(&spec.url, credential, companion);
384    let resolved_target = match resolved_client_for_url(client, &raw_url, timeout).await {
385        Ok(resolved_target) => resolved_target,
386        Err(result) => {
387            return VerificationAttempt {
388                result,
389                metadata: HashMap::new(),
390                transient: false,
391            };
392        }
393    };
394
395    // SSRF protection: block verification against private/internal IPs.
396    if is_private_url(resolved_target.url.as_str()) {
397        return VerificationAttempt {
398            result: VerificationResult::Error(PRIVATE_URL_ERROR.into()),
399            metadata: HashMap::new(),
400            transient: false,
401        };
402    }
403
404    let base_request = build_request(
405        &resolved_target.client,
406        spec,
407        resolved_target.url.clone(),
408        credential,
409        companion,
410        timeout,
411    )
412    .await;
413    let mut request = match base_request {
414        RequestBuildResult::Ready(request) => request,
415        RequestBuildResult::Final(result, metadata) => {
416            return VerificationAttempt {
417                result,
418                metadata,
419                transient: false,
420            };
421        }
422    };
423
424    // Apply additional headers.
425    for header in &spec.headers {
426        let value = interpolate(&header.value, credential, companion);
427        request = request.header(&header.name, &value);
428    }
429
430    // Apply body.
431    if let Some(body_template) = &spec.body {
432        let body = interpolate(body_template, credential, companion);
433        request = request.body(body);
434    }
435
436    // Execute.
437    let response = match execute_request(request).await {
438        Ok(resp) => resp,
439        Err(error) => {
440            return VerificationAttempt {
441                result: error.result,
442                metadata: HashMap::new(),
443                transient: error.transient,
444            };
445        }
446    };
447
448    let status = response.status().as_u16();
449    let body = match read_response_body(response).await {
450        Ok(body) => body,
451        Err(error) => {
452            return VerificationAttempt {
453                result: error.result,
454                metadata: HashMap::new(),
455                transient: error.transient,
456            };
457        }
458    };
459
460    // Evaluate success condition.
461    let is_live = evaluate_success(&spec.success, status, &body);
462
463    let is_actually_live = is_live && !body_indicates_error(&body);
464
465    let metadata = extract_metadata(&spec.metadata, &body);
466
467    let verification_result = if is_actually_live {
468        VerificationResult::Live
469    } else if status == 429 {
470        VerificationResult::RateLimited
471    } else {
472        VerificationResult::Dead
473    };
474
475    VerificationAttempt {
476        result: verification_result,
477        metadata,
478        transient: false,
479    }
480}
481
482fn verification_timeout(spec: &keyhog_core::VerifySpec, default_timeout: Duration) -> Duration {
483    spec.timeout_ms
484        .map(Duration::from_millis)
485        .unwrap_or(default_timeout)
486}
487
488async fn resolved_client_for_url(
489    client: &Client,
490    url: &str,
491    timeout: Duration,
492) -> Result<ResolvedTarget, VerificationResult> {
493    if is_private_url(url) {
494        return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
495    }
496    let parsed = reqwest::Url::parse(url)
497        .map_err(|_| VerificationResult::Error(GENERIC_REQUEST_ERROR.into()))?;
498    if parsed.scheme() != "https" {
499        return Err(VerificationResult::Error(HTTPS_ONLY_ERROR.into()));
500    }
501    let Some(host) = parsed.host_str() else {
502        return Err(VerificationResult::Error(GENERIC_REQUEST_ERROR.into()));
503    };
504    if let Ok(ip) = host.parse::<IpAddr>() {
505        if is_private_ip(ip) {
506            return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
507        }
508        return Ok(ResolvedTarget {
509            client: client.clone(),
510            url: parsed,
511        });
512    }
513    if let Some(ip) = parse_numeric_ipv4_host(host) {
514        if is_private_ipv4(ip) {
515            return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
516        }
517        return Ok(ResolvedTarget {
518            client: client.clone(),
519            url: parsed,
520        });
521    }
522
523    let port = parsed.port_or_known_default().unwrap_or(443);
524    let addrs = tokio::time::timeout(timeout, tokio::net::lookup_host((host, port)))
525        .await
526        .map_err(|_| VerificationResult::Error(TIMEOUT_ERROR.into()))?
527        .map_err(|_| VerificationResult::Error(CONNECTION_FAILED_ERROR.into()))?
528        .collect::<Vec<SocketAddr>>();
529    if addrs.is_empty() || addrs.iter().any(|addr| is_private_ip(addr.ip())) {
530        return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
531    }
532    let pinned_addrs = addrs
533        .into_iter()
534        .map(|addr| SocketAddr::new(addr.ip(), port))
535        .collect::<Vec<_>>();
536
537    let resolved_client = reqwest::Client::builder()
538        .timeout(timeout)
539        .danger_accept_invalid_certs(false)
540        .redirect(reqwest::redirect::Policy::none())
541        // SAFETY: this dedicated client is paired with the already-parsed URL
542        // below and only ever resolves `host` to the vetted address set from
543        // this function, so reqwest cannot perform a fresh DNS lookup later.
544        .resolve_to_addrs(host, &pinned_addrs)
545        .build()
546        .map_err(|_| VerificationResult::Error(GENERIC_REQUEST_ERROR.into()))?;
547
548    Ok(ResolvedTarget {
549        client: resolved_client,
550        url: parsed,
551    })
552}
553
/// Result of preparing a verification request.
enum RequestBuildResult {
    /// A request builder that still has to be sent.
    Ready(reqwest::RequestBuilder),
    /// A verdict (with metadata) that was already reached while building, so
    /// no further request is sent — e.g. the AWS probe path.
    Final(VerificationResult, HashMap<String, String>),
}
558
559async fn build_request(
560    client: &Client,
561    spec: &keyhog_core::VerifySpec,
562    url: reqwest::Url,
563    credential: &str,
564    companion: Option<&str>,
565    timeout: Duration,
566) -> RequestBuildResult {
567    let request = request_for_method(client, &spec.method, url).timeout(timeout);
568    apply_auth(request, &spec.auth, credential, companion, timeout, client).await
569}
570
571fn request_for_method(
572    client: &Client,
573    method: &HttpMethod,
574    url: reqwest::Url,
575) -> reqwest::RequestBuilder {
576    match method {
577        HttpMethod::Get => client.get(url),
578        HttpMethod::Post => client.post(url),
579        HttpMethod::Put => client.put(url),
580        HttpMethod::Delete => client.delete(url),
581        HttpMethod::Head => client.head(url),
582        HttpMethod::Patch => client.patch(url),
583    }
584}
585
586async fn apply_auth(
587    request: reqwest::RequestBuilder,
588    auth: &AuthSpec,
589    credential: &str,
590    companion: Option<&str>,
591    timeout: Duration,
592    client: &Client,
593) -> RequestBuildResult {
594    match auth {
595        AuthSpec::None => RequestBuildResult::Ready(request),
596        AuthSpec::Bearer { field } => {
597            let token = resolve_field(field, credential, companion);
598            RequestBuildResult::Ready(request.bearer_auth(&token))
599        }
600        AuthSpec::Basic { username, password } => {
601            let user = resolve_field(username, credential, companion);
602            let pass = resolve_field(password, credential, companion);
603            RequestBuildResult::Ready(request.basic_auth(&user, Some(&pass)))
604        }
605        AuthSpec::Header { name, template } => {
606            let value = interpolate(template, credential, companion);
607            RequestBuildResult::Ready(request.header(name, &value))
608        }
609        AuthSpec::Query { param, field } => {
610            let value = resolve_field(field, credential, companion);
611            RequestBuildResult::Ready(request.query(&[(param.as_str(), value.as_str())]))
612        }
613        AuthSpec::AwsV4 {
614            access_key,
615            secret_key,
616            region,
617            ..
618        } => {
619            build_aws_probe(
620                access_key, secret_key, region, credential, companion, timeout, client,
621            )
622            .await
623        }
624    }
625}
626
627/// Build and execute an AWS SigV4-signed `GetCallerIdentity` request.
628///
629/// This performs real authentication against the AWS STS endpoint:
630/// - Constructs a canonical request per AWS Signature Version 4
631/// - Signs with the provided secret key using HMAC-SHA256
632/// - Returns `Live` if STS responds 200, `Dead` on 403
633///
634/// # Security
635///
636/// - Only contacts `sts.<region>.amazonaws.com` over HTTPS
637/// - The secret key is used only for HMAC signing and never transmitted
638/// - No data mutation: `GetCallerIdentity` is a read-only STS action
639async fn build_aws_probe(
640    access_key: &str,
641    secret_key: &str,
642    region: &str,
643    credential: &str,
644    companion: Option<&str>,
645    timeout: Duration,
646    client: &Client,
647) -> RequestBuildResult {
648    let access_key = resolve_field(access_key, credential, companion);
649    let secret_key = resolve_field(secret_key, credential, companion);
650
651    if secret_key.is_empty() {
652        return RequestBuildResult::Final(VerificationResult::Unverifiable, HashMap::new());
653    }
654
655    if !valid_aws_format(&access_key, &secret_key) {
656        return RequestBuildResult::Final(
657            VerificationResult::Dead,
658            HashMap::from([("format_valid".into(), "false".into())]),
659        );
660    }
661
662    let host = format!("sts.{region}.amazonaws.com");
663    let url = format!("https://{host}/");
664    let body = "Action=GetCallerIdentity&Version=2011-06-15";
665
666    // Build SigV4 signed request
667    match build_sigv4_request(
668        client,
669        &url,
670        &host,
671        body,
672        &access_key,
673        &secret_key,
674        region,
675        "sts",
676        timeout,
677    )
678    .await
679    {
680        Ok((result, metadata)) => RequestBuildResult::Final(result, metadata),
681        Err(error_msg) => RequestBuildResult::Final(
682            VerificationResult::Error(error_msg),
683            HashMap::from([("format_valid".into(), "true".into())]),
684        ),
685    }
686}
687
688/// Construct and send an AWS SigV4-signed HTTP POST request.
689#[allow(clippy::too_many_arguments)]
690async fn build_sigv4_request(
691    client: &Client,
692    url: &str,
693    host: &str,
694    body: &str,
695    access_key: &str,
696    secret_key: &str,
697    region: &str,
698    service: &str,
699    timeout: Duration,
700) -> Result<(VerificationResult, HashMap<String, String>), String> {
701    use hmac::{Hmac, Mac};
702    use sha2::Sha256;
703
704    let now = chrono_lite_now();
705    let datestamp = &now[..8]; // YYYYMMDD
706    let amz_date = &now; // YYYYMMDDTHHMMSSZ
707
708    // Step 1: Create canonical request
709    let payload_hash = hex_sha256(body.as_bytes());
710    let canonical_headers = format!(
711        "content-type:application/x-www-form-urlencoded\nhost:{host}\nx-amz-date:{amz_date}\n"
712    );
713    let signed_headers = "content-type;host;x-amz-date";
714    let canonical_request =
715        format!("POST\n/\n\n{canonical_headers}\n{signed_headers}\n{payload_hash}");
716
717    // Step 2: Create string to sign
718    let credential_scope = format!("{datestamp}/{region}/{service}/aws4_request");
719    let canonical_request_hash = hex_sha256(canonical_request.as_bytes());
720    let string_to_sign =
721        format!("AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{canonical_request_hash}");
722
723    // Step 3: Calculate signature
724    let signing_key = derive_signing_key(secret_key, datestamp, region, service);
725    let signature = {
726        let mut mac =
727            Hmac::<Sha256>::new_from_slice(&signing_key).map_err(|_| AWS_SIGNING_ERROR)?;
728        mac.update(string_to_sign.as_bytes());
729        hex::encode(mac.finalize().into_bytes())
730    };
731
732    // Step 4: Build Authorization header
733    let authorization = format!(
734        "AWS4-HMAC-SHA256 Credential={access_key}/{credential_scope}, SignedHeaders={signed_headers}, Signature={signature}"
735    );
736
737    // Send the signed request
738    let response = client
739        .post(url)
740        .timeout(timeout)
741        .header("Content-Type", "application/x-www-form-urlencoded")
742        .header("Host", host)
743        .header("X-Amz-Date", amz_date)
744        .header("Authorization", &authorization)
745        .body(body.to_string())
746        .send()
747        .await
748        .map_err(|_| AWS_STS_UNREACHABLE_ERROR)?;
749
750    let status = response.status().as_u16();
751    let response_body = response.text().await.unwrap_or_default();
752
753    let mut metadata = HashMap::from([("format_valid".into(), "true".into())]);
754
755    let result = match status {
756        200 => {
757            // Parse GetCallerIdentity response for account info
758            if let Some(account) = extract_xml_field(&response_body, "Account") {
759                metadata.insert("aws_account".into(), account);
760            }
761            if let Some(arn) = extract_xml_field(&response_body, "Arn") {
762                metadata.insert("aws_arn".into(), arn);
763            }
764            VerificationResult::Live
765        }
766        403 => VerificationResult::Dead,
767        429 => VerificationResult::RateLimited,
768        _ => VerificationResult::Error(format!("unexpected STS response status: {status}")),
769    };
770
771    Ok((result, metadata))
772}
773
774/// Derive the SigV4 signing key: HMAC(HMAC(HMAC(HMAC("AWS4"+secret, date), region), service), "aws4_request")
775fn derive_signing_key(secret_key: &str, datestamp: &str, region: &str, service: &str) -> Vec<u8> {
776    let k_secret = format!("AWS4{secret_key}");
777    let k_date = hmac_sha256(k_secret.as_bytes(), datestamp.as_bytes());
778    let k_region = hmac_sha256(&k_date, region.as_bytes());
779    let k_service = hmac_sha256(&k_region, service.as_bytes());
780    hmac_sha256(&k_service, b"aws4_request")
781}
782
783/// Compute HMAC-SHA256.
784fn hmac_sha256(key: &[u8], data: &[u8]) -> Vec<u8> {
785    use hmac::{Hmac, Mac};
786    use sha2::Sha256;
787
788    let mut mac = Hmac::<Sha256>::new_from_slice(key).expect("HMAC accepts any key length");
789    mac.update(data);
790    mac.finalize().into_bytes().to_vec()
791}
792
793/// Compute hex-encoded SHA-256 digest.
794fn hex_sha256(data: &[u8]) -> String {
795    use sha2::{Digest, Sha256};
796    let hash = Sha256::digest(data);
797    hex::encode(hash)
798}
799
800/// Generate a UTC timestamp in AWS format: YYYYMMDDTHHMMSSZ.
801/// Avoids pulling in the `chrono` crate by using `SystemTime`.
802fn chrono_lite_now() -> String {
803    use std::time::SystemTime;
804    let now = SystemTime::now()
805        .duration_since(SystemTime::UNIX_EPOCH)
806        .expect("system clock is before epoch");
807    let secs = now.as_secs();
808    // Break epoch seconds into date/time components
809    let days = secs / 86400;
810    let time_of_day = secs % 86400;
811    let hours = time_of_day / 3600;
812    let minutes = (time_of_day % 3600) / 60;
813    let seconds = time_of_day % 60;
814
815    // Civil date from days since epoch (simplified Rata Die algorithm)
816    let (year, month, day) = civil_from_days(days as i64);
817    format!("{year:04}{month:02}{day:02}T{hours:02}{minutes:02}{seconds:02}Z")
818}
819
/// Convert a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple.
///
/// Follows Howard Hinnant's `civil_from_days`: days are regrouped into
/// 400-year eras that start on March 1st, so the leap day falls at the end of
/// the internal year.
fn civil_from_days(days: i64) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;

    // Shift the origin from 1970-01-01 to 0000-03-01.
    let shifted = days + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32;

    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;

    // January and February belong to the following calendar year.
    let (month, year_carry) = if march_based_month < 10 {
        (march_based_month + 3, 0)
    } else {
        (march_based_month - 9, 1)
    };
    let year = year_of_era as i64 + era * 400 + year_carry;

    (year as i32, month, day)
}
835
/// Return the text between the first `<tag>` and the next `</tag>` after it.
///
/// This is a plain substring search, not an XML parser. Returns `None` when
/// either the opening tag or a following closing tag is missing.
fn extract_xml_field(xml: &str, tag: &str) -> Option<String> {
    let opening = format!("<{tag}>");
    let closing = format!("</{tag}>");

    let (_, after_opening) = xml.split_once(opening.as_str())?;
    let (value, _) = after_opening.split_once(closing.as_str())?;
    Some(value.to_owned())
}
844
845/// Validate that the access key and secret key have valid AWS format.
846fn valid_aws_format(access_key: &str, secret_key: &str) -> bool {
847    AWS_VALID_ACCESS_KEY_PREFIXES
848        .iter()
849        .any(|prefix| access_key.starts_with(prefix))
850        && access_key.len() == AWS_ACCESS_KEY_LEN
851        && secret_key.len() >= AWS_MIN_SECRET_KEY_LEN
852        && secret_key
853            .chars()
854            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '/' | '='))
855}
856
/// Error text for a failure to create the HMAC signing key.
/// NOTE(review): the usage site is outside this section — presumably the AWS
/// SigV4 signing path; confirm.
const AWS_SIGNING_ERROR: &str = "failed to create HMAC signing key";
/// Error text for an unreachable AWS STS endpoint.
/// NOTE(review): the usage site is outside this section; confirm.
const AWS_STS_UNREACHABLE_ERROR: &str = "aws sts endpoint unreachable";
859
/// A failed verification step: the result to report plus a flag saying
/// whether the failure looked temporary.
struct VerificationFailure {
    /// The `VerificationResult` describing the failure.
    result: VerificationResult,
    /// `true` for failures such as timeouts, connection errors, and
    /// interrupted body reads; `false` for e.g. an oversized response body.
    /// NOTE(review): presumably drives the retry decision — confirm against
    /// the caller.
    transient: bool,
}
864
865async fn execute_request(
866    request: reqwest::RequestBuilder,
867) -> Result<reqwest::Response, VerificationFailure> {
868    request.send().await.map_err(|error| VerificationFailure {
869        result: VerificationResult::Error(sanitize_request_error(&error).into()),
870        transient: error.is_timeout() || error.is_connect() || error.is_request(),
871    })
872}
873
874fn sanitize_request_error(error: &reqwest::Error) -> &'static str {
875    if error.is_timeout() {
876        TIMEOUT_ERROR
877    } else if error.is_connect() {
878        CONNECTION_FAILED_ERROR
879    } else if error.is_redirect() {
880        TOO_MANY_REDIRECTS_ERROR
881    } else {
882        GENERIC_REQUEST_ERROR
883    }
884}
885
886async fn read_response_body(response: reqwest::Response) -> Result<String, VerificationFailure> {
887    // First check: Content-Length header as a fast-path rejection. This header
888    // is optional and attacker-controlled, so it's only used to reject
889    // obviously-too-large responses without starting to stream.
890    let content_length = response.content_length().unwrap_or(0) as usize;
891    if content_length > MAX_RESPONSE_BODY_BYTES {
892        return Err(VerificationFailure {
893            result: VerificationResult::Error(BODY_TOO_LARGE_ERROR.into()),
894            transient: false,
895        });
896    }
897
898    // Stream the body in chunks, aborting early if the accumulated size exceeds
899    // the limit. This prevents OOM from malicious endpoints that send large
900    // bodies via chunked transfer encoding without a Content-Length header.
901    let mut accumulated = Vec::with_capacity(content_length.min(MAX_RESPONSE_BODY_BYTES));
902    let mut stream = response.bytes_stream();
903    use futures_util::StreamExt;
904    while let Some(chunk_result) = stream.next().await {
905        let chunk = chunk_result.map_err(|_| VerificationFailure {
906            result: VerificationResult::Error(BODY_ERROR_MESSAGE.into()),
907            transient: true,
908        })?;
909        if accumulated.len() + chunk.len() > MAX_RESPONSE_BODY_BYTES {
910            return Err(VerificationFailure {
911                result: VerificationResult::Error(BODY_TOO_LARGE_ERROR.into()),
912                transient: false,
913            });
914        }
915        accumulated.extend_from_slice(&chunk);
916    }
917
918    Ok(String::from_utf8(accumulated).unwrap_or_default())
919}
920
921/// Check if a response body contains error indicators despite a 200 status.
922/// Many APIs return 200 with error JSON instead of proper HTTP status codes.
923///
924/// Matches JSON key patterns like `"error":` or `"invalid_token":` to reduce
925/// false positives from values containing error-like words (e.g.,
926/// `"invalid_login_count": 0` should not trigger this).
927///
928/// `SUCCESS_OVERRIDES` are only considered when no explicit error key is found.
929/// This prevents responses like `{"ok":true, "error":"rate_limited"}` from
930/// being incorrectly treated as successful.
931fn body_indicates_error(body: &str) -> bool {
932    let lower = body.to_lowercase();
933    let has_error = ERROR_INDICATORS.iter().any(|indicator| {
934        lower.match_indices(indicator).any(|(pos, _)| {
935            let before = lower[..pos].trim_end();
936            let after = lower[pos + indicator.len()..].trim_start();
937            let valid_key_start =
938                before.is_empty() || before.ends_with('{') || before.ends_with(',');
939            valid_key_start && after.starts_with(':')
940        })
941    });
942
943    if !has_error {
944        return false;
945    }
946
947    // An explicit error key takes precedence over success overrides.
948    // APIs that return both `"ok":true` and `"error":"..."` should be
949    // treated as errors — the error field is more specific and the `ok`
950    // field often reflects request delivery, not auth success.
951    // However, `"error": null` is a common pattern meaning "no error"
952    // and should NOT trigger error detection.
953    let has_explicit_error_key = lower.match_indices("\"error\"").any(|(pos, _)| {
954        let after = lower[pos + "\"error\"".len()..].trim_start();
955        after.starts_with(':') && {
956            let value_start = after[1..].trim_start();
957            // "error": null means "no error" — don't treat as error
958            !value_start.starts_with("null")
959        }
960    });
961
962    if has_explicit_error_key {
963        return true;
964    }
965
966    !contains_any(&lower, SUCCESS_OVERRIDES)
967}
968
969/// Evaluate whether a verification response meets the success criteria.
970fn evaluate_success(spec: &SuccessSpec, status: u16, body: &str) -> bool {
971    if !status_matches(spec, status) || !body_matches(spec, body) {
972        return false;
973    }
974
975    if let Some(ref json_path) = spec.json_path {
976        let Ok(parsed) = serde_json::from_str::<serde_json::Value>(body) else {
977            return false;
978        };
979        return json_expectation_matches(spec, &parsed, json_path);
980    }
981    true
982}
983
984fn status_matches(spec: &SuccessSpec, status: u16) -> bool {
985    if let Some(expected_status) = spec.status
986        && status != expected_status
987    {
988        return false;
989    }
990
991    if let Some(not_status) = spec.status_not
992        && status == not_status
993    {
994        return false;
995    }
996
997    true
998}
999
1000fn body_matches(spec: &SuccessSpec, body: &str) -> bool {
1001    if let Some(ref needle) = spec.body_contains
1002        && !body.contains(needle)
1003    {
1004        return false;
1005    }
1006
1007    if let Some(ref needle) = spec.body_not_contains
1008        && body.contains(needle)
1009    {
1010        return false;
1011    }
1012
1013    true
1014}
1015
1016fn json_expectation_matches(
1017    spec: &SuccessSpec,
1018    parsed: &serde_json::Value,
1019    json_path: &str,
1020) -> bool {
1021    let value = json_pointer_get(parsed, json_path);
1022    match &spec.equals {
1023        Some(expected) => value.is_some_and(|actual| json_value_to_string(actual) == *expected),
1024        None => value.is_some(),
1025    }
1026}
1027
1028fn json_value_to_string(value: &serde_json::Value) -> String {
1029    match value {
1030        serde_json::Value::String(text) => text.clone(),
1031        serde_json::Value::Bool(boolean) => boolean.to_string(),
1032        serde_json::Value::Number(number) => number.to_string(),
1033        other => other.to_string(),
1034    }
1035}
1036
/// Return `true` if `haystack` contains at least one of `needles` as a
/// substring. An empty `needles` slice yields `false`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(needle) {
            return true;
        }
    }
    false
}
1040
/// Quoted, lowercase JSON key names that `body_indicates_error` treats as
/// signs of a failed authentication.
///
/// Each entry includes its surrounding double quotes. `body_indicates_error`
/// searches the lowercased body for these and only counts a hit that sits in
/// key position (followed by `:`).
const ERROR_INDICATORS: &[&str] = &[
    "\"error\"",
    "\"unauthorized\"",
    "\"forbidden\"",
    "\"invalid\"",
    "\"invalid_token\"",
    "\"invalid_key\"",
    "\"invalid_api_key\"",
    "\"authentication_error\"",
    "\"auth_error\"",
    "\"unauthenticated\"",
    "\"not_authenticated\"",
    "\"access_denied\"",
    "\"permission_denied\"",
    "\"invalid_credentials\"",
    "\"bad_credentials\"",
    "\"expired\"",
    "\"token_expired\"",
    "\"key_expired\"",
    "\"revoked\"",
    "\"inactive\"",
    "\"disabled\"",
    "\"suspended\"",
];
1065
/// Lowercase body fragments that mark a response as successful even though an
/// error-like key is present.
///
/// `body_indicates_error` consults these only when the body has no explicit
/// non-null `"error"` value. Matching is a plain substring search, so every
/// key is listed in both the compact (`"key":true`) and the spaced
/// (`"key": true`) form; otherwise the outcome would depend on how the server
/// happens to format its JSON.
const SUCCESS_OVERRIDES: &[&str] = &[
    "\"ok\":true",
    "\"ok\": true",
    "\"success\":true",
    "\"success\": true",
    "\"authenticated\":true",
    "\"authenticated\": true",
    "\"valid\":true",
    "\"valid\": true",
];
1074
1075/// Simple dot-path JSON accessor: "ok" → root["ok"], "data.user.name" → root["data"]["user"]["name"].
1076fn json_pointer_get<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
1077    const MAX_JSON_PATH_DEPTH: usize = 20;
1078
1079    let mut current = value;
1080    let mut depth = 0usize;
1081    for segment in path.split('.') {
1082        depth += 1;
1083        if depth > MAX_JSON_PATH_DEPTH || segment.is_empty() {
1084            return None;
1085        }
1086        current = current.get(segment)?;
1087    }
1088    Some(current)
1089}
1090
1091/// Extract metadata fields from a verification response body.
1092fn extract_metadata(specs: &[MetadataSpec], body: &str) -> HashMap<String, String> {
1093    let mut metadata = HashMap::new();
1094
1095    let parsed: Option<serde_json::Value> = serde_json::from_str(body).ok();
1096
1097    for spec in specs {
1098        if let Some(ref json_path) = spec.json_path
1099            && let Some(ref parsed) = parsed
1100            && let Some(value) = json_pointer_get(parsed, json_path)
1101        {
1102            let s = match value {
1103                serde_json::Value::String(s) => s.clone(),
1104                other => other.to_string(),
1105            };
1106            metadata.insert(spec.name.clone(), s);
1107        }
1108        if let Some(ref header_name) = spec.header {
1109            // Header extraction would need the actual response headers.
1110            // For now, we only support JSON-based extraction since we consume the body.
1111            tracing::debug!(
1112                "header extraction for '{}' not supported in body-only mode",
1113                header_name
1114            );
1115        }
1116    }
1117
1118    metadata
1119}
1120
1121#[cfg(test)]
1122mod tests {
1123    use super::*;
1124    use keyhog_core::Severity;
1125    use std::sync::Arc;
1126    use std::sync::atomic::{AtomicUsize, Ordering};
1127    use tokio::io::{AsyncReadExt, AsyncWriteExt};
1128    use tokio::net::TcpListener;
1129
1130    #[test]
1131    fn interpolation() {
1132        assert_eq!(
1133            interpolate(
1134                "https://api.example.com/check?key={{match}}",
1135                "abc123",
1136                None
1137            ),
1138            "https://api.example.com/check?key=abc123"
1139        );
1140        assert_eq!(
1141            interpolate("{{companion.secret}}", "key", Some("mysecret")),
1142            "mysecret"
1143        );
1144    }
1145
1146    #[test]
1147    fn interpolation_handles_empty_companion_replacements() {
1148        assert_eq!(
1149            interpolate(
1150                "https://api.example.com/{{companion.secret}}/{{companion.secret}}",
1151                "key",
1152                Some("")
1153            ),
1154            "https://api.example.com//"
1155        );
1156    }
1157
1158    #[test]
1159    fn field_resolution() {
1160        assert_eq!(resolve_field("match", "cred", None), "cred");
1161        assert_eq!(
1162            resolve_field("companion.secret", "cred", Some("sec")),
1163            "sec"
1164        );
1165        assert_eq!(
1166            resolve_field("literal_value", "cred", None),
1167            "literal_value"
1168        );
1169        assert_eq!(resolve_field("", "cred", None), "");
1170    }
1171
1172    #[test]
1173    fn success_status_check() {
1174        let spec = SuccessSpec {
1175            status: Some(200),
1176            status_not: None,
1177            body_contains: None,
1178            body_not_contains: None,
1179            json_path: None,
1180            equals: None,
1181        };
1182        assert!(evaluate_success(&spec, 200, ""));
1183        assert!(!evaluate_success(&spec, 401, ""));
1184    }
1185
1186    #[test]
1187    fn success_json_path_check() {
1188        let spec = SuccessSpec {
1189            status: Some(200),
1190            status_not: None,
1191            body_contains: None,
1192            body_not_contains: None,
1193            json_path: Some("ok".into()),
1194            equals: Some("true".into()),
1195        };
1196        assert!(evaluate_success(&spec, 200, r#"{"ok": true}"#));
1197        assert!(!evaluate_success(&spec, 200, r#"{"ok": false}"#));
1198        assert!(!evaluate_success(&spec, 401, r#"{"ok": true}"#));
1199    }
1200
1201    #[test]
1202    fn dedup_merges_locations() {
1203        let m1 = RawMatch {
1204            detector_id: "test".into(),
1205            detector_name: "Test".into(),
1206            service: "test".into(),
1207            severity: Severity::High,
1208            credential: "SECRET123".into(),
1209            companion: None,
1210            location: MatchLocation {
1211                source: "fs".into(),
1212                file_path: Some("a.py".into()),
1213                line: Some(1),
1214                offset: 0,
1215                commit: None,
1216                author: None,
1217                date: None,
1218            },
1219            entropy: None,
1220            confidence: Some(0.75),
1221        };
1222        let m2 = RawMatch {
1223            location: MatchLocation {
1224                file_path: Some("b.py".into()),
1225                line: Some(10),
1226                ..m1.location.clone()
1227            },
1228            ..m1.clone()
1229        };
1230
1231        let groups = dedup_matches(vec![m1, m2]);
1232        assert_eq!(groups.len(), 1);
1233        assert_eq!(groups[0].additional_locations.len(), 1);
1234    }
1235
1236    #[test]
1237    fn json_pointer_nested() {
1238        let document: serde_json::Value =
1239            serde_json::from_str(r#"{"data": {"user": {"name": "alice"}}}"#).unwrap();
1240        assert_eq!(
1241            json_pointer_get(&document, "data.user.name"),
1242            Some(&serde_json::Value::String("alice".into()))
1243        );
1244        assert!(json_pointer_get(&document, "data.missing").is_none());
1245    }
1246
1247    #[test]
1248    fn json_pointer_rejects_excessive_depth() {
1249        let value: serde_json::Value = serde_json::from_str(r#"{"a":{"b":{"c":true}}}"#).unwrap();
1250        let path = (0..21)
1251            .map(|i| format!("level{i}"))
1252            .collect::<Vec<_>>()
1253            .join(".");
1254        assert!(json_pointer_get(&value, &path).is_none());
1255        assert!(json_pointer_get(&value, "a.b.c").is_some());
1256    }
1257
1258    #[tokio::test]
1259    async fn verify_all_blocks_integer_private_hosts() {
1260        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
1261        let addr = listener.local_addr().unwrap();
1262        let requests = Arc::new(AtomicUsize::new(0));
1263        let requests_clone = requests.clone();
1264
1265        tokio::spawn(async move {
1266            loop {
1267                let Ok((mut stream, _)) = listener.accept().await else {
1268                    break;
1269                };
1270                let count = requests_clone.clone();
1271                tokio::spawn(async move {
1272                    let mut buf = [0u8; 1024];
1273                    let _ = stream.read(&mut buf).await;
1274                    count.fetch_add(1, Ordering::SeqCst);
1275                    tokio::time::sleep(Duration::from_millis(25)).await;
1276                    let _ = stream
1277                        .write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nOK")
1278                        .await;
1279                });
1280            }
1281        });
1282
1283        let detector = DetectorSpec {
1284            id: "test".into(),
1285            name: "Test".into(),
1286            service: "test".into(),
1287            severity: Severity::High,
1288            patterns: vec![],
1289            companion: None,
1290            verify: Some(keyhog_core::VerifySpec {
1291                method: HttpMethod::Get,
1292                url: format!("http://2130706433:{}/verify", addr.port()),
1293                auth: AuthSpec::None,
1294                headers: vec![],
1295                body: None,
1296                success: SuccessSpec {
1297                    status: Some(200),
1298                    status_not: None,
1299                    body_contains: None,
1300                    body_not_contains: None,
1301                    json_path: None,
1302                    equals: None,
1303                },
1304                metadata: vec![],
1305                timeout_ms: None,
1306            }),
1307            keywords: vec![],
1308        };
1309
1310        let engine = VerificationEngine::new(
1311            &[detector],
1312            VerifyConfig {
1313                timeout: Duration::from_secs(1),
1314                max_concurrent_per_service: 50,
1315                max_concurrent_global: 50,
1316                ..Default::default()
1317            },
1318        )
1319        .unwrap();
1320
1321        let make_match = || RawMatch {
1322            detector_id: "test".into(),
1323            detector_name: "Test".into(),
1324            service: "test".into(),
1325            severity: Severity::High,
1326            credential: "same-credential".into(),
1327            companion: None,
1328            location: MatchLocation {
1329                source: "fs".into(),
1330                file_path: Some("a.txt".into()),
1331                line: Some(1),
1332                offset: 0,
1333                commit: None,
1334                author: None,
1335                date: None,
1336            },
1337            entropy: None,
1338            confidence: Some(0.9),
1339        };
1340
1341        let group = dedup_matches(vec![make_match()]).pop().unwrap();
1342        let groups = (0..20).map(|_| group.clone()).collect();
1343        let findings = engine.verify_all(groups).await;
1344        assert_eq!(findings.len(), 20);
1345        assert!(findings.iter().all(|finding| {
1346            matches!(
1347                &finding.verification,
1348                VerificationResult::Error(message) if message == PRIVATE_URL_ERROR
1349            )
1350        }));
1351        assert_eq!(requests.load(Ordering::SeqCst), 0);
1352    }
1353
1354    #[tokio::test]
1355    async fn aws_sigv4_probe_fails_on_unreachable_endpoint() {
1356        let client = Client::new();
1357        let result = build_sigv4_request(
1358            &client,
1359            "https://127.0.0.1:1/",
1360            "127.0.0.1:1",
1361            "Action=GetCallerIdentity&Version=2011-06-15",
1362            "AKIAIOSFODNN7EXAMPLE",
1363            "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
1364            "us-east-1",
1365            "sts",
1366            Duration::from_millis(50),
1367        )
1368        .await;
1369
1370        assert!(result.is_err(), "should fail on unreachable endpoint");
1371    }
1372
1373    #[test]
1374    fn aws_sigv4_signing_key_derivation_is_deterministic() {
1375        let key1 = derive_signing_key("secret", "20260325", "us-east-1", "sts");
1376        let key2 = derive_signing_key("secret", "20260325", "us-east-1", "sts");
1377        assert_eq!(key1, key2, "signing key must be deterministic");
1378        assert_eq!(key1.len(), 32, "HMAC-SHA256 output is 32 bytes");
1379    }
1380
1381    #[test]
1382    fn chrono_lite_now_produces_valid_format() {
1383        let timestamp = chrono_lite_now();
1384        assert_eq!(timestamp.len(), 16, "YYYYMMDDTHHMMSSZ = 16 chars");
1385        assert!(timestamp.ends_with('Z'));
1386        assert!(timestamp.contains('T'));
1387    }
1388
1389    // =========================================================================
1390    // SSRF Protection Tests
1391    // =========================================================================
1392
1393    #[test]
1394    fn ssrf_blocks_localhost() {
1395        assert!(is_private_url("http://localhost/api"));
1396        assert!(is_private_url("https://localhost:8080/verify"));
1397        assert!(is_private_url("http://LOCALHOST/path"));
1398    }
1399
1400    #[test]
1401    fn ssrf_blocks_loopback() {
1402        assert!(is_private_url("http://127.0.0.1/api"));
1403        assert!(is_private_url("http://127.0.0.1:3000/check"));
1404        assert!(is_private_url("https://127.0.0.1/secret"));
1405    }
1406
1407    #[test]
1408    fn ssrf_blocks_private_class_a() {
1409        assert!(is_private_url("http://10.0.0.1/api"));
1410        assert!(is_private_url("http://10.255.255.255/verify"));
1411        assert!(is_private_url("https://10.10.10.10/check"));
1412    }
1413
1414    #[test]
1415    fn ssrf_blocks_private_class_b() {
1416        assert!(is_private_url("http://172.16.0.1/api"));
1417        assert!(is_private_url("http://172.17.1.1/verify"));
1418        assert!(is_private_url("http://172.18.2.2/check"));
1419        assert!(is_private_url("http://172.19.3.3/test"));
1420        assert!(is_private_url("http://172.20.0.0/api"));
1421        assert!(is_private_url("http://172.30.0.0/api"));
1422        assert!(is_private_url("http://172.31.255.255/verify"));
1423    }
1424
1425    #[test]
1426    fn ssrf_blocks_private_class_c() {
1427        assert!(is_private_url("http://192.168.0.1/api"));
1428        assert!(is_private_url("http://192.168.1.1/verify"));
1429        assert!(is_private_url("https://192.168.255.255/check"));
1430    }
1431
1432    #[test]
1433    fn ssrf_blocks_link_local() {
1434        assert!(is_private_url("http://169.254.0.1/metadata"));
1435        assert!(is_private_url("http://169.254.169.254/latest"));
1436        assert!(is_private_url("https://169.254.1.1/api"));
1437    }
1438
1439    #[test]
1440    fn ssrf_blocks_ipv6_loopback() {
1441        assert!(is_private_url("http://[::1]/api"));
1442        assert!(is_private_url("https://[::1]:8080/verify"));
1443    }
1444
1445    #[test]
1446    fn ssrf_blocks_ipv6_private_ranges_and_mapped_ipv4() {
1447        assert!(is_private_url("http://[fd00::1]/api"));
1448        assert!(is_private_url("http://[fe80::1]/api"));
1449        assert!(is_private_url("http://[::ffff:127.0.0.1]/api"));
1450    }
1451
1452    #[test]
1453    fn ssrf_blocks_zero_address() {
1454        assert!(is_private_url("http://0.0.0.0/api"));
1455        assert!(is_private_url("http://0.0.0.0:3000/verify"));
1456    }
1457
1458    #[test]
1459    fn ssrf_blocks_integer_loopback_host() {
1460        assert!(is_private_url("http://2130706433/api"));
1461    }
1462
1463    #[test]
1464    fn ssrf_blocks_hex_and_octal_ipv4_hosts() {
1465        assert!(is_private_url("http://0x7f000001/api"));
1466        assert!(is_private_url("http://0177.0.0.1/api"));
1467        assert!(is_private_url("http://0x7f.0x0.0x0.0x1/api"));
1468    }
1469
1470    #[test]
1471    fn ssrf_blocks_short_dotted_ipv4_hosts() {
1472        assert!(is_private_url("http://127.1/api"));
1473        assert!(is_private_url("http://127.0.1/api"));
1474    }
1475
1476    #[test]
1477    fn ssrf_blocks_cloud_metadata() {
1478        assert!(is_private_url("http://metadata.google.internal/"));
1479        assert!(is_private_url("http://169.254.169.254/latest/meta-data/"));
1480        assert!(is_private_url("https://metadata.google/computeMetadata"));
1481    }
1482
1483    #[test]
1484    fn ssrf_blocks_percent_encoded_private_hosts_after_decoding() {
1485        assert!(is_private_url("http://%31%32%37.0.0.1/api"));
1486    }
1487
1488    #[tokio::test]
1489    async fn resolved_client_rejects_private_dns_results() {
1490        let client = reqwest::Client::builder().build().unwrap();
1491        let resolved_client =
1492            resolved_client_for_url(&client, "http://localhost/api", Duration::from_secs(1)).await;
1493        assert!(matches!(
1494            resolved_client,
1495            Err(VerificationResult::Error(message)) if message == PRIVATE_URL_ERROR
1496        ));
1497    }
1498
1499    #[tokio::test]
1500    async fn resolved_client_rejects_private_ip_literals_and_numeric_ipv4_hosts() {
1501        let client = reqwest::Client::builder().build().unwrap();
1502
1503        for url in ["http://127.0.0.1/api", "http://2130706433/api"] {
1504            let resolved_client =
1505                resolved_client_for_url(&client, url, Duration::from_secs(1)).await;
1506            assert!(
1507                matches!(resolved_client, Err(VerificationResult::Error(ref message)) if message == PRIVATE_URL_ERROR),
1508                "expected private URL rejection for {url}, got {resolved_client:?}"
1509            );
1510        }
1511    }
1512
1513    #[tokio::test]
1514    async fn resolved_client_rejects_non_https_public_urls() {
1515        let client = reqwest::Client::builder().build().unwrap();
1516        let resolved_client =
1517            resolved_client_for_url(&client, "http://example.com/api", Duration::from_secs(1))
1518                .await;
1519        assert!(matches!(
1520            resolved_client,
1521            Err(VerificationResult::Error(message)) if message == HTTPS_ONLY_ERROR
1522        ));
1523    }
1524
1525    #[test]
1526    fn ssrf_allows_public_urls() {
1527        assert!(!is_private_url("https://api.github.com/users/octocat"));
1528        assert!(!is_private_url("https://api.openai.com/v1/models"));
1529        assert!(!is_private_url(
1530            "https://hooks.slack.com/services/T000/B000/XXXX"
1531        ));
1532        assert!(!is_private_url("http://example.com/api"));
1533        assert!(!is_private_url("http://134744072/api"));
1534    }
1535
1536    // =========================================================================
1537    // Interpolation Security Tests
1538    // =========================================================================
1539
1540    #[test]
1541    fn interpolation_url_encodes_special_chars() {
1542        let cred = "key/with/slashes";
1543        assert_eq!(
1544            interpolate("https://api.example.com/{{match}}", cred, None),
1545            "https://api.example.com/key%2Fwith%2Fslashes"
1546        );
1547    }
1548
1549    #[test]
1550    fn interpolation_url_encodes_query_params() {
1551        let cred = "key=value&other=test";
1552        assert_eq!(
1553            interpolate("https://api.example.com?token={{match}}", cred, None),
1554            "https://api.example.com?token=key%3Dvalue%26other%3Dtest"
1555        );
1556    }
1557
1558    #[test]
1559    fn interpolation_prevents_template_injection() {
1560        let cred = "{{malicious}}";
1561        let interpolated_url = interpolate("https://api.example.com/{{match}}", cred, None);
1562        assert_eq!(
1563            interpolated_url,
1564            "https://api.example.com/%7B%7Bmalicious%7D%7D"
1565        );
1566    }
1567
1568    #[test]
1569    fn interpolation_handles_newlines() {
1570        let cred = "key\nwith\nnewlines";
1571        let interpolated_url = interpolate("https://api.example.com/{{match}}", cred, None);
1572        assert!(interpolated_url.contains("%0A"));
1573        assert!(!interpolated_url.contains('\n'));
1574    }
1575
1576    #[test]
1577    fn interpolation_handles_companion_with_special_chars() {
1578        let companion = "secret/with/chars";
1579        let interpolated_url = interpolate(
1580            "https://api.example.com?key={{companion.token}}",
1581            "key",
1582            Some(companion),
1583        );
1584        assert!(interpolated_url.contains("%2F"));
1585    }
1586
1587    // =========================================================================
1588    // Body Analysis Tests
1589    // =========================================================================
1590
1591    #[test]
1592    fn body_indicates_error_null_response() {
1593        assert!(!body_indicates_error("null"));
1594        assert!(!body_indicates_error("NULL"));
1595    }
1596
1597    #[test]
1598    fn body_indicates_error_real_error_patterns() {
1599        assert!(body_indicates_error(r#"{"error": "invalid token"}"#));
1600        assert!(body_indicates_error(r#"{"unauthorized": true}"#));
1601        assert!(body_indicates_error(r#"{"invalid_key": "bad"}"#));
1602        assert!(body_indicates_error(
1603            r#"{"access_denied": "no permission"}"#
1604        ));
1605        assert!(body_indicates_error(r#"{"expired": true}"#));
1606        assert!(body_indicates_error(r#"{"revoked": "yes"}"#));
1607    }
1608
1609    #[test]
1610    fn body_success_override_patterns() {
1611        // These should NOT indicate error — success keys without explicit error
1612        assert!(!body_indicates_error(r#"{"ok":true, "error": null}"#));
1613        assert!(!body_indicates_error(
1614            r#"{"success":true, "warning": "minor"}"#
1615        ));
1616        assert!(!body_indicates_error(r#"{"authenticated":true}"#));
1617        assert!(!body_indicates_error(r#"{"valid":true}"#));
1618    }
1619
1620    #[test]
1621    fn body_error_explicit_key_overrides_success() {
1622        // An explicit "error" key with a real value should be detected as an
1623        // error even when "ok":true is also present. This prevents dead
1624        // credentials from being reported as live.
1625        assert!(body_indicates_error(
1626            r#"{"ok":true, "error": "rate_limited"}"#
1627        ));
1628        assert!(body_indicates_error(
1629            r#"{"ok":true, "error": "invalid_token"}"#
1630        ));
1631        assert!(body_indicates_error(
1632            r#"{"success":true, "error": "unauthorized"}"#
1633        ));
1634    }
1635
1636    #[test]
1637    fn body_indicates_error_empty_body() {
1638        assert!(!body_indicates_error(""));
1639    }
1640
1641    #[test]
1642    fn body_indicates_error_non_json() {
1643        assert!(!body_indicates_error("plain text response"));
1644        assert!(!body_indicates_error("<html><body>Error</body></html>"));
1645        assert!(!body_indicates_error("this has \"error\" in it"));
1646    }
1647
1648    macro_rules! indicator_case {
1649        ($name:ident, $indicator:expr) => {
1650            #[test]
1651            fn $name() {
1652                let body = format!(r#"{{"{}": true}}"#, $indicator);
1653                assert!(body_indicates_error(&body));
1654            }
1655        };
1656    }
1657
    // One generated test per error-indicator key. Each invocation expands (via
    // `indicator_case!`) to a `#[test]` asserting that `{"<indicator>": true}`
    // is flagged by `body_indicates_error`.
    indicator_case!(indicator_unauthorized_detected, "unauthorized");
    indicator_case!(indicator_forbidden_detected, "forbidden");
    indicator_case!(indicator_invalid_detected, "invalid");
    indicator_case!(indicator_invalid_token_detected, "invalid_token");
    indicator_case!(indicator_invalid_key_detected, "invalid_key");
    indicator_case!(indicator_invalid_api_key_detected, "invalid_api_key");
    indicator_case!(
        indicator_authentication_error_detected,
        "authentication_error"
    );
    indicator_case!(indicator_auth_error_detected, "auth_error");
    indicator_case!(indicator_unauthenticated_detected, "unauthenticated");
    indicator_case!(indicator_not_authenticated_detected, "not_authenticated");
    indicator_case!(indicator_access_denied_detected, "access_denied");
    indicator_case!(indicator_permission_denied_detected, "permission_denied");
    indicator_case!(
        indicator_invalid_credentials_detected,
        "invalid_credentials"
    );
    indicator_case!(indicator_bad_credentials_detected, "bad_credentials");
    indicator_case!(indicator_expired_detected, "expired");
    indicator_case!(indicator_token_expired_detected, "token_expired");
    indicator_case!(indicator_key_expired_detected, "key_expired");
    indicator_case!(indicator_revoked_detected, "revoked");
    indicator_case!(indicator_inactive_detected, "inactive");
    indicator_case!(indicator_disabled_detected, "disabled");
1684
1685    #[test]
1686    fn success_override_ok_true_is_not_error() {
1687        assert!(!body_indicates_error(r#"{"ok": true}"#));
1688    }
1689
1690    #[test]
1691    fn success_override_success_true_is_not_error() {
1692        assert!(!body_indicates_error(r#"{"success": true}"#));
1693    }
1694
1695    #[test]
1696    fn success_override_authenticated_true_is_not_error() {
1697        assert!(!body_indicates_error(r#"{"authenticated": true}"#));
1698    }
1699
1700    #[test]
1701    fn success_override_valid_true_is_not_error() {
1702        assert!(!body_indicates_error(r#"{"valid": true}"#));
1703    }
1704
1705    #[test]
1706    fn body_indicates_error_ignores_indicator_inside_string_values() {
1707        assert!(!body_indicates_error(
1708            r#"{"message":"this text mentions \"error\" but is not an error key"}"#
1709        ));
1710        assert!(!body_indicates_error(
1711            r#"{"detail":"the word \"invalid\" appears here as content"}"#
1712        ));
1713    }
1714
1715    // =========================================================================
1716    // Cache Tests
1717    // =========================================================================
1718
1719    #[test]
1720    fn cache_basic_hit() {
1721        let cache = cache::VerificationCache::default_ttl();
1722        cache.put(
1723            "test-cred",
1724            "test-detector",
1725            VerificationResult::Live,
1726            HashMap::from([("key".into(), "value".into())]),
1727        );
1728
1729        let cached_verification = cache.get("test-cred", "test-detector");
1730        assert!(cached_verification.is_some());
1731        let (verification, metadata) = cached_verification.unwrap();
1732        assert!(matches!(verification, VerificationResult::Live));
1733        assert_eq!(metadata.get("key"), Some(&"value".to_string()));
1734    }
1735
1736    #[test]
1737    fn cache_miss_different_credential() {
1738        let cache = cache::VerificationCache::default_ttl();
1739        cache.put(
1740            "cred-1",
1741            "detector",
1742            VerificationResult::Live,
1743            HashMap::new(),
1744        );
1745
1746        let cached_verification = cache.get("cred-2", "detector");
1747        assert!(cached_verification.is_none());
1748    }
1749
1750    #[test]
1751    fn cache_miss_different_detector() {
1752        let cache = cache::VerificationCache::default_ttl();
1753        cache.put(
1754            "cred",
1755            "detector-1",
1756            VerificationResult::Live,
1757            HashMap::new(),
1758        );
1759
1760        let cached_verification = cache.get("cred", "detector-2");
1761        assert!(cached_verification.is_none());
1762    }
1763
1764    #[test]
1765    fn cache_ttl_expiration() {
1766        let cache = cache::VerificationCache::new(Duration::from_millis(10));
1767        cache.put(
1768            "test-cred",
1769            "test-detector",
1770            VerificationResult::Live,
1771            HashMap::new(),
1772        );
1773
1774        // Immediately should be available
1775        assert!(cache.get("test-cred", "test-detector").is_some());
1776
1777        // Wait for expiration
1778        std::thread::sleep(Duration::from_millis(50));
1779
1780        // Should be expired now
1781        assert!(cache.get("test-cred", "test-detector").is_none());
1782    }
1783
1784    #[test]
1785    fn cache_eviction_of_expired_entries() {
1786        // Test that expired entries are properly evicted
1787        let cache = cache::VerificationCache::new(Duration::from_millis(1));
1788
1789        cache.put("cred-1", "det", VerificationResult::Live, HashMap::new());
1790        std::thread::sleep(Duration::from_millis(5));
1791        cache.put("cred-2", "det", VerificationResult::Live, HashMap::new());
1792
1793        // First entry should be expired, second should be present
1794        assert!(cache.get("cred-1", "det").is_none());
1795        assert!(cache.get("cred-2", "det").is_some());
1796    }
1797
1798    #[test]
1799    fn cache_integrity_after_multiple_puts() {
1800        let cache = cache::VerificationCache::default_ttl();
1801
1802        // Put same credential with different results
1803        cache.put("cred", "det", VerificationResult::Dead, HashMap::new());
1804        cache.put("cred", "det", VerificationResult::Live, HashMap::new());
1805
1806        // Should have the latest value
1807        let (verification, _) = cache.get("cred", "det").unwrap();
1808        assert!(matches!(verification, VerificationResult::Live));
1809    }
1810
1811    // =========================================================================
1812    // Dedup Mode Tests
1813    // =========================================================================
1814
1815    #[test]
1816    fn dedup_per_location_same_detector_different_files() {
1817        let m1 = RawMatch {
1818            detector_id: "test-det".into(),
1819            detector_name: "Test".into(),
1820            service: "svc".into(),
1821            severity: Severity::High,
1822            credential: "SAME_SECRET".into(),
1823            companion: None,
1824            location: MatchLocation {
1825                source: "fs".into(),
1826                file_path: Some("a.py".into()),
1827                line: Some(1),
1828                offset: 0,
1829                commit: None,
1830                author: None,
1831                date: None,
1832            },
1833            entropy: None,
1834            confidence: Some(0.9),
1835        };
1836        let m2 = RawMatch {
1837            location: MatchLocation {
1838                file_path: Some("b.py".into()),
1839                line: Some(10),
1840                ..m1.location.clone()
1841            },
1842            ..m1.clone()
1843        };
1844
1845        let groups = dedup_matches(vec![m1, m2]);
1846        assert_eq!(groups.len(), 1);
1847        assert_eq!(groups[0].additional_locations.len(), 1);
1848        assert_eq!(groups[0].primary_location.file_path, Some("a.py".into()));
1849    }
1850
1851    #[test]
1852    fn dedup_consolidated_different_detectors_same_credential() {
1853        let m1 = RawMatch {
1854            detector_id: "detector-1".into(),
1855            detector_name: "Detector 1".into(),
1856            service: "svc".into(),
1857            severity: Severity::High,
1858            credential: "SAME_SECRET".into(),
1859            companion: None,
1860            location: MatchLocation {
1861                source: "fs".into(),
1862                file_path: Some("a.py".into()),
1863                line: Some(1),
1864                offset: 0,
1865                commit: None,
1866                author: None,
1867                date: None,
1868            },
1869            entropy: None,
1870            confidence: Some(0.9),
1871        };
1872        let m2 = RawMatch {
1873            detector_id: "detector-2".into(),
1874            detector_name: "Detector 2".into(),
1875            location: MatchLocation {
1876                file_path: Some("b.py".into()),
1877                line: Some(10),
1878                ..m1.location.clone()
1879            },
1880            ..m1.clone()
1881        };
1882
1883        let groups = dedup_matches(vec![m1, m2]);
1884        // Should create separate groups because detector_id is different
1885        assert_eq!(groups.len(), 2);
1886    }
1887
1888    #[test]
1889    fn dedup_preserves_companion() {
1890        let m1 = RawMatch {
1891            detector_id: "test".into(),
1892            detector_name: "Test".into(),
1893            service: "svc".into(),
1894            severity: Severity::High,
1895            credential: "SECRET".into(),
1896            companion: None,
1897            location: MatchLocation {
1898                source: "fs".into(),
1899                file_path: Some("a.py".into()),
1900                line: Some(1),
1901                offset: 0,
1902                commit: None,
1903                author: None,
1904                date: None,
1905            },
1906            entropy: None,
1907            confidence: Some(0.9),
1908        };
1909        let m2 = RawMatch {
1910            companion: Some("companion-value".into()),
1911            location: MatchLocation {
1912                file_path: Some("b.py".into()),
1913                line: Some(10),
1914                ..m1.location.clone()
1915            },
1916            ..m1.clone()
1917        };
1918
1919        let groups = dedup_matches(vec![m1, m2]);
1920        assert_eq!(groups.len(), 1);
1921        assert_eq!(groups[0].companion, Some("companion-value".into()));
1922    }
1923
1924    // =========================================================================
1925    // Edge Case Tests
1926    // =========================================================================
1927
1928    #[test]
1929    fn evaluate_success_handles_redirect_status() {
1930        let spec = SuccessSpec {
1931            status: Some(301),
1932            status_not: None,
1933            body_contains: None,
1934            body_not_contains: None,
1935            json_path: None,
1936            equals: None,
1937        };
1938        assert!(evaluate_success(&spec, 301, ""));
1939        assert!(!evaluate_success(&spec, 200, ""));
1940    }
1941
1942    #[test]
1943    fn evaluate_success_rate_limit_status() {
1944        let spec = SuccessSpec {
1945            status: None,
1946            status_not: Some(429),
1947            body_contains: None,
1948            body_not_contains: None,
1949            json_path: None,
1950            equals: None,
1951        };
1952        assert!(!evaluate_success(&spec, 429, ""));
1953        assert!(evaluate_success(&spec, 200, ""));
1954    }
1955
1956    #[test]
1957    fn detector_timeout_override_takes_precedence() {
1958        let spec = keyhog_core::VerifySpec {
1959            method: HttpMethod::Get,
1960            url: "https://example.com/verify".into(),
1961            auth: AuthSpec::None,
1962            headers: vec![],
1963            body: None,
1964            success: SuccessSpec {
1965                status: Some(200),
1966                status_not: None,
1967                body_contains: None,
1968                body_not_contains: None,
1969                json_path: None,
1970                equals: None,
1971            },
1972            metadata: vec![],
1973            timeout_ms: Some(250),
1974        };
1975
1976        assert_eq!(
1977            verification_timeout(&spec, Duration::from_secs(5)),
1978            Duration::from_millis(250)
1979        );
1980
1981        let without_override = keyhog_core::VerifySpec {
1982            timeout_ms: None,
1983            ..spec
1984        };
1985        assert_eq!(
1986            verification_timeout(&without_override, Duration::from_secs(5)),
1987            Duration::from_secs(5)
1988        );
1989    }
1990
1991    #[test]
1992    fn verify_empty_url_returns_error() {
1993        // Empty URL should trigger connection error handling
1994        let rt = tokio::runtime::Runtime::new().unwrap();
1995        rt.block_on(async {
1996            let client = Client::new();
1997            let spec = keyhog_core::VerifySpec {
1998                method: HttpMethod::Get,
1999                url: "".to_string(),
2000                auth: AuthSpec::None,
2001                headers: vec![],
2002                body: None,
2003                success: SuccessSpec {
2004                    status: Some(200),
2005                    status_not: None,
2006                    body_contains: None,
2007                    body_not_contains: None,
2008                    json_path: None,
2009                    equals: None,
2010                },
2011                metadata: vec![],
2012                timeout_ms: Some(1000),
2013            };
2014
2015            let verification =
2016                verify_credential(&client, &spec, "test", None, Duration::from_secs(1))
2017                    .await
2018                    .result;
2019            assert!(matches!(verification, VerificationResult::Error(_)));
2020        });
2021    }
2022
2023    #[test]
2024    fn verify_missing_verify_spec_returns_unverifiable() {
2025        let detector = DetectorSpec {
2026            id: "test".into(),
2027            name: "Test".into(),
2028            service: "test".into(),
2029            severity: Severity::Low,
2030            patterns: vec![],
2031            companion: None,
2032            verify: None, // Missing verify spec
2033            keywords: vec![],
2034        };
2035
2036        let engine = VerificationEngine::new(&[detector], VerifyConfig::default()).unwrap();
2037
2038        let rt = tokio::runtime::Runtime::new().unwrap();
2039        rt.block_on(async {
2040            let group = DedupedMatch {
2041                detector_id: "test".into(),
2042                detector_name: "Test".into(),
2043                service: "test".into(),
2044                severity: Severity::Low,
2045                credential: "test-cred".into(),
2046                companion: None,
2047                primary_location: MatchLocation {
2048                    source: "fs".into(),
2049                    file_path: Some("test.txt".into()),
2050                    line: Some(1),
2051                    offset: 0,
2052                    commit: None,
2053                    author: None,
2054                    date: None,
2055                },
2056                additional_locations: vec![],
2057                confidence: Some(0.5),
2058            };
2059
2060            let findings = engine.verify_all(vec![group]).await;
2061            assert_eq!(findings.len(), 1);
2062            assert!(matches!(
2063                findings[0].verification,
2064                VerificationResult::Unverifiable
2065            ));
2066        });
2067    }
2068
2069    #[test]
2070    fn success_body_contains_check() {
2071        let spec = SuccessSpec {
2072            status: Some(200),
2073            status_not: None,
2074            body_contains: Some("verified".into()),
2075            body_not_contains: None,
2076            json_path: None,
2077            equals: None,
2078        };
2079        assert!(evaluate_success(&spec, 200, r#"{"status": "verified"}"#));
2080        assert!(!evaluate_success(&spec, 200, r#"{"status": "pending"}"#));
2081    }
2082
2083    #[test]
2084    fn success_body_not_contains_check() {
2085        let spec = SuccessSpec {
2086            status: Some(200),
2087            status_not: None,
2088            body_contains: None,
2089            body_not_contains: Some("error".into()),
2090            json_path: None,
2091            equals: None,
2092        };
2093        assert!(evaluate_success(&spec, 200, r#"{"ok": true}"#));
2094        assert!(!evaluate_success(&spec, 200, r#"{"error": "failed"}"#));
2095    }
2096
2097    // =========================================================================
2098    // Verification Edge Cases
2099    // =========================================================================
2100
2101    #[test]
2102    fn verify_url_exactly_8kb_max_length() {
2103        // URL exactly 8KB (8192 bytes) should be valid for interpolation
2104        let long_path = "a".repeat(8192 - "https://api.example.com/".len());
2105        let url = format!("https://api.example.com/{}", long_path);
2106        assert_eq!(url.len(), 8192);
2107
2108        // Interpolation should handle this without issues
2109        let interpolated_url = interpolate(&url, "test-cred", None);
2110        assert_eq!(interpolated_url.len(), 8192);
2111        assert!(interpolated_url.starts_with("https://api.example.com/"));
2112    }
2113
2114    #[test]
2115    fn credential_10kb_long() {
2116        // Credential that is 10KB long should be handled properly
2117        let long_credential = "x".repeat(10240);
2118        assert_eq!(long_credential.len(), 10240);
2119
2120        // Interpolation with exact template should return credential unchanged
2121        let interpolated_credential = interpolate("{{match}}", &long_credential, None);
2122        assert_eq!(interpolated_credential.len(), 10240);
2123        assert_eq!(interpolated_credential, long_credential);
2124
2125        // URL interpolation should URL-encode it
2126        let url_result = interpolate(
2127            "https://api.example.com/?key={{match}}",
2128            &long_credential,
2129            None,
2130        );
2131        assert!(url_result.contains("xxxxxxxxxx"));
2132    }
2133
2134    #[test]
2135    fn credential_all_printable_ascii() {
2136        // Credential containing every printable ASCII character (32-126)
2137        let all_ascii: String = (32..=126).map(|c| c as u8 as char).collect();
2138        assert_eq!(all_ascii.len(), 95);
2139
2140        // Test interpolation doesn't corrupt special characters when used as literal
2141        let interpolated_credential = interpolate("{{match}}", &all_ascii, None);
2142        assert_eq!(interpolated_credential, all_ascii);
2143
2144        // URL encoding should handle all special characters
2145        let url_result = interpolate("https://api.example.com/{{match}}", &all_ascii, None);
2146        // All non-alphanumeric characters should be percent-encoded
2147        assert!(url_result.starts_with("https://api.example.com/"));
2148    }
2149
2150    #[test]
2151    fn companion_identical_to_primary_credential() {
2152        // Companion that is identical to the primary credential
2153        let credential = "SAME_CREDENTIAL_12345";
2154
2155        let interpolated_credential = interpolate("{{match}}", credential, Some(credential));
2156        assert_eq!(interpolated_credential, credential);
2157
2158        // Test with companion template
2159        let comp_result = interpolate("{{companion.secret}}", credential, Some(credential));
2160        assert_eq!(comp_result, credential);
2161
2162        // URL interpolation with both
2163        let url_result = interpolate(
2164            "https://api.example.com/?primary={{match}}&companion={{companion.secret}}",
2165            credential,
2166            Some(credential),
2167        );
2168        // Both should be URL-encoded when embedded
2169        assert!(url_result.contains("primary="));
2170        assert!(url_result.contains("companion="));
2171    }
2172
2173    #[test]
2174    fn verify_spec_json_path_with_dots_in_field_names() {
2175        // JSON path containing dots in field names (needs proper escaping handling)
2176        // Note: json_pointer_get uses dot-separated paths, so field names with dots
2177        // are not directly supported - this tests the current behavior
2178        let document: serde_json::Value =
2179            serde_json::from_str(r#"{"field.with.dots": {"nested.key": "value"}}"#).unwrap();
2180        assert!(json_pointer_get(&document, "field.with.dots").is_none());
2181
2182        // Normal nested access works fine
2183        let normal_val: serde_json::Value =
2184            serde_json::from_str(r#"{"data": {"user.name": "alice"}}"#).unwrap();
2185        assert_eq!(
2186            json_pointer_get(&normal_val, "data"),
2187            Some(&serde_json::Value::Object(
2188                [(
2189                    "user.name".to_string(),
2190                    serde_json::Value::String("alice".into())
2191                )]
2192                .into_iter()
2193                .collect()
2194            ))
2195        );
2196    }
2197
2198    #[test]
2199    fn success_body_contains_matches_credential_itself() {
2200        // When body_contains pattern is the credential itself
2201        let credential = "sk_test_4242424242424242";
2202        let body = format!(r#"{{"token": "{}", "valid": true}}"#, credential);
2203
2204        let spec = SuccessSpec {
2205            status: Some(200),
2206            status_not: None,
2207            body_contains: Some(credential.into()),
2208            body_not_contains: None,
2209            json_path: None,
2210            equals: None,
2211        };
2212
2213        assert!(evaluate_success(&spec, 200, &body));
2214
2215        // Should fail if credential not in body
2216        let wrong_body = r#"{"token": "other", "valid": true}"#;
2217        assert!(!evaluate_success(&spec, 200, wrong_body));
2218    }
2219
2220    #[tokio::test]
2221    async fn consecutive_verifications_cache_poisoning_protection() {
2222        use std::sync::atomic::{AtomicUsize, Ordering};
2223
2224        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
2225        let addr = listener.local_addr().unwrap();
2226        let request_count = Arc::new(AtomicUsize::new(0));
2227        let count_clone = request_count.clone();
2228
2229        tokio::spawn(async move {
2230            loop {
2231                let Ok((mut stream, _)) = listener.accept().await else {
2232                    break;
2233                };
2234                let count = count_clone.clone();
2235                tokio::spawn(async move {
2236                    let mut buf = [0u8; 1024];
2237                    let _ = stream.read(&mut buf).await;
2238                    count.fetch_add(1, Ordering::SeqCst);
2239                    let _ = stream
2240                        .write_all(
2241                            b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\n{\"valid\": true}",
2242                        )
2243                        .await;
2244                });
2245            }
2246        });
2247
2248        let detector = DetectorSpec {
2249            id: "cache-test".into(),
2250            name: "Cache Test".into(),
2251            service: "cache-service".into(),
2252            severity: Severity::High,
2253            patterns: vec![],
2254            companion: None,
2255            verify: Some(keyhog_core::VerifySpec {
2256                method: HttpMethod::Get,
2257                url: format!("http://127.0.0.1:{}/verify", addr.port()),
2258                auth: AuthSpec::None,
2259                headers: vec![],
2260                body: None,
2261                success: SuccessSpec {
2262                    status: Some(200),
2263                    status_not: None,
2264                    body_contains: None,
2265                    body_not_contains: None,
2266                    json_path: None,
2267                    equals: None,
2268                },
2269                metadata: vec![],
2270                timeout_ms: None,
2271            }),
2272            keywords: vec![],
2273        };
2274
2275        let engine = VerificationEngine::new(
2276            &[detector],
2277            VerifyConfig {
2278                timeout: Duration::from_secs(1),
2279                max_concurrent_per_service: 50,
2280                max_concurrent_global: 50,
2281                ..Default::default()
2282            },
2283        )
2284        .unwrap();
2285
2286        let make_match = |cred: &str| RawMatch {
2287            detector_id: "cache-test".into(),
2288            detector_name: "Cache Test".into(),
2289            service: "cache-service".into(),
2290            severity: Severity::High,
2291            credential: cred.into(),
2292            companion: None,
2293            location: MatchLocation {
2294                source: "fs".into(),
2295                file_path: Some("test.txt".into()),
2296                line: Some(1),
2297                offset: 0,
2298                commit: None,
2299                author: None,
2300                date: None,
2301            },
2302            entropy: None,
2303            confidence: Some(0.9),
2304        };
2305
2306        // First verification with credential A
2307        let group_a = dedup_matches(vec![make_match("cred-a")]).pop().unwrap();
2308        let findings_a = engine.verify_all(vec![group_a.clone()]).await;
2309        assert_eq!(findings_a.len(), 1);
2310
2311        // Second verification with same credential A (should use cache)
2312        let findings_a2 = engine.verify_all(vec![group_a.clone()]).await;
2313        assert_eq!(findings_a2.len(), 1);
2314
2315        // Both results should be identical (cache hit)
2316        assert_eq!(
2317            std::mem::discriminant(&findings_a[0].verification),
2318            std::mem::discriminant(&findings_a2[0].verification)
2319        );
2320
2321        // Different credential B should be independent
2322        let group_b = dedup_matches(vec![make_match("cred-b")]).pop().unwrap();
2323        let findings_b = engine.verify_all(vec![group_b]).await;
2324        assert_eq!(findings_b.len(), 1);
2325
2326        // Cache should not have cross-contaminated results
2327        assert!(matches!(
2328            findings_a[0].verification,
2329            VerificationResult::Live | VerificationResult::Dead | VerificationResult::Error(_)
2330        ));
2331    }
2332
2333    #[test]
2334    fn verify_with_delete_method() {
2335        // Verify that DELETE method is properly supported
2336        let rt = tokio::runtime::Runtime::new().unwrap();
2337        rt.block_on(async {
2338            let client = Client::new();
2339
2340            // Build a DELETE request - should not panic
2341            let request = request_for_method(
2342                &client,
2343                &HttpMethod::Delete,
2344                reqwest::Url::parse("https://example.com/resource/123").unwrap(),
2345            );
2346
2347            // The request builder should be functional (we can't actually send without a server)
2348            let _ = request;
2349        });
2350    }
2351
2352    #[test]
2353    fn verify_url_with_ipv6_literal() {
2354        // URL with IPv6 literal address should be properly handled
2355        let ipv6_urls = vec![
2356            "http://[::1]:8080/api",
2357            "https://[2001:db8::1]/verify",
2358            "http://[fe80::1]:3000/check",
2359        ];
2360
2361        for url in ipv6_urls {
2362            // parse_url_host should extract the host correctly
2363            let host = parse_url_host(url);
2364            assert!(host.is_some(), "Failed to parse host for: {}", url);
2365
2366            let host_str = host.unwrap();
2367            // IPv6 addresses should be handled (without brackets after parsing)
2368            assert!(
2369                host_str.contains(':')
2370                    || host_str == "::1"
2371                    || host_str.starts_with("fe80")
2372                    || host_str.starts_with("2001"),
2373                "Unexpected host for {}: {}",
2374                url,
2375                host_str
2376            );
2377        }
2378
2379        // IPv6 loopback should be blocked as private
2380        assert!(is_private_url("http://[::1]/api"));
2381        assert!(is_private_url("http://[::1]:8080/verify"));
2382
2383        // IPv6 ULA should be blocked as private
2384        assert!(is_private_url("http://[fd00::1]/api"));
2385
2386        // IPv6 link-local should be blocked as private
2387        assert!(is_private_url("http://[fe80::1]/api"));
2388        assert!(is_private_url("http://[fe80::1]:3000/check"));
2389    }
2390
2391    #[test]
2392    fn body_valid_jsonl_multiple_objects() {
2393        // Body with JSONL format (multiple JSON objects, one per line)
2394        let jsonl_body = r#"{"id": 1, "valid": true}
2395{"id": 2, "valid": false}
2396{"id": 3, "valid": true}"#;
2397
2398        // body_indicates_error should handle JSONL gracefully
2399        // It looks for error indicators as JSON keys
2400        assert!(!body_indicates_error(jsonl_body));
2401
2402        // Success spec with body_contains should work on the entire body
2403        let spec = SuccessSpec {
2404            status: Some(200),
2405            status_not: None,
2406            body_contains: Some("\"valid\": true".into()),
2407            body_not_contains: None,
2408            json_path: None,
2409            equals: None,
2410        };
2411
2412        assert!(evaluate_success(&spec, 200, jsonl_body));
2413
2414        // Should fail if pattern not present
2415        let spec_missing = SuccessSpec {
2416            status: Some(200),
2417            status_not: None,
2418            body_contains: Some("not_found".into()),
2419            body_not_contains: None,
2420            json_path: None,
2421            equals: None,
2422        };
2423        assert!(!evaluate_success(&spec_missing, 200, jsonl_body));
2424
2425        // JSON path won't work because the body as a whole is not valid JSON
2426        let spec_json = SuccessSpec {
2427            status: Some(200),
2428            status_not: None,
2429            body_contains: None,
2430            body_not_contains: None,
2431            json_path: Some("id".into()),
2432            equals: None,
2433        };
2434        assert!(!evaluate_success(&spec_json, 200, jsonl_body));
2435    }
2436}