Skip to main content

keyhog_verifier/
verify.rs

1//! Verification execution logic.
2//!
3//! Verification is explicitly opt-in via the `--verify` CLI flag.
4//! Security invariants for this module:
5//! - Credentials are never stored permanently. They are only used in-memory for the current run.
6//! - HTTPS only. TLS certificate validation stays enabled for every request.
7//! - Private IPs and private DNS resolutions are blocked to reduce SSRF risk.
8//! - Redirects are not followed.
9//! - Response bodies are capped at 1 MB.
10
11use std::collections::HashMap;
12use std::net::{IpAddr, SocketAddr};
13use std::sync::Arc;
14use std::time::Duration;
15
16use dashmap::DashMap;
17use keyhog_core::{
18    AuthSpec, DetectorSpec, HttpMethod, MetadataSpec, SuccessSpec, VerificationResult,
19    VerifiedFinding,
20};
21use reqwest::Client;
22use tokio::sync::Notify;
23use tokio::task::JoinSet;
24
25use crate::interpolate::{interpolate, resolve_field};
26use crate::ssrf::{is_private_ip, is_private_ipv4, is_private_url, parse_numeric_ipv4_host};
27use crate::{DedupedMatch, VerificationEngine, VerifyConfig, VerifyError, cache, into_finding};
28
29#[cfg(test)]
30use crate::{DedupScope, dedup_matches};
31#[cfg(test)]
32use crate::ssrf::parse_url_host;
33#[cfg(test)]
34use keyhog_core::{MatchLocation, RawMatch};
35use tokio::sync::Semaphore;
36
37const DEFAULT_SERVICE_CONCURRENCY: usize = 5;
38const MAX_VERIFY_ATTEMPTS: usize = 3;
39const RETRY_DELAY_MS: u64 = 500;
40/// Maximum response body size to read during verification (1 MB).
41/// Prevents OOM from malicious endpoints returning unbounded data.
42const MAX_RESPONSE_BODY_BYTES: usize = 1024 * 1024;
43const BODY_ERROR_MESSAGE: &str = "body read failed";
44const BODY_TOO_LARGE_ERROR: &str = "response body exceeds 1MB limit";
45const GENERIC_REQUEST_ERROR: &str = "request failed";
46const CONNECTION_FAILED_ERROR: &str = "connection failed";
47const TOO_MANY_REDIRECTS_ERROR: &str = "too many redirects";
48const TIMEOUT_ERROR: &str = "timeout";
49const PRIVATE_URL_ERROR: &str = "blocked: private URL";
50const HTTPS_ONLY_ERROR: &str = "blocked: HTTPS only";
51const MAX_RETRIES_EXCEEDED_ERROR: &str = "max retries exceeded";
52const AWS_VALID_ACCESS_KEY_PREFIXES: &[&str] = &["AKIA", "ASIA", "AROA", "AIDA", "AGPA"];
53const AWS_ACCESS_KEY_LEN: usize = 20;
54const AWS_MIN_SECRET_KEY_LEN: usize = 40;
55
56impl VerificationEngine {
57    /// Create a verifier with shared HTTP client, cache, and concurrency controls.
58    ///
59    /// # Examples
60    ///
61    /// ```rust
62    /// use keyhog_core::{DetectorSpec, PatternSpec, Severity};
63    /// use keyhog_verifier::{VerificationEngine, VerifyConfig};
64    ///
65    /// let engine = VerificationEngine::new(
66    ///     &[DetectorSpec {
67    ///         id: "demo-token".into(),
68    ///         name: "Demo Token".into(),
69    ///         service: "demo".into(),
70    ///         severity: Severity::High,
71    ///         patterns: vec![PatternSpec {
72    ///             regex: "demo_[A-Z0-9]{8}".into(),
73    ///             description: None,
74    ///             group: None,
75    ///         }],
76    ///         companion: None,
77    ///         verify: None,
78    ///         keywords: vec!["demo_".into()],
79    ///     }],
80    ///     VerifyConfig::default(),
81    /// )
82    /// .unwrap();
83    ///
84    /// let _ = engine;
85    /// ```
86    pub fn new(detectors: &[DetectorSpec], config: VerifyConfig) -> Result<Self, VerifyError> {
87        let client = Client::builder()
88            .timeout(config.timeout)
89            // SAFETY: verification traffic must keep certificate validation on.
90            .danger_accept_invalid_certs(false)
91            .redirect(reqwest::redirect::Policy::none())
92            .build()
93            .map_err(VerifyError::ClientBuild)?;
94
95        let detector_map: HashMap<String, DetectorSpec> = detectors
96            .iter()
97            .cloned()
98            .map(|d| (d.id.clone(), d))
99            .collect();
100
101        let mut service_semaphores = HashMap::new();
102        for d in detectors {
103            service_semaphores
104                .entry(d.service.clone())
105                .or_insert_with(|| Arc::new(Semaphore::new(config.max_concurrent_per_service)));
106        }
107
108        Ok(Self {
109            client,
110            detectors: detector_map,
111            service_semaphores,
112            global_semaphore: Arc::new(Semaphore::new(config.max_concurrent_global)),
113            timeout: config.timeout,
114            cache: Arc::new(cache::VerificationCache::default_ttl()),
115            inflight: Arc::new(DashMap::new()),
116            max_inflight_keys: config.max_inflight_keys,
117        })
118    }
119
120    /// Verify a batch of deduplicated raw matches in parallel.
121    /// Returns one `VerifiedFinding` per unique (detector_id, credential).
122    ///
123    /// # Examples
124    ///
125    /// ```rust,no_run
126    /// use keyhog_core::{DetectorSpec, MatchLocation, PatternSpec, RawMatch, Severity};
127    /// use keyhog_verifier::{DedupScope, VerificationEngine, VerifyConfig, dedup_matches};
128    ///
129    /// # async fn demo() {
130    /// let detector = DetectorSpec {
131    ///     id: "demo-token".into(),
132    ///     name: "Demo Token".into(),
133    ///     service: "demo".into(),
134    ///     severity: Severity::High,
135    ///     patterns: vec![PatternSpec {
136    ///         regex: "demo_[A-Z0-9]{8}".into(),
137    ///         description: None,
138    ///         group: None,
139    ///     }],
140    ///     companion: None,
141    ///     verify: None,
142    ///     keywords: vec!["demo_".into()],
143    /// };
144    /// let engine = VerificationEngine::new(&[detector], VerifyConfig::default()).unwrap();
145    /// let findings = engine
146    ///     .verify_all(dedup_matches(vec![RawMatch {
147    ///         detector_id: "demo-token".into(),
148    ///         detector_name: "Demo Token".into(),
149    ///         service: "demo".into(),
150    ///         severity: Severity::High,
151    ///         credential: "demo_ABC12345".into(),
152    ///         companion: None,
153    ///         location: MatchLocation {
154    ///             source: "filesystem".into(),
155    ///             file_path: Some(".env".into()),
156    ///             line: Some(1),
157    ///             offset: 0,
158    ///             commit: None,
159    ///             author: None,
160    ///             date: None,
161    ///         },
162    ///         entropy: None,
163    ///         confidence: None,
164    ///     }], &DedupScope::Credential))
165    ///     .await;
166    /// assert_eq!(findings.len(), 1);
167    /// # }
168    /// ```
169    pub async fn verify_all(&self, groups: Vec<DedupedMatch>) -> Vec<VerifiedFinding> {
170        let max_active = self.global_semaphore.available_permits().max(1);
171        let total = groups.len();
172        let shared = VerifyTaskShared {
173            global_semaphore: self.global_semaphore.clone(),
174            service_semaphores: self.service_semaphores.clone(),
175            client: self.client.clone(),
176            detectors: self.detectors.clone(),
177            timeout: self.timeout,
178            cache: self.cache.clone(),
179            inflight: self.inflight.clone(),
180            max_inflight_keys: self.max_inflight_keys,
181        };
182        let mut pending = groups.into_iter();
183        let mut join_set = JoinSet::new();
184
185        while join_set.len() < max_active {
186            let Some(group) = pending.next() else {
187                break;
188            };
189            join_set.spawn(verify_group_task(shared.clone(), group));
190        }
191
192        let mut findings = Vec::with_capacity(total);
193        while let Some(result) = join_set.join_next().await {
194            match result {
195                Ok(finding) => findings.push(finding),
196                Err(e) => tracing::error!("verification task panicked: {}", e),
197            }
198
199            if let Some(group) = pending.next() {
200                join_set.spawn(verify_group_task(shared.clone(), group));
201            }
202        }
203        findings
204    }
205}
206
207#[derive(Clone)]
208struct VerifyTaskShared {
209    global_semaphore: Arc<Semaphore>,
210    service_semaphores: HashMap<String, Arc<Semaphore>>,
211    client: Client,
212    detectors: HashMap<String, DetectorSpec>,
213    timeout: Duration,
214    cache: Arc<cache::VerificationCache>,
215    inflight: Arc<DashMap<(String, String), Arc<Notify>>>,
216    max_inflight_keys: usize,
217}
218
219async fn verify_group_task(shared: VerifyTaskShared, group: DedupedMatch) -> VerifiedFinding {
220    let global = shared.global_semaphore;
221    let service_sem = shared
222        .service_semaphores
223        .get(&group.service)
224        .cloned()
225        .unwrap_or_else(|| Arc::new(Semaphore::new(DEFAULT_SERVICE_CONCURRENCY)));
226    let client = shared.client;
227    let detector = shared.detectors.get(&group.detector_id).cloned();
228    let timeout = shared.timeout;
229
230    let cache = shared.cache;
231    let inflight = shared.inflight;
232    let max_inflight_keys = shared.max_inflight_keys;
233
234    let Ok(_global_permit) = global.acquire().await else {
235        return into_finding(
236            group,
237            VerificationResult::Error("semaphore closed".into()),
238            HashMap::new(),
239        );
240    };
241    let Ok(_service_permit) = service_sem.acquire().await else {
242        return into_finding(
243            group,
244            VerificationResult::Error("service semaphore closed".into()),
245            HashMap::new(),
246        );
247    };
248
249    if let Some((cached_result, cached_meta)) = cache.get(&group.credential, &group.detector_id) {
250        return into_finding(group, cached_result, cached_meta);
251    }
252
253    let inflight_guard = if inflight.len() >= max_inflight_keys {
254        None
255    } else {
256        let inflight_key = (group.detector_id.clone(), group.credential.clone());
257        loop {
258            if let Some((cached_result, cached_meta)) =
259                cache.get(&group.credential, &group.detector_id)
260            {
261                return into_finding(group, cached_result, cached_meta);
262            }
263
264            match inflight.entry(inflight_key.clone()) {
265                dashmap::mapref::entry::Entry::Occupied(entry) => {
266                    let notify = entry.get().clone();
267                    // SAFETY: lock ordering is one-way: task permits
268                    // (global, then service) are acquired before touching
269                    // inflight, and the DashMap entry guard is dropped before
270                    // await.
271                    // CRITICAL: We MUST create the `Notified` future before dropping `entry`.
272                    // This registers our interest synchronously. If we drop `entry` first,
273                    // the verifying task could remove it and call `notify_waiters()` before
274                    // we create the future, causing a permanent hang (lost wake-up).
275                    let fut = notify.notified();
276                    drop(entry);
277                    fut.await;
278                }
279                dashmap::mapref::entry::Entry::Vacant(entry) => {
280                    let notify = Arc::new(Notify::new());
281                    entry.insert(notify.clone());
282                    break Some(InflightGuard {
283                        key: inflight_key,
284                        inflight: inflight.clone(),
285                        notify,
286                    });
287                }
288            }
289        }
290    };
291    let _inflight_guard = inflight_guard;
292
293    let (verification, metadata) = match &detector {
294        Some(det) => match &det.verify {
295            Some(verify_spec) => {
296                verify_with_retry(
297                    &client,
298                    verify_spec,
299                    &group.credential,
300                    group.companion.as_deref(),
301                    timeout,
302                )
303                .await
304            }
305            None => (VerificationResult::Unverifiable, HashMap::new()),
306        },
307        None => (VerificationResult::Unverifiable, HashMap::new()),
308    };
309
310    cache.put(
311        &group.credential,
312        &group.detector_id,
313        verification.clone(),
314        metadata.clone(),
315    );
316
317    into_finding(group, verification, metadata)
318}
319
320struct InflightGuard {
321    key: (String, String),
322    inflight: Arc<DashMap<(String, String), Arc<Notify>>>,
323    notify: Arc<Notify>,
324}
325
326impl Drop for InflightGuard {
327    fn drop(&mut self) {
328        // SAFETY: cleanup follows the same ordering guarantee as verify_all:
329        // remove the inflight marker without holding any other map guard, then
330        // notify waiters. There is no second lock acquired while this guard is
331        // dropped, so the owner cannot deadlock with waiting tasks.
332        self.inflight.remove(&self.key);
333        self.notify.notify_waiters();
334    }
335}
336
337/// Perform verification with retry logic for transient failures.
338async fn verify_with_retry(
339    client: &Client,
340    spec: &keyhog_core::VerifySpec,
341    credential: &str,
342    companion: Option<&str>,
343    timeout: Duration,
344) -> (VerificationResult, HashMap<String, String>) {
345    for attempt in 0..MAX_VERIFY_ATTEMPTS {
346        let VerificationAttempt {
347            result,
348            metadata,
349            transient,
350        } = verify_credential(client, spec, credential, companion, timeout).await;
351        if transient && attempt + 1 < MAX_VERIFY_ATTEMPTS {
352            let delay_ms = RETRY_DELAY_MS * (attempt as u64 + 1);
353            tokio::time::sleep(Duration::from_millis(delay_ms)).await;
354            continue;
355        }
356        return (result, metadata);
357    }
358    (
359        VerificationResult::Error(MAX_RETRIES_EXCEEDED_ERROR.into()),
360        HashMap::new(),
361    )
362}
363
364struct VerificationAttempt {
365    result: VerificationResult,
366    metadata: HashMap<String, String>,
367    transient: bool,
368}
369
370#[derive(Debug)]
371struct ResolvedTarget {
372    client: Client,
373    url: reqwest::Url,
374}
375
376/// Perform one verification HTTP call for a credential.
377async fn verify_credential(
378    client: &Client,
379    spec: &keyhog_core::VerifySpec,
380    credential: &str,
381    companion: Option<&str>,
382    timeout: Duration,
383) -> VerificationAttempt {
384    let timeout = verification_timeout(spec, timeout);
385    let raw_url = interpolate(&spec.url, credential, companion);
386    let resolved_target = match resolved_client_for_url(client, &raw_url, timeout).await {
387        Ok(resolved_target) => resolved_target,
388        Err(result) => {
389            return VerificationAttempt {
390                result,
391                metadata: HashMap::new(),
392                transient: false,
393            };
394        }
395    };
396
397    // SSRF protection: block verification against private/internal IPs.
398    if is_private_url(resolved_target.url.as_str()) {
399        return VerificationAttempt {
400            result: VerificationResult::Error(PRIVATE_URL_ERROR.into()),
401            metadata: HashMap::new(),
402            transient: false,
403        };
404    }
405
406    let base_request = build_request(
407        &resolved_target.client,
408        spec,
409        resolved_target.url.clone(),
410        credential,
411        companion,
412        timeout,
413    )
414    .await;
415    let mut request = match base_request {
416        RequestBuildResult::Ready(request) => request,
417        RequestBuildResult::Final(result, metadata) => {
418            return VerificationAttempt {
419                result,
420                metadata,
421                transient: false,
422            };
423        }
424    };
425
426    // Apply additional headers.
427    for header in &spec.headers {
428        let value = interpolate(&header.value, credential, companion);
429        request = request.header(&header.name, &value);
430    }
431
432    // Apply body.
433    if let Some(body_template) = &spec.body {
434        let body = interpolate(body_template, credential, companion);
435        request = request.body(body);
436    }
437
438    // Execute.
439    let response = match execute_request(request).await {
440        Ok(resp) => resp,
441        Err(error) => {
442            return VerificationAttempt {
443                result: error.result,
444                metadata: HashMap::new(),
445                transient: error.transient,
446            };
447        }
448    };
449
450    let status = response.status().as_u16();
451    let body = match read_response_body(response).await {
452        Ok(body) => body,
453        Err(error) => {
454            return VerificationAttempt {
455                result: error.result,
456                metadata: HashMap::new(),
457                transient: error.transient,
458            };
459        }
460    };
461
462    // Evaluate success condition.
463    let is_live = evaluate_success(&spec.success, status, &body);
464
465    let is_actually_live = is_live && !body_indicates_error(&body);
466
467    let metadata = extract_metadata(&spec.metadata, &body);
468
469    let verification_result = if is_actually_live {
470        VerificationResult::Live
471    } else if status == 429 {
472        VerificationResult::RateLimited
473    } else {
474        VerificationResult::Dead
475    };
476
477    VerificationAttempt {
478        result: verification_result,
479        metadata,
480        transient: false,
481    }
482}
483
484fn verification_timeout(spec: &keyhog_core::VerifySpec, default_timeout: Duration) -> Duration {
485    spec.timeout_ms
486        .map(Duration::from_millis)
487        .unwrap_or(default_timeout)
488}
489
490async fn resolved_client_for_url(
491    client: &Client,
492    url: &str,
493    timeout: Duration,
494) -> Result<ResolvedTarget, VerificationResult> {
495    if is_private_url(url) {
496        return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
497    }
498    let parsed = reqwest::Url::parse(url)
499        .map_err(|_| VerificationResult::Error(GENERIC_REQUEST_ERROR.into()))?;
500    if parsed.scheme() != "https" {
501        return Err(VerificationResult::Error(HTTPS_ONLY_ERROR.into()));
502    }
503    let Some(host) = parsed.host_str() else {
504        return Err(VerificationResult::Error(GENERIC_REQUEST_ERROR.into()));
505    };
506    if let Ok(ip) = host.parse::<IpAddr>() {
507        if is_private_ip(ip) {
508            return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
509        }
510        return Ok(ResolvedTarget {
511            client: client.clone(),
512            url: parsed,
513        });
514    }
515    if let Some(ip) = parse_numeric_ipv4_host(host) {
516        if is_private_ipv4(ip) {
517            return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
518        }
519        return Ok(ResolvedTarget {
520            client: client.clone(),
521            url: parsed,
522        });
523    }
524
525    let port = parsed.port_or_known_default().unwrap_or(443);
526    let addrs = tokio::time::timeout(timeout, tokio::net::lookup_host((host, port)))
527        .await
528        .map_err(|_| VerificationResult::Error(TIMEOUT_ERROR.into()))?
529        .map_err(|_| VerificationResult::Error(CONNECTION_FAILED_ERROR.into()))?
530        .collect::<Vec<SocketAddr>>();
531    if addrs.is_empty() || addrs.iter().any(|addr| is_private_ip(addr.ip())) {
532        return Err(VerificationResult::Error(PRIVATE_URL_ERROR.into()));
533    }
534    let pinned_addrs = addrs
535        .into_iter()
536        .map(|addr| SocketAddr::new(addr.ip(), port))
537        .collect::<Vec<_>>();
538
539    let resolved_client = reqwest::Client::builder()
540        .timeout(timeout)
541        .danger_accept_invalid_certs(false)
542        .redirect(reqwest::redirect::Policy::none())
543        // SAFETY: this dedicated client is paired with the already-parsed URL
544        // below and only ever resolves `host` to the vetted address set from
545        // this function, so reqwest cannot perform a fresh DNS lookup later.
546        .resolve_to_addrs(host, &pinned_addrs)
547        .build()
548        .map_err(|_| VerificationResult::Error(GENERIC_REQUEST_ERROR.into()))?;
549
550    Ok(ResolvedTarget {
551        client: resolved_client,
552        url: parsed,
553    })
554}
555
556enum RequestBuildResult {
557    Ready(reqwest::RequestBuilder),
558    Final(VerificationResult, HashMap<String, String>),
559}
560
561async fn build_request(
562    client: &Client,
563    spec: &keyhog_core::VerifySpec,
564    url: reqwest::Url,
565    credential: &str,
566    companion: Option<&str>,
567    timeout: Duration,
568) -> RequestBuildResult {
569    let request = request_for_method(client, &spec.method, url).timeout(timeout);
570    apply_auth(request, &spec.auth, credential, companion, timeout, client).await
571}
572
573fn request_for_method(
574    client: &Client,
575    method: &HttpMethod,
576    url: reqwest::Url,
577) -> reqwest::RequestBuilder {
578    match method {
579        HttpMethod::Get => client.get(url),
580        HttpMethod::Post => client.post(url),
581        HttpMethod::Put => client.put(url),
582        HttpMethod::Delete => client.delete(url),
583        HttpMethod::Head => client.head(url),
584        HttpMethod::Patch => client.patch(url),
585    }
586}
587
588async fn apply_auth(
589    request: reqwest::RequestBuilder,
590    auth: &AuthSpec,
591    credential: &str,
592    companion: Option<&str>,
593    timeout: Duration,
594    client: &Client,
595) -> RequestBuildResult {
596    match auth {
597        AuthSpec::None => RequestBuildResult::Ready(request),
598        AuthSpec::Bearer { field } => {
599            let token = resolve_field(field, credential, companion);
600            RequestBuildResult::Ready(request.bearer_auth(&token))
601        }
602        AuthSpec::Basic { username, password } => {
603            let user = resolve_field(username, credential, companion);
604            let pass = resolve_field(password, credential, companion);
605            RequestBuildResult::Ready(request.basic_auth(&user, Some(&pass)))
606        }
607        AuthSpec::Header { name, template } => {
608            let value = interpolate(template, credential, companion);
609            RequestBuildResult::Ready(request.header(name, &value))
610        }
611        AuthSpec::Query { param, field } => {
612            let value = resolve_field(field, credential, companion);
613            RequestBuildResult::Ready(request.query(&[(param.as_str(), value.as_str())]))
614        }
615        AuthSpec::AwsV4 {
616            access_key,
617            secret_key,
618            region,
619            ..
620        } => {
621            build_aws_probe(
622                access_key, secret_key, region, credential, companion, timeout, client,
623            )
624            .await
625        }
626    }
627}
628
629/// Build and execute an AWS SigV4-signed `GetCallerIdentity` request.
630///
631/// This performs real authentication against the AWS STS endpoint:
632/// - Constructs a canonical request per AWS Signature Version 4
633/// - Signs with the provided secret key using HMAC-SHA256
634/// - Returns `Live` if STS responds 200, `Dead` on 403
635///
636/// # Security
637///
638/// - Only contacts `sts.<region>.amazonaws.com` over HTTPS
639/// - The secret key is used only for HMAC signing and never transmitted
640/// - No data mutation: `GetCallerIdentity` is a read-only STS action
641async fn build_aws_probe(
642    access_key: &str,
643    secret_key: &str,
644    region: &str,
645    credential: &str,
646    companion: Option<&str>,
647    timeout: Duration,
648    client: &Client,
649) -> RequestBuildResult {
650    let access_key = resolve_field(access_key, credential, companion);
651    let secret_key = resolve_field(secret_key, credential, companion);
652
653    if secret_key.is_empty() {
654        return RequestBuildResult::Final(VerificationResult::Unverifiable, HashMap::new());
655    }
656
657    if !valid_aws_format(&access_key, &secret_key) {
658        return RequestBuildResult::Final(
659            VerificationResult::Dead,
660            HashMap::from([("format_valid".into(), "false".into())]),
661        );
662    }
663
664    let host = format!("sts.{region}.amazonaws.com");
665    let url = format!("https://{host}/");
666    let body = "Action=GetCallerIdentity&Version=2011-06-15";
667
668    // Build SigV4 signed request
669    match build_sigv4_request(
670        client,
671        &url,
672        &host,
673        body,
674        &access_key,
675        &secret_key,
676        region,
677        "sts",
678        timeout,
679    )
680    .await
681    {
682        Ok((result, metadata)) => RequestBuildResult::Final(result, metadata),
683        Err(error_msg) => RequestBuildResult::Final(
684            VerificationResult::Error(error_msg),
685            HashMap::from([("format_valid".into(), "true".into())]),
686        ),
687    }
688}
689
690/// Construct and send an AWS SigV4-signed HTTP POST request.
691#[allow(clippy::too_many_arguments)]
692async fn build_sigv4_request(
693    client: &Client,
694    url: &str,
695    host: &str,
696    body: &str,
697    access_key: &str,
698    secret_key: &str,
699    region: &str,
700    service: &str,
701    timeout: Duration,
702) -> Result<(VerificationResult, HashMap<String, String>), String> {
703    use hmac::{Hmac, Mac};
704    use sha2::Sha256;
705
706    let now = chrono_lite_now();
707    let datestamp = &now[..8]; // YYYYMMDD
708    let amz_date = &now; // YYYYMMDDTHHMMSSZ
709
710    // Step 1: Create canonical request
711    let payload_hash = hex_sha256(body.as_bytes());
712    let canonical_headers = format!(
713        "content-type:application/x-www-form-urlencoded\nhost:{host}\nx-amz-date:{amz_date}\n"
714    );
715    let signed_headers = "content-type;host;x-amz-date";
716    let canonical_request =
717        format!("POST\n/\n\n{canonical_headers}\n{signed_headers}\n{payload_hash}");
718
719    // Step 2: Create string to sign
720    let credential_scope = format!("{datestamp}/{region}/{service}/aws4_request");
721    let canonical_request_hash = hex_sha256(canonical_request.as_bytes());
722    let string_to_sign =
723        format!("AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{canonical_request_hash}");
724
725    // Step 3: Calculate signature
726    let signing_key = derive_signing_key(secret_key, datestamp, region, service);
727    let signature = {
728        let mut mac =
729            Hmac::<Sha256>::new_from_slice(&signing_key).map_err(|_| AWS_SIGNING_ERROR)?;
730        mac.update(string_to_sign.as_bytes());
731        hex::encode(mac.finalize().into_bytes())
732    };
733
734    // Step 4: Build Authorization header
735    let authorization = format!(
736        "AWS4-HMAC-SHA256 Credential={access_key}/{credential_scope}, SignedHeaders={signed_headers}, Signature={signature}"
737    );
738
739    // Send the signed request
740    let response = client
741        .post(url)
742        .timeout(timeout)
743        .header("Content-Type", "application/x-www-form-urlencoded")
744        .header("Host", host)
745        .header("X-Amz-Date", amz_date)
746        .header("Authorization", &authorization)
747        .body(body.to_string())
748        .send()
749        .await
750        .map_err(|_| AWS_STS_UNREACHABLE_ERROR)?;
751
752    let status = response.status().as_u16();
753    let response_body = response.text().await.unwrap_or_default();
754
755    let mut metadata = HashMap::from([("format_valid".into(), "true".into())]);
756
757    let result = match status {
758        200 => {
759            // Parse GetCallerIdentity response for account info
760            if let Some(account) = extract_xml_field(&response_body, "Account") {
761                metadata.insert("aws_account".into(), account);
762            }
763            if let Some(arn) = extract_xml_field(&response_body, "Arn") {
764                metadata.insert("aws_arn".into(), arn);
765            }
766            VerificationResult::Live
767        }
768        403 => VerificationResult::Dead,
769        429 => VerificationResult::RateLimited,
770        _ => VerificationResult::Error(format!("unexpected STS response status: {status}")),
771    };
772
773    Ok((result, metadata))
774}
775
776/// Derive the SigV4 signing key: HMAC(HMAC(HMAC(HMAC("AWS4"+secret, date), region), service), "aws4_request")
777fn derive_signing_key(secret_key: &str, datestamp: &str, region: &str, service: &str) -> Vec<u8> {
778    let k_secret = format!("AWS4{secret_key}");
779    let k_date = hmac_sha256(k_secret.as_bytes(), datestamp.as_bytes());
780    let k_region = hmac_sha256(&k_date, region.as_bytes());
781    let k_service = hmac_sha256(&k_region, service.as_bytes());
782    hmac_sha256(&k_service, b"aws4_request")
783}
784
785/// Compute HMAC-SHA256.
786fn hmac_sha256(key: &[u8], data: &[u8]) -> Vec<u8> {
787    use hmac::{Hmac, Mac};
788    use sha2::Sha256;
789
790    let mut mac = Hmac::<Sha256>::new_from_slice(key).expect("HMAC accepts any key length");
791    mac.update(data);
792    mac.finalize().into_bytes().to_vec()
793}
794
795/// Compute hex-encoded SHA-256 digest.
796fn hex_sha256(data: &[u8]) -> String {
797    use sha2::{Digest, Sha256};
798    let hash = Sha256::digest(data);
799    hex::encode(hash)
800}
801
802/// Generate a UTC timestamp in AWS format: YYYYMMDDTHHMMSSZ.
803/// Avoids pulling in the `chrono` crate by using `SystemTime`.
804fn chrono_lite_now() -> String {
805    use std::time::SystemTime;
806    let now = SystemTime::now()
807        .duration_since(SystemTime::UNIX_EPOCH)
808        .expect("system clock is before epoch");
809    let secs = now.as_secs();
810    // Break epoch seconds into date/time components
811    let days = secs / 86400;
812    let time_of_day = secs % 86400;
813    let hours = time_of_day / 3600;
814    let minutes = (time_of_day % 3600) / 60;
815    let seconds = time_of_day % 60;
816
817    // Civil date from days since epoch (simplified Rata Die algorithm)
818    let (year, month, day) = civil_from_days(days as i64);
819    format!("{year:04}{month:02}{day:02}T{hours:02}{minutes:02}{seconds:02}Z")
820}
821
822/// Convert days since Unix epoch to (year, month, day).
823/// Algorithm from Howard Hinnant's date algorithms.
824fn civil_from_days(days: i64) -> (i32, u32, u32) {
825    let z = days + 719468;
826    let era = if z >= 0 { z } else { z - 146096 } / 146097;
827    let doe = (z - era * 146097) as u32;
828    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
829    let y = yoe as i64 + era * 400;
830    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
831    let mp = (5 * doy + 2) / 153;
832    let d = doy - (153 * mp + 2) / 5 + 1;
833    let m = if mp < 10 { mp + 3 } else { mp - 9 };
834    let y = if m <= 2 { y + 1 } else { y };
835    (y as i32, m, d)
836}
837
838/// Extract a simple XML field value: `<Tag>value</Tag>`.
839fn extract_xml_field(xml: &str, tag: &str) -> Option<String> {
840    let open = format!("<{tag}>");
841    let close = format!("</{tag}>");
842    let start = xml.find(&open)? + open.len();
843    let end = xml[start..].find(&close)? + start;
844    Some(xml[start..end].to_string())
845}
846
847/// Validate that the access key and secret key have valid AWS format.
848fn valid_aws_format(access_key: &str, secret_key: &str) -> bool {
849    AWS_VALID_ACCESS_KEY_PREFIXES
850        .iter()
851        .any(|prefix| access_key.starts_with(prefix))
852        && access_key.len() == AWS_ACCESS_KEY_LEN
853        && secret_key.len() >= AWS_MIN_SECRET_KEY_LEN
854        && secret_key
855            .chars()
856            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '/' | '='))
857}
858
859const AWS_SIGNING_ERROR: &str = "failed to create HMAC signing key";
860const AWS_STS_UNREACHABLE_ERROR: &str = "aws sts endpoint unreachable";
861
862struct VerificationFailure {
863    result: VerificationResult,
864    transient: bool,
865}
866
867async fn execute_request(
868    request: reqwest::RequestBuilder,
869) -> Result<reqwest::Response, VerificationFailure> {
870    request.send().await.map_err(|error| VerificationFailure {
871        result: VerificationResult::Error(sanitize_request_error(&error).into()),
872        transient: error.is_timeout() || error.is_connect() || error.is_request(),
873    })
874}
875
876fn sanitize_request_error(error: &reqwest::Error) -> &'static str {
877    if error.is_timeout() {
878        TIMEOUT_ERROR
879    } else if error.is_connect() {
880        CONNECTION_FAILED_ERROR
881    } else if error.is_redirect() {
882        TOO_MANY_REDIRECTS_ERROR
883    } else {
884        GENERIC_REQUEST_ERROR
885    }
886}
887
888async fn read_response_body(response: reqwest::Response) -> Result<String, VerificationFailure> {
889    // First check: Content-Length header as a fast-path rejection. This header
890    // is optional and attacker-controlled, so it's only used to reject
891    // obviously-too-large responses without starting to stream.
892    let content_length = response.content_length().unwrap_or(0) as usize;
893    if content_length > MAX_RESPONSE_BODY_BYTES {
894        return Err(VerificationFailure {
895            result: VerificationResult::Error(BODY_TOO_LARGE_ERROR.into()),
896            transient: false,
897        });
898    }
899
900    // Stream the body in chunks, aborting early if the accumulated size exceeds
901    // the limit. This prevents OOM from malicious endpoints that send large
902    // bodies via chunked transfer encoding without a Content-Length header.
903    let mut accumulated = Vec::with_capacity(content_length.min(MAX_RESPONSE_BODY_BYTES));
904    let mut stream = response.bytes_stream();
905    use futures_util::StreamExt;
906    while let Some(chunk_result) = stream.next().await {
907        let chunk = chunk_result.map_err(|_| VerificationFailure {
908            result: VerificationResult::Error(BODY_ERROR_MESSAGE.into()),
909            transient: true,
910        })?;
911        if accumulated.len() + chunk.len() > MAX_RESPONSE_BODY_BYTES {
912            return Err(VerificationFailure {
913                result: VerificationResult::Error(BODY_TOO_LARGE_ERROR.into()),
914                transient: false,
915            });
916        }
917        accumulated.extend_from_slice(&chunk);
918    }
919
920    Ok(String::from_utf8(accumulated).unwrap_or_default())
921}
922
923/// Check if a response body contains error indicators despite a 200 status.
924/// Many APIs return 200 with error JSON instead of proper HTTP status codes.
925///
926/// Matches JSON key patterns like `"error":` or `"invalid_token":` to reduce
927/// false positives from values containing error-like words (e.g.,
928/// `"invalid_login_count": 0` should not trigger this).
929///
930/// `SUCCESS_OVERRIDES` are only considered when no explicit error key is found.
931/// This prevents responses like `{"ok":true, "error":"rate_limited"}` from
932/// being incorrectly treated as successful.
933fn body_indicates_error(body: &str) -> bool {
934    let lower = body.to_lowercase();
935    let has_error = ERROR_INDICATORS.iter().any(|indicator| {
936        lower.match_indices(indicator).any(|(pos, _)| {
937            let before = lower[..pos].trim_end();
938            let after = lower[pos + indicator.len()..].trim_start();
939            let valid_key_start =
940                before.is_empty() || before.ends_with('{') || before.ends_with(',');
941            valid_key_start && after.starts_with(':')
942        })
943    });
944
945    if !has_error {
946        return false;
947    }
948
949    // An explicit error key takes precedence over success overrides.
950    // APIs that return both `"ok":true` and `"error":"..."` should be
951    // treated as errors — the error field is more specific and the `ok`
952    // field often reflects request delivery, not auth success.
953    // However, `"error": null` is a common pattern meaning "no error"
954    // and should NOT trigger error detection.
955    let has_explicit_error_key = lower.match_indices("\"error\"").any(|(pos, _)| {
956        let after = lower[pos + "\"error\"".len()..].trim_start();
957        after.starts_with(':') && {
958            let value_start = after[1..].trim_start();
959            // "error": null means "no error" — don't treat as error
960            !value_start.starts_with("null")
961        }
962    });
963
964    if has_explicit_error_key {
965        return true;
966    }
967
968    !contains_any(&lower, SUCCESS_OVERRIDES)
969}
970
971/// Evaluate whether a verification response meets the success criteria.
972fn evaluate_success(spec: &SuccessSpec, status: u16, body: &str) -> bool {
973    if !status_matches(spec, status) || !body_matches(spec, body) {
974        return false;
975    }
976
977    if let Some(ref json_path) = spec.json_path {
978        let Ok(parsed) = serde_json::from_str::<serde_json::Value>(body) else {
979            return false;
980        };
981        return json_expectation_matches(spec, &parsed, json_path);
982    }
983    true
984}
985
986fn status_matches(spec: &SuccessSpec, status: u16) -> bool {
987    if let Some(expected_status) = spec.status
988        && status != expected_status
989    {
990        return false;
991    }
992
993    if let Some(not_status) = spec.status_not
994        && status == not_status
995    {
996        return false;
997    }
998
999    true
1000}
1001
1002fn body_matches(spec: &SuccessSpec, body: &str) -> bool {
1003    if let Some(ref needle) = spec.body_contains
1004        && !body.contains(needle)
1005    {
1006        return false;
1007    }
1008
1009    if let Some(ref needle) = spec.body_not_contains
1010        && body.contains(needle)
1011    {
1012        return false;
1013    }
1014
1015    true
1016}
1017
1018fn json_expectation_matches(
1019    spec: &SuccessSpec,
1020    parsed: &serde_json::Value,
1021    json_path: &str,
1022) -> bool {
1023    let value = json_pointer_get(parsed, json_path);
1024    match &spec.equals {
1025        Some(expected) => value.is_some_and(|actual| json_value_to_string(actual) == *expected),
1026        None => value.is_some(),
1027    }
1028}
1029
1030fn json_value_to_string(value: &serde_json::Value) -> String {
1031    match value {
1032        serde_json::Value::String(text) => text.clone(),
1033        serde_json::Value::Bool(boolean) => boolean.to_string(),
1034        serde_json::Value::Number(number) => number.to_string(),
1035        other => other.to_string(),
1036    }
1037}
1038
1039fn contains_any(haystack: &str, needles: &[&str]) -> bool {
1040    needles.iter().any(|needle| haystack.contains(needle))
1041}
1042
1043const ERROR_INDICATORS: &[&str] = &[
1044    "\"error\"",
1045    "\"unauthorized\"",
1046    "\"forbidden\"",
1047    "\"invalid\"",
1048    "\"invalid_token\"",
1049    "\"invalid_key\"",
1050    "\"invalid_api_key\"",
1051    "\"authentication_error\"",
1052    "\"auth_error\"",
1053    "\"unauthenticated\"",
1054    "\"not_authenticated\"",
1055    "\"access_denied\"",
1056    "\"permission_denied\"",
1057    "\"invalid_credentials\"",
1058    "\"bad_credentials\"",
1059    "\"expired\"",
1060    "\"token_expired\"",
1061    "\"key_expired\"",
1062    "\"revoked\"",
1063    "\"inactive\"",
1064    "\"disabled\"",
1065    "\"suspended\"",
1066];
1067
1068const SUCCESS_OVERRIDES: &[&str] = &[
1069    "\"ok\":true",
1070    "\"ok\": true",
1071    "\"success\":true",
1072    "\"success\": true",
1073    "\"authenticated\":true",
1074    "\"valid\":true",
1075];
1076
1077/// Simple dot-path JSON accessor: "ok" → root["ok"], "data.user.name" → root["data"]["user"]["name"].
1078fn json_pointer_get<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
1079    const MAX_JSON_PATH_DEPTH: usize = 20;
1080
1081    let mut current = value;
1082    let mut depth = 0usize;
1083    for segment in path.split('.') {
1084        depth += 1;
1085        if depth > MAX_JSON_PATH_DEPTH || segment.is_empty() {
1086            return None;
1087        }
1088        current = current.get(segment)?;
1089    }
1090    Some(current)
1091}
1092
1093/// Extract metadata fields from a verification response body.
1094fn extract_metadata(specs: &[MetadataSpec], body: &str) -> HashMap<String, String> {
1095    let mut metadata = HashMap::new();
1096
1097    let parsed: Option<serde_json::Value> = serde_json::from_str(body).ok();
1098
1099    for spec in specs {
1100        if let Some(ref json_path) = spec.json_path
1101            && let Some(ref parsed) = parsed
1102            && let Some(value) = json_pointer_get(parsed, json_path)
1103        {
1104            let s = match value {
1105                serde_json::Value::String(s) => s.clone(),
1106                other => other.to_string(),
1107            };
1108            metadata.insert(spec.name.clone(), s);
1109        }
1110        if let Some(ref header_name) = spec.header {
1111            // Header extraction would need the actual response headers.
1112            // For now, we only support JSON-based extraction since we consume the body.
1113            tracing::debug!(
1114                "header extraction for '{}' not supported in body-only mode",
1115                header_name
1116            );
1117        }
1118    }
1119
1120    metadata
1121}
1122
1123#[cfg(test)]
1124mod tests {
1125    use super::*;
1126    use keyhog_core::Severity;
1127    use std::sync::Arc;
1128    use std::sync::atomic::{AtomicUsize, Ordering};
1129    use tokio::io::{AsyncReadExt, AsyncWriteExt};
1130    use tokio::net::TcpListener;
1131
1132    #[test]
1133    fn interpolation() {
1134        assert_eq!(
1135            interpolate(
1136                "https://api.example.com/check?key={{match}}",
1137                "abc123",
1138                None
1139            ),
1140            "https://api.example.com/check?key=abc123"
1141        );
1142        assert_eq!(
1143            interpolate("{{companion.secret}}", "key", Some("mysecret")),
1144            "mysecret"
1145        );
1146    }
1147
1148    #[test]
1149    fn interpolation_handles_empty_companion_replacements() {
1150        assert_eq!(
1151            interpolate(
1152                "https://api.example.com/{{companion.secret}}/{{companion.secret}}",
1153                "key",
1154                Some("")
1155            ),
1156            "https://api.example.com//"
1157        );
1158    }
1159
1160    #[test]
1161    fn field_resolution() {
1162        assert_eq!(resolve_field("match", "cred", None), "cred");
1163        assert_eq!(
1164            resolve_field("companion.secret", "cred", Some("sec")),
1165            "sec"
1166        );
1167        assert_eq!(
1168            resolve_field("literal_value", "cred", None),
1169            "literal_value"
1170        );
1171        assert_eq!(resolve_field("", "cred", None), "");
1172    }
1173
1174    #[test]
1175    fn success_status_check() {
1176        let spec = SuccessSpec {
1177            status: Some(200),
1178            status_not: None,
1179            body_contains: None,
1180            body_not_contains: None,
1181            json_path: None,
1182            equals: None,
1183        };
1184        assert!(evaluate_success(&spec, 200, ""));
1185        assert!(!evaluate_success(&spec, 401, ""));
1186    }
1187
1188    #[test]
1189    fn success_json_path_check() {
1190        let spec = SuccessSpec {
1191            status: Some(200),
1192            status_not: None,
1193            body_contains: None,
1194            body_not_contains: None,
1195            json_path: Some("ok".into()),
1196            equals: Some("true".into()),
1197        };
1198        assert!(evaluate_success(&spec, 200, r#"{"ok": true}"#));
1199        assert!(!evaluate_success(&spec, 200, r#"{"ok": false}"#));
1200        assert!(!evaluate_success(&spec, 401, r#"{"ok": true}"#));
1201    }
1202
1203    #[test]
1204    fn dedup_merges_locations() {
1205        let m1 = RawMatch {
1206            detector_id: "test".into(),
1207            detector_name: "Test".into(),
1208            service: "test".into(),
1209            severity: Severity::High,
1210            credential: "SECRET123".into(),
1211            companion: None,
1212            location: MatchLocation {
1213                source: "fs".into(),
1214                file_path: Some("a.py".into()),
1215                line: Some(1),
1216                offset: 0,
1217                commit: None,
1218                author: None,
1219                date: None,
1220            },
1221            entropy: None,
1222            confidence: Some(0.75),
1223        };
1224        let m2 = RawMatch {
1225            location: MatchLocation {
1226                file_path: Some("b.py".into()),
1227                line: Some(10),
1228                ..m1.location.clone()
1229            },
1230            ..m1.clone()
1231        };
1232
1233        let groups = dedup_matches(vec![m1, m2], &DedupScope::Credential);
1234        assert_eq!(groups.len(), 1);
1235        assert_eq!(groups[0].additional_locations.len(), 1);
1236    }
1237
1238    #[test]
1239    fn json_pointer_nested() {
1240        let document: serde_json::Value =
1241            serde_json::from_str(r#"{"data": {"user": {"name": "alice"}}}"#).unwrap();
1242        assert_eq!(
1243            json_pointer_get(&document, "data.user.name"),
1244            Some(&serde_json::Value::String("alice".into()))
1245        );
1246        assert!(json_pointer_get(&document, "data.missing").is_none());
1247    }
1248
1249    #[test]
1250    fn json_pointer_rejects_excessive_depth() {
1251        let value: serde_json::Value = serde_json::from_str(r#"{"a":{"b":{"c":true}}}"#).unwrap();
1252        let path = (0..21)
1253            .map(|i| format!("level{i}"))
1254            .collect::<Vec<_>>()
1255            .join(".");
1256        assert!(json_pointer_get(&value, &path).is_none());
1257        assert!(json_pointer_get(&value, "a.b.c").is_some());
1258    }
1259
1260    #[tokio::test]
1261    async fn verify_all_blocks_integer_private_hosts() {
1262        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
1263        let addr = listener.local_addr().unwrap();
1264        let requests = Arc::new(AtomicUsize::new(0));
1265        let requests_clone = requests.clone();
1266
1267        tokio::spawn(async move {
1268            loop {
1269                let Ok((mut stream, _)) = listener.accept().await else {
1270                    break;
1271                };
1272                let count = requests_clone.clone();
1273                tokio::spawn(async move {
1274                    let mut buf = [0u8; 1024];
1275                    let _ = stream.read(&mut buf).await;
1276                    count.fetch_add(1, Ordering::SeqCst);
1277                    tokio::time::sleep(Duration::from_millis(25)).await;
1278                    let _ = stream
1279                        .write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nOK")
1280                        .await;
1281                });
1282            }
1283        });
1284
1285        let detector = DetectorSpec {
1286            id: "test".into(),
1287            name: "Test".into(),
1288            service: "test".into(),
1289            severity: Severity::High,
1290            patterns: vec![],
1291            companion: None,
1292            verify: Some(keyhog_core::VerifySpec {
1293                method: HttpMethod::Get,
1294                url: format!("http://2130706433:{}/verify", addr.port()),
1295                auth: AuthSpec::None,
1296                headers: vec![],
1297                body: None,
1298                success: SuccessSpec {
1299                    status: Some(200),
1300                    status_not: None,
1301                    body_contains: None,
1302                    body_not_contains: None,
1303                    json_path: None,
1304                    equals: None,
1305                },
1306                metadata: vec![],
1307                timeout_ms: None,
1308            }),
1309            keywords: vec![],
1310        };
1311
1312        let engine = VerificationEngine::new(
1313            &[detector],
1314            VerifyConfig {
1315                timeout: Duration::from_secs(1),
1316                max_concurrent_per_service: 50,
1317                max_concurrent_global: 50,
1318                ..Default::default()
1319            },
1320        )
1321        .unwrap();
1322
1323        let make_match = || RawMatch {
1324            detector_id: "test".into(),
1325            detector_name: "Test".into(),
1326            service: "test".into(),
1327            severity: Severity::High,
1328            credential: "same-credential".into(),
1329            companion: None,
1330            location: MatchLocation {
1331                source: "fs".into(),
1332                file_path: Some("a.txt".into()),
1333                line: Some(1),
1334                offset: 0,
1335                commit: None,
1336                author: None,
1337                date: None,
1338            },
1339            entropy: None,
1340            confidence: Some(0.9),
1341        };
1342
1343        let group = dedup_matches(vec![make_match()], &DedupScope::Credential).pop().unwrap();
1344        let groups = (0..20).map(|_| group.clone()).collect();
1345        let findings = engine.verify_all(groups).await;
1346        assert_eq!(findings.len(), 20);
1347        assert!(findings.iter().all(|finding| {
1348            matches!(
1349                &finding.verification,
1350                VerificationResult::Error(message) if message == PRIVATE_URL_ERROR
1351            )
1352        }));
1353        assert_eq!(requests.load(Ordering::SeqCst), 0);
1354    }
1355
1356    #[tokio::test]
1357    async fn aws_sigv4_probe_fails_on_unreachable_endpoint() {
1358        let client = Client::new();
1359        let result = build_sigv4_request(
1360            &client,
1361            "https://127.0.0.1:1/",
1362            "127.0.0.1:1",
1363            "Action=GetCallerIdentity&Version=2011-06-15",
1364            "AKIAIOSFODNN7EXAMPLE",
1365            "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
1366            "us-east-1",
1367            "sts",
1368            Duration::from_millis(50),
1369        )
1370        .await;
1371
1372        assert!(result.is_err(), "should fail on unreachable endpoint");
1373    }
1374
1375    #[test]
1376    fn aws_sigv4_signing_key_derivation_is_deterministic() {
1377        let key1 = derive_signing_key("secret", "20260325", "us-east-1", "sts");
1378        let key2 = derive_signing_key("secret", "20260325", "us-east-1", "sts");
1379        assert_eq!(key1, key2, "signing key must be deterministic");
1380        assert_eq!(key1.len(), 32, "HMAC-SHA256 output is 32 bytes");
1381    }
1382
1383    #[test]
1384    fn chrono_lite_now_produces_valid_format() {
1385        let timestamp = chrono_lite_now();
1386        assert_eq!(timestamp.len(), 16, "YYYYMMDDTHHMMSSZ = 16 chars");
1387        assert!(timestamp.ends_with('Z'));
1388        assert!(timestamp.contains('T'));
1389    }
1390
1391    // =========================================================================
1392    // SSRF Protection Tests
1393    // =========================================================================
1394
1395    #[test]
1396    fn ssrf_blocks_localhost() {
1397        assert!(is_private_url("http://localhost/api"));
1398        assert!(is_private_url("https://localhost:8080/verify"));
1399        assert!(is_private_url("http://LOCALHOST/path"));
1400    }
1401
1402    #[test]
1403    fn ssrf_blocks_loopback() {
1404        assert!(is_private_url("http://127.0.0.1/api"));
1405        assert!(is_private_url("http://127.0.0.1:3000/check"));
1406        assert!(is_private_url("https://127.0.0.1/secret"));
1407    }
1408
1409    #[test]
1410    fn ssrf_blocks_private_class_a() {
1411        assert!(is_private_url("http://10.0.0.1/api"));
1412        assert!(is_private_url("http://10.255.255.255/verify"));
1413        assert!(is_private_url("https://10.10.10.10/check"));
1414    }
1415
1416    #[test]
1417    fn ssrf_blocks_private_class_b() {
1418        assert!(is_private_url("http://172.16.0.1/api"));
1419        assert!(is_private_url("http://172.17.1.1/verify"));
1420        assert!(is_private_url("http://172.18.2.2/check"));
1421        assert!(is_private_url("http://172.19.3.3/test"));
1422        assert!(is_private_url("http://172.20.0.0/api"));
1423        assert!(is_private_url("http://172.30.0.0/api"));
1424        assert!(is_private_url("http://172.31.255.255/verify"));
1425    }
1426
1427    #[test]
1428    fn ssrf_blocks_private_class_c() {
1429        assert!(is_private_url("http://192.168.0.1/api"));
1430        assert!(is_private_url("http://192.168.1.1/verify"));
1431        assert!(is_private_url("https://192.168.255.255/check"));
1432    }
1433
1434    #[test]
1435    fn ssrf_blocks_link_local() {
1436        assert!(is_private_url("http://169.254.0.1/metadata"));
1437        assert!(is_private_url("http://169.254.169.254/latest"));
1438        assert!(is_private_url("https://169.254.1.1/api"));
1439    }
1440
1441    #[test]
1442    fn ssrf_blocks_ipv6_loopback() {
1443        assert!(is_private_url("http://[::1]/api"));
1444        assert!(is_private_url("https://[::1]:8080/verify"));
1445    }
1446
1447    #[test]
1448    fn ssrf_blocks_ipv6_private_ranges_and_mapped_ipv4() {
1449        assert!(is_private_url("http://[fd00::1]/api"));
1450        assert!(is_private_url("http://[fe80::1]/api"));
1451        assert!(is_private_url("http://[::ffff:127.0.0.1]/api"));
1452    }
1453
1454    #[test]
1455    fn ssrf_blocks_zero_address() {
1456        assert!(is_private_url("http://0.0.0.0/api"));
1457        assert!(is_private_url("http://0.0.0.0:3000/verify"));
1458    }
1459
1460    #[test]
1461    fn ssrf_blocks_integer_loopback_host() {
1462        assert!(is_private_url("http://2130706433/api"));
1463    }
1464
1465    #[test]
1466    fn ssrf_blocks_hex_and_octal_ipv4_hosts() {
1467        assert!(is_private_url("http://0x7f000001/api"));
1468        assert!(is_private_url("http://0177.0.0.1/api"));
1469        assert!(is_private_url("http://0x7f.0x0.0x0.0x1/api"));
1470    }
1471
1472    #[test]
1473    fn ssrf_blocks_short_dotted_ipv4_hosts() {
1474        assert!(is_private_url("http://127.1/api"));
1475        assert!(is_private_url("http://127.0.1/api"));
1476    }
1477
1478    #[test]
1479    fn ssrf_blocks_cloud_metadata() {
1480        assert!(is_private_url("http://metadata.google.internal/"));
1481        assert!(is_private_url("http://169.254.169.254/latest/meta-data/"));
1482        assert!(is_private_url("https://metadata.google/computeMetadata"));
1483    }
1484
1485    #[test]
1486    fn ssrf_blocks_percent_encoded_private_hosts_after_decoding() {
1487        assert!(is_private_url("http://%31%32%37.0.0.1/api"));
1488    }
1489
1490    #[tokio::test]
1491    async fn resolved_client_rejects_private_dns_results() {
1492        let client = reqwest::Client::builder().build().unwrap();
1493        let resolved_client =
1494            resolved_client_for_url(&client, "http://localhost/api", Duration::from_secs(1)).await;
1495        assert!(matches!(
1496            resolved_client,
1497            Err(VerificationResult::Error(message)) if message == PRIVATE_URL_ERROR
1498        ));
1499    }
1500
1501    #[tokio::test]
1502    async fn resolved_client_rejects_private_ip_literals_and_numeric_ipv4_hosts() {
1503        let client = reqwest::Client::builder().build().unwrap();
1504
1505        for url in ["http://127.0.0.1/api", "http://2130706433/api"] {
1506            let resolved_client =
1507                resolved_client_for_url(&client, url, Duration::from_secs(1)).await;
1508            assert!(
1509                matches!(resolved_client, Err(VerificationResult::Error(ref message)) if message == PRIVATE_URL_ERROR),
1510                "expected private URL rejection for {url}, got {resolved_client:?}"
1511            );
1512        }
1513    }
1514
1515    #[tokio::test]
1516    async fn resolved_client_rejects_non_https_public_urls() {
1517        let client = reqwest::Client::builder().build().unwrap();
1518        let resolved_client =
1519            resolved_client_for_url(&client, "http://example.com/api", Duration::from_secs(1))
1520                .await;
1521        assert!(matches!(
1522            resolved_client,
1523            Err(VerificationResult::Error(message)) if message == HTTPS_ONLY_ERROR
1524        ));
1525    }
1526
1527    #[test]
1528    fn ssrf_allows_public_urls() {
1529        assert!(!is_private_url("https://api.github.com/users/octocat"));
1530        assert!(!is_private_url("https://api.openai.com/v1/models"));
1531        assert!(!is_private_url(
1532            "https://hooks.slack.com/services/T000/B000/XXXX"
1533        ));
1534        assert!(!is_private_url("http://example.com/api"));
1535        assert!(!is_private_url("http://134744072/api"));
1536    }
1537
1538    // =========================================================================
1539    // Interpolation Security Tests
1540    // =========================================================================
1541
1542    #[test]
1543    fn interpolation_url_encodes_special_chars() {
1544        let cred = "key/with/slashes";
1545        assert_eq!(
1546            interpolate("https://api.example.com/{{match}}", cred, None),
1547            "https://api.example.com/key%2Fwith%2Fslashes"
1548        );
1549    }
1550
1551    #[test]
1552    fn interpolation_url_encodes_query_params() {
1553        let cred = "key=value&other=test";
1554        assert_eq!(
1555            interpolate("https://api.example.com?token={{match}}", cred, None),
1556            "https://api.example.com?token=key%3Dvalue%26other%3Dtest"
1557        );
1558    }
1559
1560    #[test]
1561    fn interpolation_prevents_template_injection() {
1562        let cred = "{{malicious}}";
1563        let interpolated_url = interpolate("https://api.example.com/{{match}}", cred, None);
1564        assert_eq!(
1565            interpolated_url,
1566            "https://api.example.com/%7B%7Bmalicious%7D%7D"
1567        );
1568    }
1569
1570    #[test]
1571    fn interpolation_handles_newlines() {
1572        let cred = "key\nwith\nnewlines";
1573        let interpolated_url = interpolate("https://api.example.com/{{match}}", cred, None);
1574        assert!(interpolated_url.contains("%0A"));
1575        assert!(!interpolated_url.contains('\n'));
1576    }
1577
1578    #[test]
1579    fn interpolation_handles_companion_with_special_chars() {
1580        let companion = "secret/with/chars";
1581        let interpolated_url = interpolate(
1582            "https://api.example.com?key={{companion.token}}",
1583            "key",
1584            Some(companion),
1585        );
1586        assert!(interpolated_url.contains("%2F"));
1587    }
1588
1589    // =========================================================================
1590    // Body Analysis Tests
1591    // =========================================================================
1592
1593    #[test]
1594    fn body_indicates_error_null_response() {
1595        assert!(!body_indicates_error("null"));
1596        assert!(!body_indicates_error("NULL"));
1597    }
1598
1599    #[test]
1600    fn body_indicates_error_real_error_patterns() {
1601        assert!(body_indicates_error(r#"{"error": "invalid token"}"#));
1602        assert!(body_indicates_error(r#"{"unauthorized": true}"#));
1603        assert!(body_indicates_error(r#"{"invalid_key": "bad"}"#));
1604        assert!(body_indicates_error(
1605            r#"{"access_denied": "no permission"}"#
1606        ));
1607        assert!(body_indicates_error(r#"{"expired": true}"#));
1608        assert!(body_indicates_error(r#"{"revoked": "yes"}"#));
1609    }
1610
1611    #[test]
1612    fn body_success_override_patterns() {
1613        // These should NOT indicate error — success keys without explicit error
1614        assert!(!body_indicates_error(r#"{"ok":true, "error": null}"#));
1615        assert!(!body_indicates_error(
1616            r#"{"success":true, "warning": "minor"}"#
1617        ));
1618        assert!(!body_indicates_error(r#"{"authenticated":true}"#));
1619        assert!(!body_indicates_error(r#"{"valid":true}"#));
1620    }
1621
1622    #[test]
1623    fn body_error_explicit_key_overrides_success() {
1624        // An explicit "error" key with a real value should be detected as an
1625        // error even when "ok":true is also present. This prevents dead
1626        // credentials from being reported as live.
1627        assert!(body_indicates_error(
1628            r#"{"ok":true, "error": "rate_limited"}"#
1629        ));
1630        assert!(body_indicates_error(
1631            r#"{"ok":true, "error": "invalid_token"}"#
1632        ));
1633        assert!(body_indicates_error(
1634            r#"{"success":true, "error": "unauthorized"}"#
1635        ));
1636    }
1637
1638    #[test]
1639    fn body_indicates_error_empty_body() {
1640        assert!(!body_indicates_error(""));
1641    }
1642
1643    #[test]
1644    fn body_indicates_error_non_json() {
1645        assert!(!body_indicates_error("plain text response"));
1646        assert!(!body_indicates_error("<html><body>Error</body></html>"));
1647        assert!(!body_indicates_error("this has \"error\" in it"));
1648    }
1649
1650    macro_rules! indicator_case {
1651        ($name:ident, $indicator:expr) => {
1652            #[test]
1653            fn $name() {
1654                let body = format!(r#"{{"{}": true}}"#, $indicator);
1655                assert!(body_indicates_error(&body));
1656            }
1657        };
1658    }
1659
1660    indicator_case!(indicator_unauthorized_detected, "unauthorized");
1661    indicator_case!(indicator_forbidden_detected, "forbidden");
1662    indicator_case!(indicator_invalid_detected, "invalid");
1663    indicator_case!(indicator_invalid_token_detected, "invalid_token");
1664    indicator_case!(indicator_invalid_key_detected, "invalid_key");
1665    indicator_case!(indicator_invalid_api_key_detected, "invalid_api_key");
1666    indicator_case!(
1667        indicator_authentication_error_detected,
1668        "authentication_error"
1669    );
1670    indicator_case!(indicator_auth_error_detected, "auth_error");
1671    indicator_case!(indicator_unauthenticated_detected, "unauthenticated");
1672    indicator_case!(indicator_not_authenticated_detected, "not_authenticated");
1673    indicator_case!(indicator_access_denied_detected, "access_denied");
1674    indicator_case!(indicator_permission_denied_detected, "permission_denied");
1675    indicator_case!(
1676        indicator_invalid_credentials_detected,
1677        "invalid_credentials"
1678    );
1679    indicator_case!(indicator_bad_credentials_detected, "bad_credentials");
1680    indicator_case!(indicator_expired_detected, "expired");
1681    indicator_case!(indicator_token_expired_detected, "token_expired");
1682    indicator_case!(indicator_key_expired_detected, "key_expired");
1683    indicator_case!(indicator_revoked_detected, "revoked");
1684    indicator_case!(indicator_inactive_detected, "inactive");
1685    indicator_case!(indicator_disabled_detected, "disabled");
1686
1687    #[test]
1688    fn success_override_ok_true_is_not_error() {
1689        assert!(!body_indicates_error(r#"{"ok": true}"#));
1690    }
1691
1692    #[test]
1693    fn success_override_success_true_is_not_error() {
1694        assert!(!body_indicates_error(r#"{"success": true}"#));
1695    }
1696
1697    #[test]
1698    fn success_override_authenticated_true_is_not_error() {
1699        assert!(!body_indicates_error(r#"{"authenticated": true}"#));
1700    }
1701
1702    #[test]
1703    fn success_override_valid_true_is_not_error() {
1704        assert!(!body_indicates_error(r#"{"valid": true}"#));
1705    }
1706
1707    #[test]
1708    fn body_indicates_error_ignores_indicator_inside_string_values() {
1709        assert!(!body_indicates_error(
1710            r#"{"message":"this text mentions \"error\" but is not an error key"}"#
1711        ));
1712        assert!(!body_indicates_error(
1713            r#"{"detail":"the word \"invalid\" appears here as content"}"#
1714        ));
1715    }
1716
1717    // =========================================================================
1718    // Cache Tests
1719    // =========================================================================
1720
1721    #[test]
1722    fn cache_basic_hit() {
1723        let cache = cache::VerificationCache::default_ttl();
1724        cache.put(
1725            "test-cred",
1726            "test-detector",
1727            VerificationResult::Live,
1728            HashMap::from([("key".into(), "value".into())]),
1729        );
1730
1731        let cached_verification = cache.get("test-cred", "test-detector");
1732        assert!(cached_verification.is_some());
1733        let (verification, metadata) = cached_verification.unwrap();
1734        assert!(matches!(verification, VerificationResult::Live));
1735        assert_eq!(metadata.get("key"), Some(&"value".to_string()));
1736    }
1737
1738    #[test]
1739    fn cache_miss_different_credential() {
1740        let cache = cache::VerificationCache::default_ttl();
1741        cache.put(
1742            "cred-1",
1743            "detector",
1744            VerificationResult::Live,
1745            HashMap::new(),
1746        );
1747
1748        let cached_verification = cache.get("cred-2", "detector");
1749        assert!(cached_verification.is_none());
1750    }
1751
1752    #[test]
1753    fn cache_miss_different_detector() {
1754        let cache = cache::VerificationCache::default_ttl();
1755        cache.put(
1756            "cred",
1757            "detector-1",
1758            VerificationResult::Live,
1759            HashMap::new(),
1760        );
1761
1762        let cached_verification = cache.get("cred", "detector-2");
1763        assert!(cached_verification.is_none());
1764    }
1765
1766    #[test]
1767    fn cache_ttl_expiration() {
1768        let cache = cache::VerificationCache::new(Duration::from_millis(10));
1769        cache.put(
1770            "test-cred",
1771            "test-detector",
1772            VerificationResult::Live,
1773            HashMap::new(),
1774        );
1775
1776        // Immediately should be available
1777        assert!(cache.get("test-cred", "test-detector").is_some());
1778
1779        // Wait for expiration
1780        std::thread::sleep(Duration::from_millis(50));
1781
1782        // Should be expired now
1783        assert!(cache.get("test-cred", "test-detector").is_none());
1784    }
1785
1786    #[test]
1787    fn cache_eviction_of_expired_entries() {
1788        // Test that expired entries are properly evicted
1789        let cache = cache::VerificationCache::new(Duration::from_millis(1));
1790
1791        cache.put("cred-1", "det", VerificationResult::Live, HashMap::new());
1792        std::thread::sleep(Duration::from_millis(5));
1793        cache.put("cred-2", "det", VerificationResult::Live, HashMap::new());
1794
1795        // First entry should be expired, second should be present
1796        assert!(cache.get("cred-1", "det").is_none());
1797        assert!(cache.get("cred-2", "det").is_some());
1798    }
1799
1800    #[test]
1801    fn cache_integrity_after_multiple_puts() {
1802        let cache = cache::VerificationCache::default_ttl();
1803
1804        // Put same credential with different results
1805        cache.put("cred", "det", VerificationResult::Dead, HashMap::new());
1806        cache.put("cred", "det", VerificationResult::Live, HashMap::new());
1807
1808        // Should have the latest value
1809        let (verification, _) = cache.get("cred", "det").unwrap();
1810        assert!(matches!(verification, VerificationResult::Live));
1811    }
1812
1813    // =========================================================================
1814    // Dedup Mode Tests
1815    // =========================================================================
1816
1817    #[test]
1818    fn dedup_per_location_same_detector_different_files() {
1819        let m1 = RawMatch {
1820            detector_id: "test-det".into(),
1821            detector_name: "Test".into(),
1822            service: "svc".into(),
1823            severity: Severity::High,
1824            credential: "SAME_SECRET".into(),
1825            companion: None,
1826            location: MatchLocation {
1827                source: "fs".into(),
1828                file_path: Some("a.py".into()),
1829                line: Some(1),
1830                offset: 0,
1831                commit: None,
1832                author: None,
1833                date: None,
1834            },
1835            entropy: None,
1836            confidence: Some(0.9),
1837        };
1838        let m2 = RawMatch {
1839            location: MatchLocation {
1840                file_path: Some("b.py".into()),
1841                line: Some(10),
1842                ..m1.location.clone()
1843            },
1844            ..m1.clone()
1845        };
1846
1847        let groups = dedup_matches(vec![m1, m2], &DedupScope::Credential);
1848        assert_eq!(groups.len(), 1);
1849        assert_eq!(groups[0].additional_locations.len(), 1);
1850        assert_eq!(groups[0].primary_location.file_path, Some("a.py".into()));
1851    }
1852
1853    #[test]
1854    fn dedup_consolidated_different_detectors_same_credential() {
1855        let m1 = RawMatch {
1856            detector_id: "detector-1".into(),
1857            detector_name: "Detector 1".into(),
1858            service: "svc".into(),
1859            severity: Severity::High,
1860            credential: "SAME_SECRET".into(),
1861            companion: None,
1862            location: MatchLocation {
1863                source: "fs".into(),
1864                file_path: Some("a.py".into()),
1865                line: Some(1),
1866                offset: 0,
1867                commit: None,
1868                author: None,
1869                date: None,
1870            },
1871            entropy: None,
1872            confidence: Some(0.9),
1873        };
1874        let m2 = RawMatch {
1875            detector_id: "detector-2".into(),
1876            detector_name: "Detector 2".into(),
1877            location: MatchLocation {
1878                file_path: Some("b.py".into()),
1879                line: Some(10),
1880                ..m1.location.clone()
1881            },
1882            ..m1.clone()
1883        };
1884
1885        let groups = dedup_matches(vec![m1, m2], &DedupScope::Credential);
1886        // Should create separate groups because detector_id is different
1887        assert_eq!(groups.len(), 2);
1888    }
1889
1890    #[test]
1891    fn dedup_preserves_companion() {
1892        let m1 = RawMatch {
1893            detector_id: "test".into(),
1894            detector_name: "Test".into(),
1895            service: "svc".into(),
1896            severity: Severity::High,
1897            credential: "SECRET".into(),
1898            companion: None,
1899            location: MatchLocation {
1900                source: "fs".into(),
1901                file_path: Some("a.py".into()),
1902                line: Some(1),
1903                offset: 0,
1904                commit: None,
1905                author: None,
1906                date: None,
1907            },
1908            entropy: None,
1909            confidence: Some(0.9),
1910        };
1911        let m2 = RawMatch {
1912            companion: Some("companion-value".into()),
1913            location: MatchLocation {
1914                file_path: Some("b.py".into()),
1915                line: Some(10),
1916                ..m1.location.clone()
1917            },
1918            ..m1.clone()
1919        };
1920
1921        let groups = dedup_matches(vec![m1, m2], &DedupScope::Credential);
1922        assert_eq!(groups.len(), 1);
1923        assert_eq!(groups[0].companion, Some("companion-value".into()));
1924    }
1925
1926    // =========================================================================
1927    // Edge Case Tests
1928    // =========================================================================
1929
1930    #[test]
1931    fn evaluate_success_handles_redirect_status() {
1932        let spec = SuccessSpec {
1933            status: Some(301),
1934            status_not: None,
1935            body_contains: None,
1936            body_not_contains: None,
1937            json_path: None,
1938            equals: None,
1939        };
1940        assert!(evaluate_success(&spec, 301, ""));
1941        assert!(!evaluate_success(&spec, 200, ""));
1942    }
1943
1944    #[test]
1945    fn evaluate_success_rate_limit_status() {
1946        let spec = SuccessSpec {
1947            status: None,
1948            status_not: Some(429),
1949            body_contains: None,
1950            body_not_contains: None,
1951            json_path: None,
1952            equals: None,
1953        };
1954        assert!(!evaluate_success(&spec, 429, ""));
1955        assert!(evaluate_success(&spec, 200, ""));
1956    }
1957
1958    #[test]
1959    fn detector_timeout_override_takes_precedence() {
1960        let spec = keyhog_core::VerifySpec {
1961            method: HttpMethod::Get,
1962            url: "https://example.com/verify".into(),
1963            auth: AuthSpec::None,
1964            headers: vec![],
1965            body: None,
1966            success: SuccessSpec {
1967                status: Some(200),
1968                status_not: None,
1969                body_contains: None,
1970                body_not_contains: None,
1971                json_path: None,
1972                equals: None,
1973            },
1974            metadata: vec![],
1975            timeout_ms: Some(250),
1976        };
1977
1978        assert_eq!(
1979            verification_timeout(&spec, Duration::from_secs(5)),
1980            Duration::from_millis(250)
1981        );
1982
1983        let without_override = keyhog_core::VerifySpec {
1984            timeout_ms: None,
1985            ..spec
1986        };
1987        assert_eq!(
1988            verification_timeout(&without_override, Duration::from_secs(5)),
1989            Duration::from_secs(5)
1990        );
1991    }
1992
1993    #[test]
1994    fn verify_empty_url_returns_error() {
1995        // Empty URL should trigger connection error handling
1996        let rt = tokio::runtime::Runtime::new().unwrap();
1997        rt.block_on(async {
1998            let client = Client::new();
1999            let spec = keyhog_core::VerifySpec {
2000                method: HttpMethod::Get,
2001                url: "".to_string(),
2002                auth: AuthSpec::None,
2003                headers: vec![],
2004                body: None,
2005                success: SuccessSpec {
2006                    status: Some(200),
2007                    status_not: None,
2008                    body_contains: None,
2009                    body_not_contains: None,
2010                    json_path: None,
2011                    equals: None,
2012                },
2013                metadata: vec![],
2014                timeout_ms: Some(1000),
2015            };
2016
2017            let verification =
2018                verify_credential(&client, &spec, "test", None, Duration::from_secs(1))
2019                    .await
2020                    .result;
2021            assert!(matches!(verification, VerificationResult::Error(_)));
2022        });
2023    }
2024
2025    #[test]
2026    fn verify_missing_verify_spec_returns_unverifiable() {
2027        let detector = DetectorSpec {
2028            id: "test".into(),
2029            name: "Test".into(),
2030            service: "test".into(),
2031            severity: Severity::Low,
2032            patterns: vec![],
2033            companion: None,
2034            verify: None, // Missing verify spec
2035            keywords: vec![],
2036        };
2037
2038        let engine = VerificationEngine::new(&[detector], VerifyConfig::default()).unwrap();
2039
2040        let rt = tokio::runtime::Runtime::new().unwrap();
2041        rt.block_on(async {
2042            let group = DedupedMatch {
2043                detector_id: "test".into(),
2044                detector_name: "Test".into(),
2045                service: "test".into(),
2046                severity: Severity::Low,
2047                credential: "test-cred".into(),
2048                companion: None,
2049                primary_location: MatchLocation {
2050                    source: "fs".into(),
2051                    file_path: Some("test.txt".into()),
2052                    line: Some(1),
2053                    offset: 0,
2054                    commit: None,
2055                    author: None,
2056                    date: None,
2057                },
2058                additional_locations: vec![],
2059                confidence: Some(0.5),
2060            };
2061
2062            let findings = engine.verify_all(vec![group]).await;
2063            assert_eq!(findings.len(), 1);
2064            assert!(matches!(
2065                findings[0].verification,
2066                VerificationResult::Unverifiable
2067            ));
2068        });
2069    }
2070
2071    #[test]
2072    fn success_body_contains_check() {
2073        let spec = SuccessSpec {
2074            status: Some(200),
2075            status_not: None,
2076            body_contains: Some("verified".into()),
2077            body_not_contains: None,
2078            json_path: None,
2079            equals: None,
2080        };
2081        assert!(evaluate_success(&spec, 200, r#"{"status": "verified"}"#));
2082        assert!(!evaluate_success(&spec, 200, r#"{"status": "pending"}"#));
2083    }
2084
2085    #[test]
2086    fn success_body_not_contains_check() {
2087        let spec = SuccessSpec {
2088            status: Some(200),
2089            status_not: None,
2090            body_contains: None,
2091            body_not_contains: Some("error".into()),
2092            json_path: None,
2093            equals: None,
2094        };
2095        assert!(evaluate_success(&spec, 200, r#"{"ok": true}"#));
2096        assert!(!evaluate_success(&spec, 200, r#"{"error": "failed"}"#));
2097    }
2098
2099    // =========================================================================
2100    // Verification Edge Cases
2101    // =========================================================================
2102
2103    #[test]
2104    fn verify_url_exactly_8kb_max_length() {
2105        // URL exactly 8KB (8192 bytes) should be valid for interpolation
2106        let long_path = "a".repeat(8192 - "https://api.example.com/".len());
2107        let url = format!("https://api.example.com/{}", long_path);
2108        assert_eq!(url.len(), 8192);
2109
2110        // Interpolation should handle this without issues
2111        let interpolated_url = interpolate(&url, "test-cred", None);
2112        assert_eq!(interpolated_url.len(), 8192);
2113        assert!(interpolated_url.starts_with("https://api.example.com/"));
2114    }
2115
2116    #[test]
2117    fn credential_10kb_long() {
2118        // Credential that is 10KB long should be handled properly
2119        let long_credential = "x".repeat(10240);
2120        assert_eq!(long_credential.len(), 10240);
2121
2122        // Interpolation with exact template should return credential unchanged
2123        let interpolated_credential = interpolate("{{match}}", &long_credential, None);
2124        assert_eq!(interpolated_credential.len(), 10240);
2125        assert_eq!(interpolated_credential, long_credential);
2126
2127        // URL interpolation should URL-encode it
2128        let url_result = interpolate(
2129            "https://api.example.com/?key={{match}}",
2130            &long_credential,
2131            None,
2132        );
2133        assert!(url_result.contains("xxxxxxxxxx"));
2134    }
2135
2136    #[test]
2137    fn credential_all_printable_ascii() {
2138        // Credential containing every printable ASCII character (32-126)
2139        let all_ascii: String = (32..=126).map(|c| c as u8 as char).collect();
2140        assert_eq!(all_ascii.len(), 95);
2141
2142        // Test interpolation doesn't corrupt special characters when used as literal
2143        let interpolated_credential = interpolate("{{match}}", &all_ascii, None);
2144        assert_eq!(interpolated_credential, all_ascii);
2145
2146        // URL encoding should handle all special characters
2147        let url_result = interpolate("https://api.example.com/{{match}}", &all_ascii, None);
2148        // All non-alphanumeric characters should be percent-encoded
2149        assert!(url_result.starts_with("https://api.example.com/"));
2150    }
2151
2152    #[test]
2153    fn companion_identical_to_primary_credential() {
2154        // Companion that is identical to the primary credential
2155        let credential = "SAME_CREDENTIAL_12345";
2156
2157        let interpolated_credential = interpolate("{{match}}", credential, Some(credential));
2158        assert_eq!(interpolated_credential, credential);
2159
2160        // Test with companion template
2161        let comp_result = interpolate("{{companion.secret}}", credential, Some(credential));
2162        assert_eq!(comp_result, credential);
2163
2164        // URL interpolation with both
2165        let url_result = interpolate(
2166            "https://api.example.com/?primary={{match}}&companion={{companion.secret}}",
2167            credential,
2168            Some(credential),
2169        );
2170        // Both should be URL-encoded when embedded
2171        assert!(url_result.contains("primary="));
2172        assert!(url_result.contains("companion="));
2173    }
2174
2175    #[test]
2176    fn verify_spec_json_path_with_dots_in_field_names() {
2177        // JSON path containing dots in field names (needs proper escaping handling)
2178        // Note: json_pointer_get uses dot-separated paths, so field names with dots
2179        // are not directly supported - this tests the current behavior
2180        let document: serde_json::Value =
2181            serde_json::from_str(r#"{"field.with.dots": {"nested.key": "value"}}"#).unwrap();
2182        assert!(json_pointer_get(&document, "field.with.dots").is_none());
2183
2184        // Normal nested access works fine
2185        let normal_val: serde_json::Value =
2186            serde_json::from_str(r#"{"data": {"user.name": "alice"}}"#).unwrap();
2187        assert_eq!(
2188            json_pointer_get(&normal_val, "data"),
2189            Some(&serde_json::Value::Object(
2190                [(
2191                    "user.name".to_string(),
2192                    serde_json::Value::String("alice".into())
2193                )]
2194                .into_iter()
2195                .collect()
2196            ))
2197        );
2198    }
2199
2200    #[test]
2201    fn success_body_contains_matches_credential_itself() {
2202        // When body_contains pattern is the credential itself
2203        let credential = format!("sk_test_{}", "4242424242424242");
2204        let body = format!(r#"{{"token": "{}", "valid": true}}"#, credential);
2205
2206        let spec = SuccessSpec {
2207            status: Some(200),
2208            status_not: None,
2209            body_contains: Some(credential.into()),
2210            body_not_contains: None,
2211            json_path: None,
2212            equals: None,
2213        };
2214
2215        assert!(evaluate_success(&spec, 200, &body));
2216
2217        // Should fail if credential not in body
2218        let wrong_body = r#"{"token": "other", "valid": true}"#;
2219        assert!(!evaluate_success(&spec, 200, wrong_body));
2220    }
2221
2222    #[tokio::test]
2223    async fn consecutive_verifications_cache_poisoning_protection() {
2224        use std::sync::atomic::{AtomicUsize, Ordering};
2225
2226        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
2227        let addr = listener.local_addr().unwrap();
2228        let request_count = Arc::new(AtomicUsize::new(0));
2229        let count_clone = request_count.clone();
2230
2231        tokio::spawn(async move {
2232            loop {
2233                let Ok((mut stream, _)) = listener.accept().await else {
2234                    break;
2235                };
2236                let count = count_clone.clone();
2237                tokio::spawn(async move {
2238                    let mut buf = [0u8; 1024];
2239                    let _ = stream.read(&mut buf).await;
2240                    count.fetch_add(1, Ordering::SeqCst);
2241                    let _ = stream
2242                        .write_all(
2243                            b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\n{\"valid\": true}",
2244                        )
2245                        .await;
2246                });
2247            }
2248        });
2249
2250        let detector = DetectorSpec {
2251            id: "cache-test".into(),
2252            name: "Cache Test".into(),
2253            service: "cache-service".into(),
2254            severity: Severity::High,
2255            patterns: vec![],
2256            companion: None,
2257            verify: Some(keyhog_core::VerifySpec {
2258                method: HttpMethod::Get,
2259                url: format!("http://127.0.0.1:{}/verify", addr.port()),
2260                auth: AuthSpec::None,
2261                headers: vec![],
2262                body: None,
2263                success: SuccessSpec {
2264                    status: Some(200),
2265                    status_not: None,
2266                    body_contains: None,
2267                    body_not_contains: None,
2268                    json_path: None,
2269                    equals: None,
2270                },
2271                metadata: vec![],
2272                timeout_ms: None,
2273            }),
2274            keywords: vec![],
2275        };
2276
2277        let engine = VerificationEngine::new(
2278            &[detector],
2279            VerifyConfig {
2280                timeout: Duration::from_secs(1),
2281                max_concurrent_per_service: 50,
2282                max_concurrent_global: 50,
2283                ..Default::default()
2284            },
2285        )
2286        .unwrap();
2287
2288        let make_match = |cred: &str| RawMatch {
2289            detector_id: "cache-test".into(),
2290            detector_name: "Cache Test".into(),
2291            service: "cache-service".into(),
2292            severity: Severity::High,
2293            credential: cred.into(),
2294            companion: None,
2295            location: MatchLocation {
2296                source: "fs".into(),
2297                file_path: Some("test.txt".into()),
2298                line: Some(1),
2299                offset: 0,
2300                commit: None,
2301                author: None,
2302                date: None,
2303            },
2304            entropy: None,
2305            confidence: Some(0.9),
2306        };
2307
2308        // First verification with credential A
2309        let group_a = dedup_matches(vec![make_match("cred-a")], &DedupScope::Credential).pop().unwrap();
2310        let findings_a = engine.verify_all(vec![group_a.clone()]).await;
2311        assert_eq!(findings_a.len(), 1);
2312
2313        // Second verification with same credential A (should use cache)
2314        let findings_a2 = engine.verify_all(vec![group_a.clone()]).await;
2315        assert_eq!(findings_a2.len(), 1);
2316
2317        // Both results should be identical (cache hit)
2318        assert_eq!(
2319            std::mem::discriminant(&findings_a[0].verification),
2320            std::mem::discriminant(&findings_a2[0].verification)
2321        );
2322
2323        // Different credential B should be independent
2324        let group_b = dedup_matches(vec![make_match("cred-b")], &DedupScope::Credential).pop().unwrap();
2325        let findings_b = engine.verify_all(vec![group_b]).await;
2326        assert_eq!(findings_b.len(), 1);
2327
2328        // Cache should not have cross-contaminated results
2329        assert!(matches!(
2330            findings_a[0].verification,
2331            VerificationResult::Live | VerificationResult::Dead | VerificationResult::Error(_)
2332        ));
2333    }
2334
2335    #[test]
2336    fn verify_with_delete_method() {
2337        // Verify that DELETE method is properly supported
2338        let rt = tokio::runtime::Runtime::new().unwrap();
2339        rt.block_on(async {
2340            let client = Client::new();
2341
2342            // Build a DELETE request - should not panic
2343            let request = request_for_method(
2344                &client,
2345                &HttpMethod::Delete,
2346                reqwest::Url::parse("https://example.com/resource/123").unwrap(),
2347            );
2348
2349            // The request builder should be functional (we can't actually send without a server)
2350            let _ = request;
2351        });
2352    }
2353
2354    #[test]
2355    fn verify_url_with_ipv6_literal() {
2356        // URL with IPv6 literal address should be properly handled
2357        let ipv6_urls = vec![
2358            "http://[::1]:8080/api",
2359            "https://[2001:db8::1]/verify",
2360            "http://[fe80::1]:3000/check",
2361        ];
2362
2363        for url in ipv6_urls {
2364            // parse_url_host should extract the host correctly
2365            let host = parse_url_host(url);
2366            assert!(host.is_some(), "Failed to parse host for: {}", url);
2367
2368            let host_str = host.unwrap();
2369            // IPv6 addresses should be handled (without brackets after parsing)
2370            assert!(
2371                host_str.contains(':')
2372                    || host_str == "::1"
2373                    || host_str.starts_with("fe80")
2374                    || host_str.starts_with("2001"),
2375                "Unexpected host for {}: {}",
2376                url,
2377                host_str
2378            );
2379        }
2380
2381        // IPv6 loopback should be blocked as private
2382        assert!(is_private_url("http://[::1]/api"));
2383        assert!(is_private_url("http://[::1]:8080/verify"));
2384
2385        // IPv6 ULA should be blocked as private
2386        assert!(is_private_url("http://[fd00::1]/api"));
2387
2388        // IPv6 link-local should be blocked as private
2389        assert!(is_private_url("http://[fe80::1]/api"));
2390        assert!(is_private_url("http://[fe80::1]:3000/check"));
2391    }
2392
2393    #[test]
2394    fn body_valid_jsonl_multiple_objects() {
2395        // Body with JSONL format (multiple JSON objects, one per line)
2396        let jsonl_body = r#"{"id": 1, "valid": true}
2397{"id": 2, "valid": false}
2398{"id": 3, "valid": true}"#;
2399
2400        // body_indicates_error should handle JSONL gracefully
2401        // It looks for error indicators as JSON keys
2402        assert!(!body_indicates_error(jsonl_body));
2403
2404        // Success spec with body_contains should work on the entire body
2405        let spec = SuccessSpec {
2406            status: Some(200),
2407            status_not: None,
2408            body_contains: Some("\"valid\": true".into()),
2409            body_not_contains: None,
2410            json_path: None,
2411            equals: None,
2412        };
2413
2414        assert!(evaluate_success(&spec, 200, jsonl_body));
2415
2416        // Should fail if pattern not present
2417        let spec_missing = SuccessSpec {
2418            status: Some(200),
2419            status_not: None,
2420            body_contains: Some("not_found".into()),
2421            body_not_contains: None,
2422            json_path: None,
2423            equals: None,
2424        };
2425        assert!(!evaluate_success(&spec_missing, 200, jsonl_body));
2426
2427        // JSON path won't work because the body as a whole is not valid JSON
2428        let spec_json = SuccessSpec {
2429            status: Some(200),
2430            status_not: None,
2431            body_contains: None,
2432            body_not_contains: None,
2433            json_path: Some("id".into()),
2434            equals: None,
2435        };
2436        assert!(!evaluate_success(&spec_json, 200, jsonl_body));
2437    }
2438}