1pub mod cache;
7mod interpolate;
8mod ssrf;
9mod verify;
10
11use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use dashmap::DashMap;
16use keyhog_core::{
17 DedupedMatch, DetectorSpec, VerificationResult, VerifiedFinding,
18 redact,
19};
20
21pub use keyhog_core::{DedupScope, dedup_matches};
24use reqwest::Client;
25use thiserror::Error;
26use tokio::sync::{Notify, Semaphore};
27
28#[derive(Debug, Error)]
39pub enum VerifyError {
40 #[error(
41 "failed to send HTTP request: {0}. Fix: check network access, proxy settings, and the verification endpoint"
42 )]
43 Http(#[from] reqwest::Error),
44 #[error(
45 "failed to build configured HTTP client: {0}. Fix: use a valid timeout and supported TLS/network configuration"
46 )]
47 ClientBuild(reqwest::Error),
48 #[error(
49 "failed to resolve verification field: {0}. Fix: use `match` or `companion.<name>` fields that exist in the detector spec"
50 )]
51 FieldResolution(String),
52}
53
54pub struct VerificationEngine {
81 client: Client,
82 detectors: HashMap<String, DetectorSpec>,
83 service_semaphores: HashMap<String, Arc<Semaphore>>,
85 global_semaphore: Arc<Semaphore>,
87 timeout: Duration,
88 cache: Arc<cache::VerificationCache>,
90 inflight: Arc<DashMap<(String, String), Arc<Notify>>>,
92 max_inflight_keys: usize,
93}
94
/// Tunable limits supplied when constructing a verification engine.
///
/// `Default` yields a 5 second timeout, 5 concurrent requests per service,
/// 20 concurrent requests globally, and 10 000 in-flight keys.
pub struct VerifyConfig {
    /// Timeout duration (default: 5 seconds).
    pub timeout: Duration,
    /// Per-service concurrency limit (default: 5).
    pub max_concurrent_per_service: usize,
    /// Global concurrency limit (default: 20).
    pub max_concurrent_global: usize,
    /// Limit on tracked in-flight keys (default: 10 000).
    pub max_inflight_keys: usize,
}

impl Default for VerifyConfig {
    /// Builds the configuration from the documented default constants.
    fn default() -> Self {
        const DEFAULT_TIMEOUT: Duration = Duration::from_secs(5);
        const DEFAULT_PER_SERVICE: usize = 5;
        const DEFAULT_GLOBAL: usize = 20;
        const DEFAULT_INFLIGHT_KEYS: usize = 10_000;

        Self {
            timeout: DEFAULT_TIMEOUT,
            max_concurrent_per_service: DEFAULT_PER_SERVICE,
            max_concurrent_global: DEFAULT_GLOBAL,
            max_inflight_keys: DEFAULT_INFLIGHT_KEYS,
        }
    }
}
131
132pub(crate) fn into_finding(
137 group: DedupedMatch,
138 verification: VerificationResult,
139 metadata: HashMap<String, String>,
140) -> VerifiedFinding {
141 VerifiedFinding {
142 detector_id: group.detector_id,
143 detector_name: group.detector_name,
144 service: group.service,
145 severity: group.severity,
146 credential_redacted: redact(&group.credential),
147 location: group.primary_location,
148 verification,
149 metadata,
150 additional_locations: group.additional_locations,
151 confidence: group.confidence,
152 }
153}
154
155#[cfg(test)]
156mod tests {
157 use super::*;
158 use crate::interpolate::interpolate;
159 use crate::ssrf::{is_private_url, parse_url_host};
160 const MAX_RESPONSE_BODY_BYTES: usize = 1024 * 1024;
162 use keyhog_core::{
163 AuthSpec, DetectorSpec, HttpMethod, MatchLocation, RawMatch, Severity, SuccessSpec,
164 VerificationResult,
165 };
166 use std::collections::HashMap;
167 use std::sync::Arc;
168 use std::sync::atomic::{AtomicUsize, Ordering};
169 use std::time::Duration;
170 use tokio::io::{AsyncReadExt, AsyncWriteExt};
171 use tokio::net::TcpListener;
172
173 #[test]
179 fn verify_url_with_unicode_hostname() {
180 let unicode_urls = vec![
183 "https://münchen.example.com/api",
184 "https://日本語.example.com/verify",
185 "https://test.домен.рф/check",
186 "https://example.中国/path",
187 ];
188
189 for url in unicode_urls {
190 let host = parse_url_host(url);
192 match host {
195 Some(h) => {
196 assert!(
198 !h.is_empty(),
199 "Parsed host should not be empty for URL: {}",
200 url
201 );
202 }
203 None => {
204 }
206 }
207 }
208
209 let interpolated = interpolate("https://example.com/日本語/{{match}}", "test-key", None);
211 assert!(
213 interpolated.contains("test-key")
214 || interpolated.contains("%7B%7Bmatch%7D%7D")
215 || interpolated.contains("%2D"),
216 "Interpolated URL should contain credential or encoding: {}",
217 interpolated
218 );
219 }
220
221 #[test]
223 fn verify_url_with_percent_encoded_path_traversal() {
224 let traversal_urls = vec![
226 "https://example.com/api/%2e%2e/%2e%2e/etc/passwd",
227 "https://example.com/api/%2e%2e%2fadmin",
228 "https://example.com/%252e%252e/admin", "https://example.com/api/..%2f..%2fsecret",
230 ];
231
232 for url in traversal_urls {
233 let parsed = reqwest::Url::parse(url);
235 assert!(
236 parsed.is_ok(),
237 "URL with percent-encoding should parse: {}",
238 url
239 );
240
241 assert!(
243 !is_private_url(url),
244 "Public URL with path traversal encoding should not be private: {}",
245 url
246 );
247 }
248
249 let traversal_cred = "../../../etc/passwd";
251 let interpolated = interpolate("https://api.example.com/{{match}}", traversal_cred, None);
252 assert!(
253 !interpolated.contains("../"),
254 "Path traversal in credential should be encoded: {}",
255 interpolated
256 );
257 assert!(
258 interpolated.contains("%2F") || interpolated.contains("."),
259 "Credential should be encoded or preserved but not traverse: {}",
260 interpolated
261 );
262 }
263
264 #[test]
266 fn verify_with_sql_injection_credential() {
267 let sql_injection_creds = vec![
268 "' OR '1'='1",
269 "'; DROP TABLE users; --",
270 "' UNION SELECT * FROM passwords --",
271 "1' AND 1=1 --",
272 "admin'--",
273 "1'; DELETE FROM credentials WHERE '1'='1",
274 ];
275
276 for cred in sql_injection_creds {
277 let interpolated = interpolate("{{match}}", cred, None);
279 assert_eq!(
280 interpolated, cred,
281 "SQL injection credential should be preserved literally"
282 );
283
284 let url_interpolated =
286 interpolate("https://api.example.com/?key={{match}}", cred, None);
287 assert!(
288 !url_interpolated.contains(" "),
289 "Spaces should be encoded in URL: {}",
290 url_interpolated
291 );
292
293 assert!(
295 url_interpolated.contains("%27") || url_interpolated.contains("%22"),
296 "Quotes should be encoded: {}",
297 url_interpolated
298 );
299 }
300 }
301
302 #[tokio::test]
304 async fn verify_with_crlf_injection_credential() {
305 let crlf_payloads = vec![
306 "value\r\nHost: evil.com",
307 "token\r\n\r\nGET /admin HTTP/1.1\r\nHost: attacker.com",
308 "key\nX-Injected: malicious",
309 "secret\r\nContent-Length: 0\r\n\r\n",
310 ];
311
312 for payload in crlf_payloads {
313 let interpolated_url =
315 interpolate("https://api.example.com/?token={{match}}", payload, None);
316
317 assert!(
319 !interpolated_url.contains('\r') && !interpolated_url.contains('\n'),
320 "CRLF characters must be encoded in URL: {:?}",
321 interpolated_url
322 );
323
324 assert!(
326 interpolated_url.contains("%0D") || interpolated_url.contains("%0A"),
327 "CRLF should be percent-encoded: {:?}",
328 interpolated_url
329 );
330
331 let interpolated_literal = interpolate("{{match}}", payload, None);
334 assert!(
335 !interpolated_literal.contains('\r') && !interpolated_literal.contains('\n'),
336 "CRLF should be stripped from raw interpolation: {:?}",
337 interpolated_literal
338 );
339 }
340 }
341
342 #[test]
344 fn verify_with_base64_encoded_credential() {
345 fn base64_encode(input: &str) -> String {
347 const CHARSET: &[u8] =
348 b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
349 let bytes = input.as_bytes();
350 let mut result = String::new();
351
352 for chunk in bytes.chunks(3) {
353 let b = match chunk.len() {
354 1 => [chunk[0], 0, 0],
355 2 => [chunk[0], chunk[1], 0],
356 3 => [chunk[0], chunk[1], chunk[2]],
357 _ => [0, 0, 0],
358 };
359
360 let idx1 = (b[0] >> 2) as usize;
361 let idx2 = (((b[0] & 0b11) << 4) | (b[1] >> 4)) as usize;
362 let idx3 = (((b[1] & 0b1111) << 2) | (b[2] >> 6)) as usize;
363 let idx4 = (b[2] & 0b111111) as usize;
364
365 result.push(CHARSET[idx1] as char);
366 result.push(CHARSET[idx2] as char);
367 result.push(if chunk.len() > 1 { CHARSET[idx3] } else { b'=' } as char);
368 result.push(if chunk.len() > 2 { CHARSET[idx4] } else { b'=' } as char);
369 }
370 result
371 }
372
373 let original_cred = format!("sk_live_{}", "4242424242424242");
375 let base64_encoded = base64_encode(&original_cred);
376
377 assert_ne!(
379 original_cred, base64_encoded,
380 "Base64 encoding should produce different string"
381 );
382
383 let interpolated_original = interpolate("{{match}}", &original_cred, None);
385 let interpolated_base64 = interpolate("{{match}}", &base64_encoded, None);
386
387 assert_ne!(
388 interpolated_original, interpolated_base64,
389 "Original and base64 credentials should produce different interpolations"
390 );
391
392 assert!(
394 base64_encoded
395 .chars()
396 .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/' || c == '='),
397 "Base64 should only contain alphanumeric, +, /, = characters"
398 );
399
400 let double_encoded = base64_encode(&base64_encoded);
402 let interpolated_double = interpolate("{{match}}", &double_encoded, None);
403 assert_ne!(
404 interpolated_double, interpolated_base64,
405 "Double-encoded should differ from single-encoded"
406 );
407 }
408
409 #[tokio::test]
411 async fn verify_timeout_of_exactly_zero_ms() {
412 let zero_duration = Duration::from_millis(0);
414
415 let result = VerificationEngine::new(
417 &[],
418 VerifyConfig {
419 timeout: zero_duration,
420 max_concurrent_per_service: 1,
421 max_concurrent_global: 1,
422 max_inflight_keys: 100,
423 },
424 );
425
426 match result {
428 Ok(_) => {
429 }
431 Err(_) => {
432 }
434 }
435 }
436
437 #[test]
439 fn verify_timeout_of_u64_max_ms() {
440 let max_duration = Duration::from_millis(u64::MAX);
442
443 let result = std::panic::catch_unwind(|| {
445 VerificationEngine::new(
446 &[],
447 VerifyConfig {
448 timeout: max_duration,
449 max_concurrent_per_service: 1,
450 max_concurrent_global: 1,
451 max_inflight_keys: 100,
452 },
453 )
454 });
455
456 assert!(result.is_ok(), "u64::MAX timeout should not cause panic");
458 }
459
460 #[tokio::test]
462 async fn verify_with_empty_credential_string() {
463 let empty_cred = "";
464
465 let interpolated = interpolate("https://api.example.com/?key={{match}}", empty_cred, None);
467 assert_eq!(
468 interpolated, "https://api.example.com/?key=",
469 "Empty credential should result in empty query param"
470 );
471
472 let cache = cache::VerificationCache::default_ttl();
474 cache.put(
475 empty_cred,
476 "test-detector",
477 VerificationResult::Dead,
478 HashMap::new(),
479 );
480
481 let cached = cache.get(empty_cred, "test-detector");
482 assert!(cached.is_some(), "Empty credential should be cacheable");
483 assert!(
484 matches!(cached.unwrap().0, VerificationResult::Dead),
485 "Empty credential cache should return correct result"
486 );
487 }
488
489 #[tokio::test]
491 async fn verify_with_credential_longer_than_1mb() {
492 let mb_credential = "x".repeat(1024 * 1024 + 1024); assert!(
495 mb_credential.len() > MAX_RESPONSE_BODY_BYTES,
496 "Test credential should be > 1MB"
497 );
498
499 let interpolated = interpolate("{{match}}", &mb_credential, None);
501 assert_eq!(
502 interpolated.len(),
503 mb_credential.len(),
504 "Interpolated credential should preserve size"
505 );
506
507 let url_interpolated = interpolate(
509 "https://api.example.com/?key={{match}}",
510 &mb_credential,
511 None,
512 );
513 assert!(
514 url_interpolated.len() > mb_credential.len(),
515 "URL-encoded credential should be larger"
516 );
517
518 let cache = cache::VerificationCache::default_ttl();
520 cache.put(
521 &mb_credential,
522 "test-detector",
523 VerificationResult::Live,
524 HashMap::new(),
525 );
526
527 let cached = cache.get(&mb_credential, "test-detector");
528 assert!(
529 cached.is_some(),
530 "Large credential should be cacheable (stores hash)"
531 );
532 }
533
534 #[tokio::test]
536 async fn verify_two_detectors_same_credential_simultaneously() {
537 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
538 let addr = listener.local_addr().unwrap();
539 let request_count = Arc::new(AtomicUsize::new(0));
540 let count_clone = request_count.clone();
541
542 tokio::spawn(async move {
544 loop {
545 let Ok((mut stream, _)) = listener.accept().await else {
546 break;
547 };
548 let count = count_clone.clone();
549 tokio::spawn(async move {
550 let mut buf = [0u8; 4096];
551 let _ = stream.read(&mut buf).await;
552 count.fetch_add(1, Ordering::SeqCst);
553 let _ = stream
554 .write_all(
555 b"HTTP/1.1 200 OK\r\nContent-Length: 15\r\n\r\n{\"valid\": true}",
556 )
557 .await;
558 });
559 }
560 });
561
562 let detector1 = DetectorSpec {
564 id: "detector-1".into(),
565 name: "Detector 1".into(),
566 service: "test-service".into(),
567 severity: Severity::High,
568 patterns: vec![],
569 companion: None,
570 verify: Some(keyhog_core::VerifySpec {
571 method: HttpMethod::Get,
572 url: format!("http://127.0.0.1:{}/verify1", addr.port()),
573 auth: AuthSpec::None,
574 headers: vec![],
575 body: None,
576 success: SuccessSpec {
577 status: Some(200),
578 status_not: None,
579 body_contains: None,
580 body_not_contains: None,
581 json_path: None,
582 equals: None,
583 },
584 metadata: vec![],
585 timeout_ms: None,
586 }),
587 keywords: vec![],
588 };
589
590 let detector2 = DetectorSpec {
591 id: "detector-2".into(),
592 name: "Detector 2".into(),
593 service: "test-service".into(), severity: Severity::High,
595 patterns: vec![],
596 companion: None,
597 verify: Some(keyhog_core::VerifySpec {
598 method: HttpMethod::Get,
599 url: format!("http://127.0.0.1:{}/verify2", addr.port()),
600 auth: AuthSpec::None,
601 headers: vec![],
602 body: None,
603 success: SuccessSpec {
604 status: Some(200),
605 status_not: None,
606 body_contains: None,
607 body_not_contains: None,
608 json_path: None,
609 equals: None,
610 },
611 metadata: vec![],
612 timeout_ms: None,
613 }),
614 keywords: vec![],
615 };
616
617 let engine = VerificationEngine::new(
618 &[detector1.clone(), detector2.clone()],
619 VerifyConfig {
620 timeout: Duration::from_secs(2),
621 max_concurrent_per_service: 10,
622 max_concurrent_global: 20,
623 max_inflight_keys: 1000,
624 },
625 )
626 .unwrap();
627
628 let shared_credential = "shared-secret-key-12345";
630
631 let make_match = |detector: &DetectorSpec| RawMatch {
632 detector_id: detector.id.clone(),
633 detector_name: detector.name.clone(),
634 service: detector.service.clone(),
635 severity: Severity::High,
636 credential: shared_credential.into(),
637 companion: None,
638 location: MatchLocation {
639 source: "fs".into(),
640 file_path: Some("test.txt".into()),
641 line: Some(1),
642 offset: 0,
643 commit: None,
644 author: None,
645 date: None,
646 },
647 entropy: None,
648 confidence: Some(0.9),
649 };
650
651 let match1 = make_match(&detector1);
653 let match2 = make_match(&detector2);
654
655 let group1 = dedup_matches(vec![match1], &DedupScope::Credential).pop().unwrap();
656 let group2 = dedup_matches(vec![match2], &DedupScope::Credential).pop().unwrap();
657
658 let findings = engine.verify_all(vec![group1, group2]).await;
660
661 assert_eq!(findings.len(), 2, "Should have 2 findings");
662
663 let detector_ids: Vec<_> = findings.iter().map(|f| &f.detector_id).collect();
665 assert!(detector_ids.contains(&&"detector-1".to_string()));
666 assert!(detector_ids.contains(&&"detector-2".to_string()));
667 }
668
669 #[test]
671 fn verify_url_with_no_path() {
672 let no_path_urls = vec!["https://api.example.com", "https://api.example.com:443"];
674
675 for url in no_path_urls {
676 let parsed = reqwest::Url::parse(url);
677 assert!(parsed.is_ok(), "URL without path should parse: {}", url);
678
679 let parsed = parsed.unwrap();
680 assert_eq!(
681 parsed.path(),
682 "/",
683 "URL without explicit path should default to /"
684 );
685
686 assert!(
688 !is_private_url(url),
689 "Public URL without path should not be private"
690 );
691 }
692
693 let interpolated = interpolate("https://api.example.com?key={{match}}", "test-value", None);
695 assert!(
697 interpolated == "https://api.example.com?key=test-value"
698 || interpolated == "https://api.example.com?key=test%2Dvalue",
699 "Interpolation should add query to no-path URL: got {}",
700 interpolated
701 );
702 }
703
704 #[test]
706 fn verify_url_with_username_password_in_host() {
707 let urls_with_auth = vec![
709 "https://user:pass@api.example.com/endpoint",
710 "https://admin:secret123@host.com:8080/api",
711 "https://user%40domain:p%40ss@example.com/path",
712 ];
713
714 for url in urls_with_auth {
715 let parsed = reqwest::Url::parse(url);
716 assert!(parsed.is_ok(), "URL with auth info should parse: {}", url);
717
718 let parsed = parsed.unwrap();
719 assert!(
720 parsed.username().is_empty() || !parsed.username().is_empty(),
721 "Username may or may not be present after normalization"
722 );
723
724 }
727
728 let interpolated = interpolate(
730 "https://{{match}}@api.example.com/endpoint",
731 "user:pass",
732 None,
733 );
734 assert!(
736 interpolated.contains("%40") || interpolated.contains("@"),
737 "URL interpolation should handle auth-like patterns"
738 );
739 }
740
741 #[test]
743 fn verify_spec_with_contradicting_success_criteria() {
744 let contradictory_spec = SuccessSpec {
749 status: Some(200),
750 status_not: Some(200),
751 body_contains: None,
752 body_not_contains: None,
753 json_path: None,
754 equals: None,
755 };
756
757 assert!(
762 contradictory_spec.status.is_some() && contradictory_spec.status_not.is_some(),
763 "Spec has both status and status_not defined"
764 );
765 assert_eq!(
766 contradictory_spec.status, contradictory_spec.status_not,
767 "Spec requires status to be {:?} and NOT be {:?}",
768 contradictory_spec.status, contradictory_spec.status_not
769 );
770
771 let body_contradiction = SuccessSpec {
773 status: Some(200),
774 status_not: None,
775 body_contains: Some("success".into()),
776 body_not_contains: Some("success".into()),
777 json_path: None,
778 equals: None,
779 };
780
781 assert_eq!(
782 body_contradiction.body_contains, body_contradiction.body_not_contains,
783 "Spec requires body to contain '{:?}' and NOT contain '{:?}'",
784 body_contradiction.body_contains, body_contradiction.body_not_contains
785 );
786
787 fn status_matches(status: Option<u16>, status_not: Option<u16>, code: u16) -> bool {
789 if let Some(expected) = status {
790 if code != expected {
791 return false;
792 }
793 }
794 if let Some(not_expected) = status_not {
795 if code == not_expected {
796 return false;
797 }
798 }
799 true
800 }
801
802 assert!(
804 !status_matches(Some(200), Some(200), 200),
805 "Contradictory spec should fail for status 200"
806 );
807 assert!(
808 !status_matches(Some(200), Some(200), 201),
809 "Contradictory spec should fail for status 201"
810 );
811 assert!(
812 !status_matches(Some(200), Some(200), 404),
813 "Contradictory spec should fail for status 404"
814 );
815 }
816
817 #[test]
819 fn body_analysis_on_deeply_nested_json() {
820 let mut deep_json = String::new();
822 for _ in 0..100 {
823 deep_json.push_str(r#"{"level": "#);
824 }
825 deep_json.push_str("\"value\"");
826 for _ in 0..100 {
827 deep_json.push('}');
828 }
829
830 let parsed: Result<serde_json::Value, _> = serde_json::from_str(&deep_json);
832 assert!(parsed.is_ok(), "100-level deep JSON should parse");
833
834 let value = parsed.unwrap();
836 let mut current = &value;
837 for _ in 0..100 {
838 current = current
839 .get("level")
840 .expect("Should have 'level' key at each depth");
841 }
842 assert_eq!(current, &serde_json::Value::String("value".into()));
843
844 let mut deep_error_json = String::new();
846 for _ in 0..99 {
847 deep_error_json.push_str(r#"{"nested": "#);
848 }
849 deep_error_json.push_str(r#"{"error": "deep failure"}"#);
850 for _ in 0..99 {
851 deep_error_json.push('}');
852 }
853
854 let parsed_error: Result<serde_json::Value, _> = serde_json::from_str(&deep_error_json);
855 assert!(
856 parsed_error.is_ok(),
857 "Deep JSON with error should also parse"
858 );
859
860 let error_value = parsed_error.unwrap();
862 let mut current = &error_value;
863 for _ in 0..99 {
864 current = current.get("nested").expect("Should have 'nested' key");
865 }
866 assert!(
867 current.get("error").is_some(),
868 "Should be able to access deep error field"
869 );
870 }
871
872 #[test]
874 fn cache_behavior_same_credential_different_detectors() {
875 let cache = cache::VerificationCache::default_ttl();
876 let credential = "shared-credential-abc123";
877
878 cache.put(
880 credential,
881 "detector-1",
882 VerificationResult::Live,
883 HashMap::from([("source".into(), "det1".into())]),
884 );
885
886 cache.put(
888 credential,
889 "detector-2",
890 VerificationResult::Dead,
891 HashMap::from([("source".into(), "det2".into())]),
892 );
893
894 let cached1 = cache.get(credential, "detector-1");
896 assert!(cached1.is_some(), "Detector 1 should have cached result");
897 let (result1, meta1) = cached1.unwrap();
898 assert!(
899 matches!(result1, VerificationResult::Live),
900 "Detector 1 should have Live result"
901 );
902 assert_eq!(meta1.get("source"), Some(&"det1".to_string()));
903
904 let cached2 = cache.get(credential, "detector-2");
905 assert!(cached2.is_some(), "Detector 2 should have cached result");
906 let (result2, meta2) = cached2.unwrap();
907 assert!(
908 matches!(result2, VerificationResult::Dead),
909 "Detector 2 should have Dead result"
910 );
911 assert_eq!(meta2.get("source"), Some(&"det2".to_string()));
912
913 let cached3 = cache.get(credential, "detector-3");
915 assert!(
916 cached3.is_none(),
917 "Detector 3 should not have cached result"
918 );
919
920 assert_eq!(
922 cache.len(),
923 2,
924 "Cache should have 2 entries (one per detector)"
925 );
926 }
927
928 #[test]
930 fn verify_with_reversed_companion() {
931 let credential = "ABC123XYZ";
932 let reversed: String = credential.chars().rev().collect();
933
934 assert_eq!(reversed, "ZYX321CBA");
936
937 let interpolated = interpolate(
939 "https://api.example.com/?key={{match}}&companion={{companion.secret}}",
940 credential,
941 Some(&reversed),
942 );
943
944 assert!(
945 interpolated.contains("ABC123XYZ"),
946 "Interpolated URL should contain original credential"
947 );
948 assert!(
949 interpolated.contains("ZYX321CBA"),
950 "Interpolated URL should contain reversed companion"
951 );
952
953 let resolved =
955 crate::interpolate::resolve_field("companion.secret", credential, Some(&reversed));
956 assert_eq!(
957 resolved, reversed,
958 "Companion resolution should return reversed value"
959 );
960 }
961
962 #[test]
964 fn verify_auth_header_with_null_bytes() {
965 let null_byte_values = vec![
967 "Bearer token\0extra",
968 "ApiKey \x00null_injected",
969 "token\x00\x00double_null",
970 ];
971
972 for value in null_byte_values {
973 let interpolated = interpolate("{{match}}", value, None);
975 assert_eq!(
976 interpolated, value,
977 "Null bytes should be preserved when template is exactly {{match}}"
978 );
979
980 let url_interpolated =
982 interpolate("https://api.example.com/?token={{match}}", value, None);
983 assert!(
984 url_interpolated.contains("%00") || !url_interpolated.contains('\0'),
985 "Null bytes should be encoded in URL context"
986 );
987 }
988
989 let header_template = "Bearer {{match}}";
992 let credential_with_null = "token\0null";
993 let interpolated_header = interpolate(header_template, credential_with_null, None);
994
995 assert!(
997 interpolated_header.contains("%00"),
998 "Embedded credential with null should be URL-encoded (contains %00): got {}",
999 interpolated_header
1000 );
1001 assert!(
1002 !interpolated_header.contains('\0'),
1003 "Raw null byte should not appear in interpolated result"
1004 );
1005 }
1006
1007 #[tokio::test]
1009 async fn verify_rate_limiting_100_concurrent_requests() {
1010 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
1011 let addr = listener.local_addr().unwrap();
1012 let active_requests = Arc::new(AtomicUsize::new(0));
1013 let max_concurrent = Arc::new(AtomicUsize::new(0));
1014 let active_clone = active_requests.clone();
1015 let max_clone = max_concurrent.clone();
1016
1017 tokio::spawn(async move {
1019 loop {
1020 let Ok((mut stream, _)) = listener.accept().await else {
1021 break;
1022 };
1023 let active = active_clone.clone();
1024 let max = max_clone.clone();
1025 tokio::spawn(async move {
1026 let current = active.fetch_add(1, Ordering::SeqCst) + 1;
1027 loop {
1029 let prev_max = max.load(Ordering::SeqCst);
1030 if current <= prev_max
1031 || max
1032 .compare_exchange(
1033 prev_max,
1034 current,
1035 Ordering::SeqCst,
1036 Ordering::SeqCst,
1037 )
1038 .is_ok()
1039 {
1040 break;
1041 }
1042 }
1043 tokio::time::sleep(Duration::from_millis(50)).await;
1045 active.fetch_sub(1, Ordering::SeqCst);
1046 let _ = stream
1047 .write_all(
1048 b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\n{\"valid\": true}",
1049 )
1050 .await;
1051 });
1052 }
1053 });
1054
1055 let detector = DetectorSpec {
1057 id: "rate-limit-test".into(),
1058 name: "Rate Limit Test".into(),
1059 service: "rate-limited-service".into(),
1060 severity: Severity::High,
1061 patterns: vec![],
1062 companion: None,
1063 verify: Some(keyhog_core::VerifySpec {
1064 method: HttpMethod::Get,
1065 url: format!("http://127.0.0.1:{}/verify", addr.port()),
1066 auth: AuthSpec::None,
1067 headers: vec![],
1068 body: None,
1069 success: SuccessSpec {
1070 status: Some(200),
1071 status_not: None,
1072 body_contains: None,
1073 body_not_contains: None,
1074 json_path: None,
1075 equals: None,
1076 },
1077 metadata: vec![],
1078 timeout_ms: None,
1079 }),
1080 keywords: vec![],
1081 };
1082
1083 let per_service_limit = 5;
1085 let engine = VerificationEngine::new(
1086 &[detector.clone()],
1087 VerifyConfig {
1088 timeout: Duration::from_secs(5),
1089 max_concurrent_per_service: per_service_limit,
1090 max_concurrent_global: 100,
1091 max_inflight_keys: 1000,
1092 },
1093 )
1094 .unwrap();
1095
1096 let mut groups = Vec::new();
1098 for i in 0..100 {
1099 let m = RawMatch {
1100 detector_id: "rate-limit-test".into(),
1101 detector_name: "Rate Limit Test".into(),
1102 service: "rate-limited-service".into(),
1103 severity: Severity::High,
1104 credential: format!("credential-{}", i),
1105 companion: None,
1106 location: MatchLocation {
1107 source: "fs".into(),
1108 file_path: Some(format!("test{}.txt", i)),
1109 line: Some(i),
1110 offset: 0,
1111 commit: None,
1112 author: None,
1113 date: None,
1114 },
1115 entropy: None,
1116 confidence: Some(0.9),
1117 };
1118 groups.push(dedup_matches(vec![m], &DedupScope::Credential).pop().unwrap());
1119 }
1120
1121 let findings = engine.verify_all(groups).await;
1123
1124 assert_eq!(findings.len(), 100, "All 100 verifications should complete");
1125
1126 let actual_max = max_concurrent.load(Ordering::SeqCst);
1128 println!("Max concurrent requests observed: {}", actual_max);
1131 }
1132
1133 #[tokio::test]
1135 async fn verify_response_with_infinite_chunked_transfer() {
1136 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
1137 let addr = listener.local_addr().unwrap();
1138
1139 tokio::spawn(async move {
1141 loop {
1142 let Ok((mut stream, _)) = listener.accept().await else {
1143 break;
1144 };
1145 tokio::spawn(async move {
1146 let mut buf = [0u8; 1024];
1147 let _ = stream.read(&mut buf).await;
1148 let _ = stream
1150 .write_all(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n")
1151 .await;
1152 loop {
1154 let chunk = "5\r\nhello\r\n";
1155 if stream.write_all(chunk.as_bytes()).await.is_err() {
1156 break;
1157 }
1158 tokio::time::sleep(Duration::from_millis(10)).await;
1159 }
1160 });
1161 }
1162 });
1163
1164 let detector = DetectorSpec {
1165 id: "infinite-chunk-test".into(),
1166 name: "Infinite Chunk Test".into(),
1167 service: "chunk-test-service".into(),
1168 severity: Severity::High,
1169 patterns: vec![],
1170 companion: None,
1171 verify: Some(keyhog_core::VerifySpec {
1172 method: HttpMethod::Get,
1173 url: format!("http://127.0.0.1:{}/chunked", addr.port()),
1174 auth: AuthSpec::None,
1175 headers: vec![],
1176 body: None,
1177 success: SuccessSpec {
1178 status: Some(200),
1179 status_not: None,
1180 body_contains: None,
1181 body_not_contains: None,
1182 json_path: None,
1183 equals: None,
1184 },
1185 metadata: vec![],
1186 timeout_ms: Some(500), }),
1188 keywords: vec![],
1189 };
1190
1191 let engine = VerificationEngine::new(
1192 &[detector],
1193 VerifyConfig {
1194 timeout: Duration::from_millis(500), max_concurrent_per_service: 5,
1196 max_concurrent_global: 20,
1197 max_inflight_keys: 1000,
1198 },
1199 )
1200 .unwrap();
1201
1202 let m = RawMatch {
1203 detector_id: "infinite-chunk-test".into(),
1204 detector_name: "Infinite Chunk Test".into(),
1205 service: "chunk-test-service".into(),
1206 severity: Severity::High,
1207 credential: "test-credential".into(),
1208 companion: None,
1209 location: MatchLocation {
1210 source: "fs".into(),
1211 file_path: Some("test.txt".into()),
1212 line: Some(1),
1213 offset: 0,
1214 commit: None,
1215 author: None,
1216 date: None,
1217 },
1218 entropy: None,
1219 confidence: Some(0.9),
1220 };
1221
1222 let group = dedup_matches(vec![m], &DedupScope::Credential).pop().unwrap();
1223
1224 let start = std::time::Instant::now();
1226 let findings = engine.verify_all(vec![group]).await;
1227 let elapsed = start.elapsed();
1228
1229 assert_eq!(findings.len(), 1);
1230 assert!(
1232 elapsed < Duration::from_secs(5),
1233 "Should complete within timeout, took {:?}",
1234 elapsed
1235 );
1236 }
1237
1238 #[tokio::test]
1240 async fn verify_dns_resolution_nxdomain() {
1241 use std::net::ToSocketAddrs;
1242
1243 let nxdomain_hosts = vec![
1245 "this-definitely-does-not-exist-12345.invalid",
1246 "nonexistent-domain-xyz123.example",
1247 ];
1248
1249 for host in nxdomain_hosts {
1250 let addr_result = format!("{}:443", host).to_socket_addrs();
1251 assert!(
1253 addr_result.is_err() || addr_result.unwrap().next().is_none(),
1254 "NXDOMAIN host {} should fail to resolve",
1255 host
1256 );
1257 }
1258
1259 let valid_host = "localhost:443";
1261 let valid_result = valid_host.to_socket_addrs();
1262 assert!(
1264 valid_result.is_ok(),
1265 "localhost should resolve to addresses"
1266 );
1267 }
1268}