1pub mod cache;
7mod interpolate;
8mod ssrf;
9mod verify;
10
11use std::collections::HashMap;
12use std::sync::Arc;
13use std::time::Duration;
14
15use dashmap::DashMap;
16use keyhog_core::{
17 DetectorSpec, MatchLocation, RawMatch, VerificationResult, VerifiedFinding, redact,
18};
19use reqwest::Client;
20use thiserror::Error;
21use tokio::sync::{Notify, Semaphore};
22
23#[derive(Debug, Error)]
34pub enum VerifyError {
35 #[error(
36 "failed to send HTTP request: {0}. Fix: check network access, proxy settings, and the verification endpoint"
37 )]
38 Http(#[from] reqwest::Error),
39 #[error(
40 "failed to build configured HTTP client: {0}. Fix: use a valid timeout and supported TLS/network configuration"
41 )]
42 ClientBuild(reqwest::Error),
43 #[error(
44 "failed to resolve verification field: {0}. Fix: use `match` or `companion.<name>` fields that exist in the detector spec"
45 )]
46 FieldResolution(String),
47}
48
/// State held by the verification engine.
///
/// NOTE(review): the methods that use these fields are not visible in this
/// part of the file. The per-field notes below are inferred from names and
/// types only and should be confirmed against the implementation.
pub struct VerificationEngine {
    /// `reqwest` HTTP client.
    client: Client,
    /// Detector specs in a string-keyed map (presumably keyed by detector id — TODO confirm).
    detectors: HashMap<String, DetectorSpec>,
    /// One shared semaphore per string key (presumably the service name — TODO confirm).
    service_semaphores: HashMap<String, Arc<Semaphore>>,
    /// A single shared semaphore (presumably the cross-service concurrency cap — TODO confirm).
    global_semaphore: Arc<Semaphore>,
    /// A duration (presumably the default per-request timeout — TODO confirm).
    timeout: Duration,
    /// Shared verification-result cache.
    cache: Arc<cache::VerificationCache>,
    /// Concurrent map from a pair of strings to a `Notify` handle
    /// (presumably used to coalesce duplicate in-flight verifications — TODO confirm).
    inflight: Arc<DashMap<(String, String), Arc<Notify>>>,
    /// A size limit (presumably the maximum number of entries allowed in `inflight` — TODO confirm).
    max_inflight_keys: usize,
}
89
/// Tunable limits supplied when constructing a [`VerificationEngine`].
///
/// The `Default` implementation in this file uses a 5-second timeout,
/// 5 concurrent requests per service, 20 concurrent requests overall and
/// 10 000 in-flight keys.
pub struct VerifyConfig {
    /// Request timeout.
    pub timeout: Duration,
    /// Concurrency limit applied per service.
    pub max_concurrent_per_service: usize,
    /// Concurrency limit applied across all services.
    pub max_concurrent_global: usize,
    /// Upper bound on tracked in-flight keys
    /// (exact enforcement is not visible here — TODO confirm against the engine).
    pub max_inflight_keys: usize,
}
115
116impl Default for VerifyConfig {
117 fn default() -> Self {
118 Self {
119 timeout: Duration::from_secs(5),
120 max_concurrent_per_service: 5,
121 max_concurrent_global: 20,
122 max_inflight_keys: 10_000,
123 }
124 }
125}
126
/// A group of raw matches that share one deduplication key, as produced by
/// `dedup_matches`.
#[derive(Clone)]
pub struct DedupedMatch {
    /// Identifier of the detector that produced the match.
    pub detector_id: String,
    /// Display name of that detector.
    pub detector_name: String,
    /// Service the detector belongs to.
    pub service: String,
    /// Severity copied from the first match in the group.
    pub severity: keyhog_core::Severity,
    /// The raw credential; it is only redacted when the group is converted
    /// into a finding.
    pub credential: String,
    /// Companion value, taken from the first match in the group that has one.
    pub companion: Option<String>,
    /// Location of the first match in the group.
    pub primary_location: MatchLocation,
    /// Locations of every later match that mapped to the same key.
    pub additional_locations: Vec<MatchLocation>,

    /// Confidence copied from the first match in the group, if it had one.
    pub confidence: Option<f64>,
}
180
181impl DedupedMatch {
182 fn into_finding(
186 self,
187 verification: VerificationResult,
188 metadata: HashMap<String, String>,
189 ) -> VerifiedFinding {
190 VerifiedFinding {
191 detector_id: self.detector_id,
192 detector_name: self.detector_name,
193 service: self.service,
194 severity: self.severity,
195 credential_redacted: redact(&self.credential),
196 location: self.primary_location,
197 verification,
198 metadata,
199 additional_locations: self.additional_locations,
200 confidence: self.confidence,
201 }
202 }
203}
204
205pub fn dedup_matches(matches: Vec<RawMatch>) -> Vec<DedupedMatch> {
236 let mut groups: HashMap<(String, String), DedupedMatch> = HashMap::new();
237
238 for m in matches {
239 let key = m.deduplication_key();
240 match groups.get_mut(&key) {
241 Some(existing) => {
242 existing.additional_locations.push(m.location);
243 if existing.companion.is_none() && m.companion.is_some() {
245 existing.companion = m.companion;
246 }
247 }
248 None => {
249 groups.insert(
250 key,
251 DedupedMatch {
252 detector_id: m.detector_id,
253 detector_name: m.detector_name,
254 service: m.service,
255 severity: m.severity,
256 credential: m.credential,
257 companion: m.companion,
258 primary_location: m.location,
259 additional_locations: Vec::new(),
260 confidence: m.confidence,
261 },
262 );
263 }
264 }
265 }
266
267 groups.into_values().collect()
268}
269
270#[cfg(test)]
271mod tests {
272 use super::*;
273 use crate::interpolate::interpolate;
274 use crate::ssrf::{is_private_url, parse_url_host};
275 const MAX_RESPONSE_BODY_BYTES: usize = 1024 * 1024;
277 use keyhog_core::{
278 AuthSpec, DetectorSpec, HttpMethod, MatchLocation, RawMatch, Severity, SuccessSpec,
279 VerificationResult,
280 };
281 use std::collections::HashMap;
282 use std::sync::Arc;
283 use std::sync::atomic::{AtomicUsize, Ordering};
284 use std::time::Duration;
285 use tokio::io::{AsyncReadExt, AsyncWriteExt};
286 use tokio::net::TcpListener;
287
288 #[test]
294 fn verify_url_with_unicode_hostname() {
295 let unicode_urls = vec![
298 "https://münchen.example.com/api",
299 "https://日本語.example.com/verify",
300 "https://test.домен.рф/check",
301 "https://example.中国/path",
302 ];
303
304 for url in unicode_urls {
305 let host = parse_url_host(url);
307 match host {
310 Some(h) => {
311 assert!(
313 !h.is_empty(),
314 "Parsed host should not be empty for URL: {}",
315 url
316 );
317 }
318 None => {
319 }
321 }
322 }
323
324 let interpolated = interpolate("https://example.com/日本語/{{match}}", "test-key", None);
326 assert!(
328 interpolated.contains("test-key")
329 || interpolated.contains("%7B%7Bmatch%7D%7D")
330 || interpolated.contains("%2D"),
331 "Interpolated URL should contain credential or encoding: {}",
332 interpolated
333 );
334 }
335
336 #[test]
338 fn verify_url_with_percent_encoded_path_traversal() {
339 let traversal_urls = vec![
341 "https://example.com/api/%2e%2e/%2e%2e/etc/passwd",
342 "https://example.com/api/%2e%2e%2fadmin",
343 "https://example.com/%252e%252e/admin", "https://example.com/api/..%2f..%2fsecret",
345 ];
346
347 for url in traversal_urls {
348 let parsed = reqwest::Url::parse(url);
350 assert!(
351 parsed.is_ok(),
352 "URL with percent-encoding should parse: {}",
353 url
354 );
355
356 assert!(
358 !is_private_url(url),
359 "Public URL with path traversal encoding should not be private: {}",
360 url
361 );
362 }
363
364 let traversal_cred = "../../../etc/passwd";
366 let interpolated = interpolate("https://api.example.com/{{match}}", traversal_cred, None);
367 assert!(
368 !interpolated.contains("../"),
369 "Path traversal in credential should be encoded: {}",
370 interpolated
371 );
372 assert!(
373 interpolated.contains("%2F") || interpolated.contains("."),
374 "Credential should be encoded or preserved but not traverse: {}",
375 interpolated
376 );
377 }
378
379 #[test]
381 fn verify_with_sql_injection_credential() {
382 let sql_injection_creds = vec![
383 "' OR '1'='1",
384 "'; DROP TABLE users; --",
385 "' UNION SELECT * FROM passwords --",
386 "1' AND 1=1 --",
387 "admin'--",
388 "1'; DELETE FROM credentials WHERE '1'='1",
389 ];
390
391 for cred in sql_injection_creds {
392 let interpolated = interpolate("{{match}}", cred, None);
394 assert_eq!(
395 interpolated, cred,
396 "SQL injection credential should be preserved literally"
397 );
398
399 let url_interpolated =
401 interpolate("https://api.example.com/?key={{match}}", cred, None);
402 assert!(
403 !url_interpolated.contains(" "),
404 "Spaces should be encoded in URL: {}",
405 url_interpolated
406 );
407
408 assert!(
410 url_interpolated.contains("%27") || url_interpolated.contains("%22"),
411 "Quotes should be encoded: {}",
412 url_interpolated
413 );
414 }
415 }
416
417 #[tokio::test]
419 async fn verify_with_crlf_injection_credential() {
420 let crlf_payloads = vec![
421 "value\r\nHost: evil.com",
422 "token\r\n\r\nGET /admin HTTP/1.1\r\nHost: attacker.com",
423 "key\nX-Injected: malicious",
424 "secret\r\nContent-Length: 0\r\n\r\n",
425 ];
426
427 for payload in crlf_payloads {
428 let interpolated_url =
430 interpolate("https://api.example.com/?token={{match}}", payload, None);
431
432 assert!(
434 !interpolated_url.contains('\r') && !interpolated_url.contains('\n'),
435 "CRLF characters must be encoded in URL: {:?}",
436 interpolated_url
437 );
438
439 assert!(
441 interpolated_url.contains("%0D") || interpolated_url.contains("%0A"),
442 "CRLF should be percent-encoded: {:?}",
443 interpolated_url
444 );
445
446 let interpolated_literal = interpolate("{{match}}", payload, None);
449 assert!(
450 !interpolated_literal.contains('\r') && !interpolated_literal.contains('\n'),
451 "CRLF should be stripped from raw interpolation: {:?}",
452 interpolated_literal
453 );
454 }
455 }
456
457 #[test]
459 fn verify_with_base64_encoded_credential() {
460 fn base64_encode(input: &str) -> String {
462 const CHARSET: &[u8] =
463 b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
464 let bytes = input.as_bytes();
465 let mut result = String::new();
466
467 for chunk in bytes.chunks(3) {
468 let b = match chunk.len() {
469 1 => [chunk[0], 0, 0],
470 2 => [chunk[0], chunk[1], 0],
471 3 => [chunk[0], chunk[1], chunk[2]],
472 _ => [0, 0, 0],
473 };
474
475 let idx1 = (b[0] >> 2) as usize;
476 let idx2 = (((b[0] & 0b11) << 4) | (b[1] >> 4)) as usize;
477 let idx3 = (((b[1] & 0b1111) << 2) | (b[2] >> 6)) as usize;
478 let idx4 = (b[2] & 0b111111) as usize;
479
480 result.push(CHARSET[idx1] as char);
481 result.push(CHARSET[idx2] as char);
482 result.push(if chunk.len() > 1 { CHARSET[idx3] } else { b'=' } as char);
483 result.push(if chunk.len() > 2 { CHARSET[idx4] } else { b'=' } as char);
484 }
485 result
486 }
487
488 let original_cred = "sk_live_4242424242424242";
490 let base64_encoded = base64_encode(original_cred);
491
492 assert_ne!(
494 original_cred, base64_encoded,
495 "Base64 encoding should produce different string"
496 );
497
498 let interpolated_original = interpolate("{{match}}", original_cred, None);
500 let interpolated_base64 = interpolate("{{match}}", &base64_encoded, None);
501
502 assert_ne!(
503 interpolated_original, interpolated_base64,
504 "Original and base64 credentials should produce different interpolations"
505 );
506
507 assert!(
509 base64_encoded
510 .chars()
511 .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/' || c == '='),
512 "Base64 should only contain alphanumeric, +, /, = characters"
513 );
514
515 let double_encoded = base64_encode(&base64_encoded);
517 let interpolated_double = interpolate("{{match}}", &double_encoded, None);
518 assert_ne!(
519 interpolated_double, interpolated_base64,
520 "Double-encoded should differ from single-encoded"
521 );
522 }
523
524 #[tokio::test]
526 async fn verify_timeout_of_exactly_zero_ms() {
527 let zero_duration = Duration::from_millis(0);
529
530 let result = VerificationEngine::new(
532 &[],
533 VerifyConfig {
534 timeout: zero_duration,
535 max_concurrent_per_service: 1,
536 max_concurrent_global: 1,
537 max_inflight_keys: 100,
538 },
539 );
540
541 match result {
543 Ok(_) => {
544 }
546 Err(_) => {
547 }
549 }
550 }
551
552 #[test]
554 fn verify_timeout_of_u64_max_ms() {
555 let max_duration = Duration::from_millis(u64::MAX);
557
558 let result = std::panic::catch_unwind(|| {
560 VerificationEngine::new(
561 &[],
562 VerifyConfig {
563 timeout: max_duration,
564 max_concurrent_per_service: 1,
565 max_concurrent_global: 1,
566 max_inflight_keys: 100,
567 },
568 )
569 });
570
571 assert!(result.is_ok(), "u64::MAX timeout should not cause panic");
573 }
574
575 #[tokio::test]
577 async fn verify_with_empty_credential_string() {
578 let empty_cred = "";
579
580 let interpolated = interpolate("https://api.example.com/?key={{match}}", empty_cred, None);
582 assert_eq!(
583 interpolated, "https://api.example.com/?key=",
584 "Empty credential should result in empty query param"
585 );
586
587 let cache = cache::VerificationCache::default_ttl();
589 cache.put(
590 empty_cred,
591 "test-detector",
592 VerificationResult::Dead,
593 HashMap::new(),
594 );
595
596 let cached = cache.get(empty_cred, "test-detector");
597 assert!(cached.is_some(), "Empty credential should be cacheable");
598 assert!(
599 matches!(cached.unwrap().0, VerificationResult::Dead),
600 "Empty credential cache should return correct result"
601 );
602 }
603
604 #[tokio::test]
606 async fn verify_with_credential_longer_than_1mb() {
607 let mb_credential = "x".repeat(1024 * 1024 + 1024); assert!(
610 mb_credential.len() > MAX_RESPONSE_BODY_BYTES,
611 "Test credential should be > 1MB"
612 );
613
614 let interpolated = interpolate("{{match}}", &mb_credential, None);
616 assert_eq!(
617 interpolated.len(),
618 mb_credential.len(),
619 "Interpolated credential should preserve size"
620 );
621
622 let url_interpolated = interpolate(
624 "https://api.example.com/?key={{match}}",
625 &mb_credential,
626 None,
627 );
628 assert!(
629 url_interpolated.len() > mb_credential.len(),
630 "URL-encoded credential should be larger"
631 );
632
633 let cache = cache::VerificationCache::default_ttl();
635 cache.put(
636 &mb_credential,
637 "test-detector",
638 VerificationResult::Live,
639 HashMap::new(),
640 );
641
642 let cached = cache.get(&mb_credential, "test-detector");
643 assert!(
644 cached.is_some(),
645 "Large credential should be cacheable (stores hash)"
646 );
647 }
648
649 #[tokio::test]
651 async fn verify_two_detectors_same_credential_simultaneously() {
652 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
653 let addr = listener.local_addr().unwrap();
654 let request_count = Arc::new(AtomicUsize::new(0));
655 let count_clone = request_count.clone();
656
657 tokio::spawn(async move {
659 loop {
660 let Ok((mut stream, _)) = listener.accept().await else {
661 break;
662 };
663 let count = count_clone.clone();
664 tokio::spawn(async move {
665 let mut buf = [0u8; 4096];
666 let _ = stream.read(&mut buf).await;
667 count.fetch_add(1, Ordering::SeqCst);
668 let _ = stream
669 .write_all(
670 b"HTTP/1.1 200 OK\r\nContent-Length: 15\r\n\r\n{\"valid\": true}",
671 )
672 .await;
673 });
674 }
675 });
676
677 let detector1 = DetectorSpec {
679 id: "detector-1".into(),
680 name: "Detector 1".into(),
681 service: "test-service".into(),
682 severity: Severity::High,
683 patterns: vec![],
684 companion: None,
685 verify: Some(keyhog_core::VerifySpec {
686 method: HttpMethod::Get,
687 url: format!("http://127.0.0.1:{}/verify1", addr.port()),
688 auth: AuthSpec::None,
689 headers: vec![],
690 body: None,
691 success: SuccessSpec {
692 status: Some(200),
693 status_not: None,
694 body_contains: None,
695 body_not_contains: None,
696 json_path: None,
697 equals: None,
698 },
699 metadata: vec![],
700 timeout_ms: None,
701 }),
702 keywords: vec![],
703 };
704
705 let detector2 = DetectorSpec {
706 id: "detector-2".into(),
707 name: "Detector 2".into(),
708 service: "test-service".into(), severity: Severity::High,
710 patterns: vec![],
711 companion: None,
712 verify: Some(keyhog_core::VerifySpec {
713 method: HttpMethod::Get,
714 url: format!("http://127.0.0.1:{}/verify2", addr.port()),
715 auth: AuthSpec::None,
716 headers: vec![],
717 body: None,
718 success: SuccessSpec {
719 status: Some(200),
720 status_not: None,
721 body_contains: None,
722 body_not_contains: None,
723 json_path: None,
724 equals: None,
725 },
726 metadata: vec![],
727 timeout_ms: None,
728 }),
729 keywords: vec![],
730 };
731
732 let engine = VerificationEngine::new(
733 &[detector1.clone(), detector2.clone()],
734 VerifyConfig {
735 timeout: Duration::from_secs(2),
736 max_concurrent_per_service: 10,
737 max_concurrent_global: 20,
738 max_inflight_keys: 1000,
739 },
740 )
741 .unwrap();
742
743 let shared_credential = "shared-secret-key-12345";
745
746 let make_match = |detector: &DetectorSpec| RawMatch {
747 detector_id: detector.id.clone(),
748 detector_name: detector.name.clone(),
749 service: detector.service.clone(),
750 severity: Severity::High,
751 credential: shared_credential.into(),
752 companion: None,
753 location: MatchLocation {
754 source: "fs".into(),
755 file_path: Some("test.txt".into()),
756 line: Some(1),
757 offset: 0,
758 commit: None,
759 author: None,
760 date: None,
761 },
762 entropy: None,
763 confidence: Some(0.9),
764 };
765
766 let match1 = make_match(&detector1);
768 let match2 = make_match(&detector2);
769
770 let group1 = dedup_matches(vec![match1]).pop().unwrap();
771 let group2 = dedup_matches(vec![match2]).pop().unwrap();
772
773 let findings = engine.verify_all(vec![group1, group2]).await;
775
776 assert_eq!(findings.len(), 2, "Should have 2 findings");
777
778 let detector_ids: Vec<_> = findings.iter().map(|f| &f.detector_id).collect();
780 assert!(detector_ids.contains(&&"detector-1".to_string()));
781 assert!(detector_ids.contains(&&"detector-2".to_string()));
782 }
783
784 #[test]
786 fn verify_url_with_no_path() {
787 let no_path_urls = vec!["https://api.example.com", "https://api.example.com:443"];
789
790 for url in no_path_urls {
791 let parsed = reqwest::Url::parse(url);
792 assert!(parsed.is_ok(), "URL without path should parse: {}", url);
793
794 let parsed = parsed.unwrap();
795 assert_eq!(
796 parsed.path(),
797 "/",
798 "URL without explicit path should default to /"
799 );
800
801 assert!(
803 !is_private_url(url),
804 "Public URL without path should not be private"
805 );
806 }
807
808 let interpolated = interpolate("https://api.example.com?key={{match}}", "test-value", None);
810 assert!(
812 interpolated == "https://api.example.com?key=test-value"
813 || interpolated == "https://api.example.com?key=test%2Dvalue",
814 "Interpolation should add query to no-path URL: got {}",
815 interpolated
816 );
817 }
818
819 #[test]
821 fn verify_url_with_username_password_in_host() {
822 let urls_with_auth = vec![
824 "https://user:pass@api.example.com/endpoint",
825 "https://admin:secret123@host.com:8080/api",
826 "https://user%40domain:p%40ss@example.com/path",
827 ];
828
829 for url in urls_with_auth {
830 let parsed = reqwest::Url::parse(url);
831 assert!(parsed.is_ok(), "URL with auth info should parse: {}", url);
832
833 let parsed = parsed.unwrap();
834 assert!(
835 parsed.username().is_empty() || !parsed.username().is_empty(),
836 "Username may or may not be present after normalization"
837 );
838
839 }
842
843 let interpolated = interpolate(
845 "https://{{match}}@api.example.com/endpoint",
846 "user:pass",
847 None,
848 );
849 assert!(
851 interpolated.contains("%40") || interpolated.contains("@"),
852 "URL interpolation should handle auth-like patterns"
853 );
854 }
855
856 #[test]
858 fn verify_spec_with_contradicting_success_criteria() {
859 let contradictory_spec = SuccessSpec {
864 status: Some(200),
865 status_not: Some(200),
866 body_contains: None,
867 body_not_contains: None,
868 json_path: None,
869 equals: None,
870 };
871
872 assert!(
877 contradictory_spec.status.is_some() && contradictory_spec.status_not.is_some(),
878 "Spec has both status and status_not defined"
879 );
880 assert_eq!(
881 contradictory_spec.status, contradictory_spec.status_not,
882 "Spec requires status to be {:?} and NOT be {:?}",
883 contradictory_spec.status, contradictory_spec.status_not
884 );
885
886 let body_contradiction = SuccessSpec {
888 status: Some(200),
889 status_not: None,
890 body_contains: Some("success".into()),
891 body_not_contains: Some("success".into()),
892 json_path: None,
893 equals: None,
894 };
895
896 assert_eq!(
897 body_contradiction.body_contains, body_contradiction.body_not_contains,
898 "Spec requires body to contain '{:?}' and NOT contain '{:?}'",
899 body_contradiction.body_contains, body_contradiction.body_not_contains
900 );
901
902 fn status_matches(status: Option<u16>, status_not: Option<u16>, code: u16) -> bool {
904 if let Some(expected) = status {
905 if code != expected {
906 return false;
907 }
908 }
909 if let Some(not_expected) = status_not {
910 if code == not_expected {
911 return false;
912 }
913 }
914 true
915 }
916
917 assert!(
919 !status_matches(Some(200), Some(200), 200),
920 "Contradictory spec should fail for status 200"
921 );
922 assert!(
923 !status_matches(Some(200), Some(200), 201),
924 "Contradictory spec should fail for status 201"
925 );
926 assert!(
927 !status_matches(Some(200), Some(200), 404),
928 "Contradictory spec should fail for status 404"
929 );
930 }
931
932 #[test]
934 fn body_analysis_on_deeply_nested_json() {
935 let mut deep_json = String::new();
937 for _ in 0..100 {
938 deep_json.push_str(r#"{"level": "#);
939 }
940 deep_json.push_str("\"value\"");
941 for _ in 0..100 {
942 deep_json.push('}');
943 }
944
945 let parsed: Result<serde_json::Value, _> = serde_json::from_str(&deep_json);
947 assert!(parsed.is_ok(), "100-level deep JSON should parse");
948
949 let value = parsed.unwrap();
951 let mut current = &value;
952 for _ in 0..100 {
953 current = current
954 .get("level")
955 .expect("Should have 'level' key at each depth");
956 }
957 assert_eq!(current, &serde_json::Value::String("value".into()));
958
959 let mut deep_error_json = String::new();
961 for _ in 0..99 {
962 deep_error_json.push_str(r#"{"nested": "#);
963 }
964 deep_error_json.push_str(r#"{"error": "deep failure"}"#);
965 for _ in 0..99 {
966 deep_error_json.push('}');
967 }
968
969 let parsed_error: Result<serde_json::Value, _> = serde_json::from_str(&deep_error_json);
970 assert!(
971 parsed_error.is_ok(),
972 "Deep JSON with error should also parse"
973 );
974
975 let error_value = parsed_error.unwrap();
977 let mut current = &error_value;
978 for _ in 0..99 {
979 current = current.get("nested").expect("Should have 'nested' key");
980 }
981 assert!(
982 current.get("error").is_some(),
983 "Should be able to access deep error field"
984 );
985 }
986
987 #[test]
989 fn cache_behavior_same_credential_different_detectors() {
990 let cache = cache::VerificationCache::default_ttl();
991 let credential = "shared-credential-abc123";
992
993 cache.put(
995 credential,
996 "detector-1",
997 VerificationResult::Live,
998 HashMap::from([("source".into(), "det1".into())]),
999 );
1000
1001 cache.put(
1003 credential,
1004 "detector-2",
1005 VerificationResult::Dead,
1006 HashMap::from([("source".into(), "det2".into())]),
1007 );
1008
1009 let cached1 = cache.get(credential, "detector-1");
1011 assert!(cached1.is_some(), "Detector 1 should have cached result");
1012 let (result1, meta1) = cached1.unwrap();
1013 assert!(
1014 matches!(result1, VerificationResult::Live),
1015 "Detector 1 should have Live result"
1016 );
1017 assert_eq!(meta1.get("source"), Some(&"det1".to_string()));
1018
1019 let cached2 = cache.get(credential, "detector-2");
1020 assert!(cached2.is_some(), "Detector 2 should have cached result");
1021 let (result2, meta2) = cached2.unwrap();
1022 assert!(
1023 matches!(result2, VerificationResult::Dead),
1024 "Detector 2 should have Dead result"
1025 );
1026 assert_eq!(meta2.get("source"), Some(&"det2".to_string()));
1027
1028 let cached3 = cache.get(credential, "detector-3");
1030 assert!(
1031 cached3.is_none(),
1032 "Detector 3 should not have cached result"
1033 );
1034
1035 assert_eq!(
1037 cache.len(),
1038 2,
1039 "Cache should have 2 entries (one per detector)"
1040 );
1041 }
1042
1043 #[test]
1045 fn verify_with_reversed_companion() {
1046 let credential = "ABC123XYZ";
1047 let reversed: String = credential.chars().rev().collect();
1048
1049 assert_eq!(reversed, "ZYX321CBA");
1051
1052 let interpolated = interpolate(
1054 "https://api.example.com/?key={{match}}&companion={{companion.secret}}",
1055 credential,
1056 Some(&reversed),
1057 );
1058
1059 assert!(
1060 interpolated.contains("ABC123XYZ"),
1061 "Interpolated URL should contain original credential"
1062 );
1063 assert!(
1064 interpolated.contains("ZYX321CBA"),
1065 "Interpolated URL should contain reversed companion"
1066 );
1067
1068 let resolved =
1070 crate::interpolate::resolve_field("companion.secret", credential, Some(&reversed));
1071 assert_eq!(
1072 resolved, reversed,
1073 "Companion resolution should return reversed value"
1074 );
1075 }
1076
1077 #[test]
1079 fn verify_auth_header_with_null_bytes() {
1080 let null_byte_values = vec![
1082 "Bearer token\0extra",
1083 "ApiKey \x00null_injected",
1084 "token\x00\x00double_null",
1085 ];
1086
1087 for value in null_byte_values {
1088 let interpolated = interpolate("{{match}}", value, None);
1090 assert_eq!(
1091 interpolated, value,
1092 "Null bytes should be preserved when template is exactly {{match}}"
1093 );
1094
1095 let url_interpolated =
1097 interpolate("https://api.example.com/?token={{match}}", value, None);
1098 assert!(
1099 url_interpolated.contains("%00") || !url_interpolated.contains('\0'),
1100 "Null bytes should be encoded in URL context"
1101 );
1102 }
1103
1104 let header_template = "Bearer {{match}}";
1107 let credential_with_null = "token\0null";
1108 let interpolated_header = interpolate(header_template, credential_with_null, None);
1109
1110 assert!(
1112 interpolated_header.contains("%00"),
1113 "Embedded credential with null should be URL-encoded (contains %00): got {}",
1114 interpolated_header
1115 );
1116 assert!(
1117 !interpolated_header.contains('\0'),
1118 "Raw null byte should not appear in interpolated result"
1119 );
1120 }
1121
1122 #[tokio::test]
1124 async fn verify_rate_limiting_100_concurrent_requests() {
1125 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
1126 let addr = listener.local_addr().unwrap();
1127 let active_requests = Arc::new(AtomicUsize::new(0));
1128 let max_concurrent = Arc::new(AtomicUsize::new(0));
1129 let active_clone = active_requests.clone();
1130 let max_clone = max_concurrent.clone();
1131
1132 tokio::spawn(async move {
1134 loop {
1135 let Ok((mut stream, _)) = listener.accept().await else {
1136 break;
1137 };
1138 let active = active_clone.clone();
1139 let max = max_clone.clone();
1140 tokio::spawn(async move {
1141 let current = active.fetch_add(1, Ordering::SeqCst) + 1;
1142 loop {
1144 let prev_max = max.load(Ordering::SeqCst);
1145 if current <= prev_max
1146 || max
1147 .compare_exchange(
1148 prev_max,
1149 current,
1150 Ordering::SeqCst,
1151 Ordering::SeqCst,
1152 )
1153 .is_ok()
1154 {
1155 break;
1156 }
1157 }
1158 tokio::time::sleep(Duration::from_millis(50)).await;
1160 active.fetch_sub(1, Ordering::SeqCst);
1161 let _ = stream
1162 .write_all(
1163 b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\n{\"valid\": true}",
1164 )
1165 .await;
1166 });
1167 }
1168 });
1169
1170 let detector = DetectorSpec {
1172 id: "rate-limit-test".into(),
1173 name: "Rate Limit Test".into(),
1174 service: "rate-limited-service".into(),
1175 severity: Severity::High,
1176 patterns: vec![],
1177 companion: None,
1178 verify: Some(keyhog_core::VerifySpec {
1179 method: HttpMethod::Get,
1180 url: format!("http://127.0.0.1:{}/verify", addr.port()),
1181 auth: AuthSpec::None,
1182 headers: vec![],
1183 body: None,
1184 success: SuccessSpec {
1185 status: Some(200),
1186 status_not: None,
1187 body_contains: None,
1188 body_not_contains: None,
1189 json_path: None,
1190 equals: None,
1191 },
1192 metadata: vec![],
1193 timeout_ms: None,
1194 }),
1195 keywords: vec![],
1196 };
1197
1198 let per_service_limit = 5;
1200 let engine = VerificationEngine::new(
1201 &[detector.clone()],
1202 VerifyConfig {
1203 timeout: Duration::from_secs(5),
1204 max_concurrent_per_service: per_service_limit,
1205 max_concurrent_global: 100,
1206 max_inflight_keys: 1000,
1207 },
1208 )
1209 .unwrap();
1210
1211 let mut groups = Vec::new();
1213 for i in 0..100 {
1214 let m = RawMatch {
1215 detector_id: "rate-limit-test".into(),
1216 detector_name: "Rate Limit Test".into(),
1217 service: "rate-limited-service".into(),
1218 severity: Severity::High,
1219 credential: format!("credential-{}", i),
1220 companion: None,
1221 location: MatchLocation {
1222 source: "fs".into(),
1223 file_path: Some(format!("test{}.txt", i)),
1224 line: Some(i),
1225 offset: 0,
1226 commit: None,
1227 author: None,
1228 date: None,
1229 },
1230 entropy: None,
1231 confidence: Some(0.9),
1232 };
1233 groups.push(dedup_matches(vec![m]).pop().unwrap());
1234 }
1235
1236 let findings = engine.verify_all(groups).await;
1238
1239 assert_eq!(findings.len(), 100, "All 100 verifications should complete");
1240
1241 let actual_max = max_concurrent.load(Ordering::SeqCst);
1243 println!("Max concurrent requests observed: {}", actual_max);
1246 }
1247
1248 #[tokio::test]
1250 async fn verify_response_with_infinite_chunked_transfer() {
1251 let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
1252 let addr = listener.local_addr().unwrap();
1253
1254 tokio::spawn(async move {
1256 loop {
1257 let Ok((mut stream, _)) = listener.accept().await else {
1258 break;
1259 };
1260 tokio::spawn(async move {
1261 let mut buf = [0u8; 1024];
1262 let _ = stream.read(&mut buf).await;
1263 let _ = stream
1265 .write_all(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n")
1266 .await;
1267 loop {
1269 let chunk = "5\r\nhello\r\n";
1270 if stream.write_all(chunk.as_bytes()).await.is_err() {
1271 break;
1272 }
1273 tokio::time::sleep(Duration::from_millis(10)).await;
1274 }
1275 });
1276 }
1277 });
1278
1279 let detector = DetectorSpec {
1280 id: "infinite-chunk-test".into(),
1281 name: "Infinite Chunk Test".into(),
1282 service: "chunk-test-service".into(),
1283 severity: Severity::High,
1284 patterns: vec![],
1285 companion: None,
1286 verify: Some(keyhog_core::VerifySpec {
1287 method: HttpMethod::Get,
1288 url: format!("http://127.0.0.1:{}/chunked", addr.port()),
1289 auth: AuthSpec::None,
1290 headers: vec![],
1291 body: None,
1292 success: SuccessSpec {
1293 status: Some(200),
1294 status_not: None,
1295 body_contains: None,
1296 body_not_contains: None,
1297 json_path: None,
1298 equals: None,
1299 },
1300 metadata: vec![],
1301 timeout_ms: Some(500), }),
1303 keywords: vec![],
1304 };
1305
1306 let engine = VerificationEngine::new(
1307 &[detector],
1308 VerifyConfig {
1309 timeout: Duration::from_millis(500), max_concurrent_per_service: 5,
1311 max_concurrent_global: 20,
1312 max_inflight_keys: 1000,
1313 },
1314 )
1315 .unwrap();
1316
1317 let m = RawMatch {
1318 detector_id: "infinite-chunk-test".into(),
1319 detector_name: "Infinite Chunk Test".into(),
1320 service: "chunk-test-service".into(),
1321 severity: Severity::High,
1322 credential: "test-credential".into(),
1323 companion: None,
1324 location: MatchLocation {
1325 source: "fs".into(),
1326 file_path: Some("test.txt".into()),
1327 line: Some(1),
1328 offset: 0,
1329 commit: None,
1330 author: None,
1331 date: None,
1332 },
1333 entropy: None,
1334 confidence: Some(0.9),
1335 };
1336
1337 let group = dedup_matches(vec![m]).pop().unwrap();
1338
1339 let start = std::time::Instant::now();
1341 let findings = engine.verify_all(vec![group]).await;
1342 let elapsed = start.elapsed();
1343
1344 assert_eq!(findings.len(), 1);
1345 assert!(
1347 elapsed < Duration::from_secs(5),
1348 "Should complete within timeout, took {:?}",
1349 elapsed
1350 );
1351 }
1352
1353 #[tokio::test]
1355 async fn verify_dns_resolution_nxdomain() {
1356 use std::net::ToSocketAddrs;
1357
1358 let nxdomain_hosts = vec![
1360 "this-definitely-does-not-exist-12345.invalid",
1361 "nonexistent-domain-xyz123.example",
1362 ];
1363
1364 for host in nxdomain_hosts {
1365 let addr_result = format!("{}:443", host).to_socket_addrs();
1366 assert!(
1368 addr_result.is_err() || addr_result.unwrap().next().is_none(),
1369 "NXDOMAIN host {} should fail to resolve",
1370 host
1371 );
1372 }
1373
1374 let valid_host = "localhost:443";
1376 let valid_result = valid_host.to_socket_addrs();
1377 assert!(
1379 valid_result.is_ok(),
1380 "localhost should resolve to addresses"
1381 );
1382 }
1383}