1use aho_corasick::AhoCorasick;
8
/// Literal markers searched for by the Aho-Corasick automaton, one per built-in
/// credential type.
///
/// Entry `i` is classified as `AC_KINDS[i]`, so the two tables must keep the same
/// length and order. Order also matters for matching: the scanner builds the
/// automaton with `MatchKind::LeftmostFirst`, so the more specific `"sk-ant-"`
/// must come before the generic `"sk-"` to win when both match at one position.
const AC_PATTERNS: &[&str] = &[
    // API keys and cloud credentials.
    "sk-ant-",
    "sk-",
    "AKIA",
    "\"type\": \"service_account\"",
    "DefaultEndpointsProtocol=",
    // GitHub and Slack tokens.
    "ghp_",
    "ghs_",
    "xoxb-",
    "xoxp-",
    "xoxa-",
    // Database connection URLs.
    "postgres://",
    "mysql://",
    "mongodb://",
    // PEM / PGP private-key headers.
    "-----BEGIN RSA PRIVATE KEY-----",
    "-----BEGIN EC PRIVATE KEY-----",
    "-----BEGIN OPENSSH PRIVATE KEY-----",
    "-----BEGIN PRIVATE KEY-----",
    "-----BEGIN PGP PRIVATE KEY BLOCK-----",
];
34
/// Classification for each entry of `AC_PATTERNS`, in the same order.
///
/// The scanner indexes this table with the pattern index reported by the
/// automaton, so adding, removing or reordering a pattern requires the matching
/// change here.
const AC_KINDS: &[CredentialKind] = &[
    // API keys and cloud credentials.
    CredentialKind::AnthropicKey,
    CredentialKind::OpenAiKey,
    CredentialKind::AwsAccessKey,
    CredentialKind::GcpServiceAccount,
    CredentialKind::AzureConnectionString,
    // GitHub and Slack tokens.
    CredentialKind::GitHubPat,
    CredentialKind::GitHubAppToken,
    CredentialKind::SlackBotToken,
    CredentialKind::SlackUserToken,
    CredentialKind::SlackOAuthToken,
    // Database connection URLs.
    CredentialKind::PostgresUrl,
    CredentialKind::MysqlUrl,
    CredentialKind::MongodbUrl,
    // PEM / PGP private-key headers.
    CredentialKind::RsaPrivateKey,
    CredentialKind::EcPrivateKey,
    CredentialKind::OpensshPrivateKey,
    CredentialKind::PrivateKey,
    CredentialKind::PgpPrivateKey,
];
56
/// Category of sensitive data that a [`CredentialFinding`] was classified as.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CredentialKind {
    /// Token introduced by the `sk-ant-` prefix.
    AnthropicKey,
    /// Token introduced by the `AKIA` prefix.
    AwsAccessKey,
    /// JSON containing the `"type": "service_account"` marker.
    GcpServiceAccount,
    /// Token introduced by the `sk-` prefix (other than `sk-ant-`).
    OpenAiKey,
    /// Connection string starting with `DefaultEndpointsProtocol=`.
    AzureConnectionString,
    /// Token introduced by the `ghs_` prefix.
    GitHubAppToken,
    /// Token introduced by the `ghp_` prefix.
    GitHubPat,
    /// Token introduced by the `xoxb-` prefix.
    SlackBotToken,
    /// Token introduced by the `xoxa-` prefix.
    SlackOAuthToken,
    /// Token introduced by the `xoxp-` prefix.
    SlackUserToken,
    /// URL with the `mongodb://` scheme.
    MongodbUrl,
    /// URL with the `mysql://` scheme.
    MysqlUrl,
    /// URL with the `postgres://` scheme.
    PostgresUrl,
    /// `-----BEGIN EC PRIVATE KEY-----` header.
    EcPrivateKey,
    /// `-----BEGIN OPENSSH PRIVATE KEY-----` header.
    OpensshPrivateKey,
    /// `-----BEGIN PGP PRIVATE KEY BLOCK-----` header.
    PgpPrivateKey,
    /// `-----BEGIN PRIVATE KEY-----` header.
    PrivateKey,
    /// `-----BEGIN RSA PRIVATE KEY-----` header.
    RsaPrivateKey,
    /// Digit sequence that passes the Luhn checksum.
    CreditCardLuhn,
    /// Heuristically detected e-mail address.
    EmailAddress,
    /// Digits shaped like `ddd-dd-dddd`.
    SsnPattern,
    /// Whitespace-delimited token with high Shannon entropy.
    GenericHighEntropy,
    /// Match produced by a caller-supplied pattern.
    Custom,
}

impl CredentialKind {
    /// Returns the variant name as a static string.
    ///
    /// This is the text embedded in the `[REDACTED:<kind>]` label of a finding.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::AnthropicKey => "AnthropicKey",
            Self::AwsAccessKey => "AwsAccessKey",
            Self::GcpServiceAccount => "GcpServiceAccount",
            Self::OpenAiKey => "OpenAiKey",
            Self::AzureConnectionString => "AzureConnectionString",
            Self::GitHubAppToken => "GitHubAppToken",
            Self::GitHubPat => "GitHubPat",
            Self::SlackBotToken => "SlackBotToken",
            Self::SlackOAuthToken => "SlackOAuthToken",
            Self::SlackUserToken => "SlackUserToken",
            Self::MongodbUrl => "MongodbUrl",
            Self::MysqlUrl => "MysqlUrl",
            Self::PostgresUrl => "PostgresUrl",
            Self::EcPrivateKey => "EcPrivateKey",
            Self::OpensshPrivateKey => "OpensshPrivateKey",
            Self::PgpPrivateKey => "PgpPrivateKey",
            Self::PrivateKey => "PrivateKey",
            Self::RsaPrivateKey => "RsaPrivateKey",
            Self::CreditCardLuhn => "CreditCardLuhn",
            Self::EmailAddress => "EmailAddress",
            Self::SsnPattern => "SsnPattern",
            Self::GenericHighEntropy => "GenericHighEntropy",
            Self::Custom => "Custom",
        }
    }
}

/// One detected span of sensitive data.
///
/// The finding never stores the secret itself: `matched` holds the replacement
/// label, and the span is described only by byte offsets.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CredentialFinding {
    /// What the span was classified as.
    pub kind: CredentialKind,
    /// Byte offset of the first byte of the span in the scanned text.
    pub offset: usize,
    /// Replacement label of the form `[REDACTED:<kind>]` (not the matched text).
    pub matched: String,
    /// Exclusive end byte offset of the span. Skipped by serde, so it is not
    /// part of the serialized form.
    #[cfg_attr(feature = "serde", serde(skip))]
    end: usize,
}

impl CredentialFinding {
    /// Builds a finding for the byte range `offset..end` with the label for `kind`.
    fn new(kind: CredentialKind, offset: usize, end: usize) -> Self {
        let label = format!("[REDACTED:{}]", kind.as_str());
        Self {
            kind,
            offset,
            matched: label,
            end,
        }
    }

    /// Builds a [`CredentialKind::Custom`] finding for the byte range `offset..end`.
    pub fn from_regex_match(offset: usize, end: usize) -> Self {
        Self::new(CredentialKind::Custom, offset, end)
    }
}

/// Outcome of scanning one piece of text.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ScanResult {
    /// All findings for the scanned text.
    pub findings: Vec<CredentialFinding>,
}

impl ScanResult {
    /// Returns `true` when nothing was found.
    pub fn is_clean(&self) -> bool {
        self.findings.is_empty()
    }

    /// Returns a copy of `text` with every finding's span replaced by its label.
    ///
    /// `text` is expected to be the string the findings were produced from.
    /// Findings that cannot be applied safely are ignored instead of panicking:
    /// spans with `offset > end`, spans reaching past the end of `text`, and
    /// spans whose bounds do not fall on UTF-8 character boundaries.
    ///
    /// Overlapping findings (for example a token that is reported both by its
    /// prefix and as a high-entropy string) are merged into a single span that
    /// is replaced once, using the label of the finding that starts first
    /// (ties keep the order of `findings`). Adjacent, non-overlapping findings
    /// are replaced independently.
    pub fn redact(&self, text: &str) -> String {
        // `is_char_boundary` is false for indices past the end, so this also
        // rejects out-of-range spans.
        let mut spans: Vec<&CredentialFinding> = self
            .findings
            .iter()
            .filter(|f| {
                f.offset <= f.end && text.is_char_boundary(f.offset) && text.is_char_boundary(f.end)
            })
            .collect();
        // Stable sort: findings with equal offsets keep their original order.
        spans.sort_by_key(|f| f.offset);

        let mut redacted = String::with_capacity(text.len());
        let mut cursor = 0usize;
        let mut pending = spans.into_iter().peekable();

        while let Some(first) = pending.next() {
            // Absorb every following span that starts inside the current one,
            // so offsets are always interpreted against the original text.
            let mut end = first.end;
            while let Some(next) = pending.peek().copied() {
                if next.offset >= end {
                    break;
                }
                end = end.max(next.end);
                pending.next();
            }

            redacted.push_str(&text[cursor..first.offset]);
            redacted.push_str(&first.matched);
            cursor = end;
        }

        redacted.push_str(&text[cursor..]);
        redacted
    }
}
224
225#[derive(Debug, Clone, Default)]
230pub struct ScannerConfig {
231 pub disabled: bool,
234 pub custom_patterns: Vec<String>,
238}
239
240pub struct CredentialScanner {
246 patterns: AhoCorasick,
247 kinds: Vec<CredentialKind>,
251 disabled: bool,
253}
254
255impl Default for CredentialScanner {
256 fn default() -> Self {
257 Self::new()
258 }
259}
260
261impl CredentialScanner {
262 pub fn new() -> Self {
269 Self::with_config(ScannerConfig::default())
270 }
271
272 pub fn with_config(config: ScannerConfig) -> Self {
278 let mut all_patterns: Vec<&str> = AC_PATTERNS.to_vec();
279 let custom_refs: Vec<&str> = config.custom_patterns.iter().map(|s| s.as_str()).collect();
281 all_patterns.extend_from_slice(&custom_refs);
282
283 let mut kinds: Vec<CredentialKind> = AC_KINDS.to_vec();
284 kinds.extend(std::iter::repeat(CredentialKind::Custom).take(config.custom_patterns.len()));
285
286 let ac = AhoCorasick::builder()
287 .match_kind(aho_corasick::MatchKind::LeftmostFirst)
288 .build(&all_patterns)
289 .expect("AC patterns are always valid");
290
291 Self {
292 patterns: ac,
293 kinds,
294 disabled: config.disabled,
295 }
296 }
297
298 pub fn scan(&self, text: &str) -> ScanResult {
307 if self.disabled {
308 return ScanResult { findings: Vec::new() };
309 }
310
311 let mut findings = Vec::new();
312
313 for mat in self.patterns.find_iter(text) {
316 let kind = self.kinds[mat.pattern()].clone();
317 let offset = mat.start();
318 let end = token_end(text, mat.end());
319 findings.push(CredentialFinding::new(kind, offset, end));
320 }
321
322 scan_digit_sequences(text, &mut findings);
324
325 scan_emails(text, &mut findings);
327
328 scan_high_entropy(text, &mut findings);
330
331 findings.sort_by_key(|f| f.offset);
332 ScanResult { findings }
333 }
334}
335
/// Returns the byte offset at which the token containing `from` ends.
///
/// The token stops at the first whitespace character or closing delimiter
/// (`"`, `'`, `,`, `;`, `)`, `]`, `}`) at or after `from`; if none is found
/// the result is `text.len()`. `from` must be a character boundary within
/// `text`, otherwise the slice below panics.
fn token_end(text: &str, from: usize) -> usize {
    const TERMINATORS: [char; 7] = ['"', '\'', ',', ';', ')', ']', '}'];

    for (rel, ch) in text[from..].char_indices() {
        if ch.is_whitespace() || TERMINATORS.contains(&ch) {
            return from + rel;
        }
    }
    text.len()
}
348
/// Returns `true` when `s` is exactly eleven bytes shaped like `ddd-dd-dddd`,
/// where every `d` is an ASCII digit.
fn is_ssn(s: &str) -> bool {
    s.len() == 11
        && s.bytes().enumerate().all(|(idx, byte)| match idx {
            // The two separator positions.
            3 | 6 => byte == b'-',
            _ => byte.is_ascii_digit(),
        })
}
359
/// Returns `true` when `digits` consists of 13 to 19 ASCII digits whose Luhn
/// checksum is divisible by ten.
///
/// Any non-digit character makes the result `false`; separators must be
/// removed by the caller.
fn luhn_valid(digits: &str) -> bool {
    if !(13..=19).contains(&digits.len()) {
        return false;
    }

    let mut checksum = 0u32;
    // Walk from the rightmost digit; every second digit is doubled.
    for (pos, byte) in digits.bytes().rev().enumerate() {
        if !byte.is_ascii_digit() {
            return false;
        }
        let digit = u32::from(byte - b'0');
        checksum += match (pos % 2 == 1, digit * 2) {
            (false, _) => digit,
            (true, doubled) if doubled > 9 => doubled - 9,
            (true, doubled) => doubled,
        };
    }
    checksum % 10 == 0
}
387
388fn scan_digit_sequences(text: &str, findings: &mut Vec<CredentialFinding>) {
390 let bytes = text.as_bytes();
391 let mut i = 0;
392 while i < bytes.len() {
393 if !bytes[i].is_ascii_digit() {
394 i += 1;
395 continue;
396 }
397
398 let start = i;
399 let mut digits = String::new();
400 let mut j = i;
401 let limit = (start + 24).min(bytes.len());
402
403 while j < limit {
404 match bytes[j] {
405 b if b.is_ascii_digit() => {
406 digits.push(b as char);
407 j += 1;
408 }
409 b' ' | b'-' if !digits.is_empty() => {
410 j += 1;
411 }
412 _ => break,
413 }
414 }
415
416 let end = j;
417 let segment = &text[start..end];
418
419 if is_ssn(segment) {
420 findings.push(CredentialFinding::new(CredentialKind::SsnPattern, start, end));
421 } else if digits.len() >= 13 && digits.len() <= 19 && luhn_valid(&digits) {
422 findings.push(CredentialFinding::new(CredentialKind::CreditCardLuhn, start, end));
423 }
424 i = end.max(i + 1);
425 }
426}
427
/// Computes the Shannon entropy of `s` in bits per byte.
///
/// The distribution is taken over the bytes of `s`, not its characters.
/// An empty string has entropy `0.0`.
fn shannon_entropy(s: &str) -> f64 {
    if s.is_empty() {
        return 0.0;
    }

    // Histogram of byte values.
    let mut counts = [0u32; 256];
    s.bytes().for_each(|byte| counts[usize::from(byte)] += 1);

    let total = s.len() as f64;
    counts
        .iter()
        .filter_map(|&count| {
            (count > 0).then(|| {
                let p = f64::from(count) / total;
                -p * p.log2()
            })
        })
        .sum()
}
446
447fn scan_high_entropy(text: &str, findings: &mut Vec<CredentialFinding>) {
450 let mut offset = 0usize;
451 for token in text.split_whitespace() {
452 let token_offset = text[offset..].find(token).map(|i| offset + i).unwrap_or(offset);
453 let token_end_pos = token_offset + token.len();
454 let len = token.len();
455 if (20..=64).contains(&len) && shannon_entropy(token) > 4.5 {
456 findings.push(CredentialFinding::new(
457 CredentialKind::GenericHighEntropy,
458 token_offset,
459 token_end_pos,
460 ));
461 }
462 offset = token_end_pos;
463 }
464}
465
466fn scan_emails(text: &str, findings: &mut Vec<CredentialFinding>) {
468 let mut search = text;
469 let mut base = 0usize;
470
471 while let Some(at) = search.find('@') {
472 let abs_at = base + at;
473
474 let local_start = text[..abs_at]
475 .rfind(|c: char| c.is_whitespace() || matches!(c, '<' | ',' | ';' | '"' | '\''))
476 .map(|i| i + 1)
477 .unwrap_or(0);
478
479 let domain_end = token_end(text, abs_at + 1);
480 let local = &text[local_start..abs_at];
481 let domain = &text[abs_at + 1..domain_end];
482
483 if !local.is_empty() && domain.contains('.') && domain.len() >= 3 {
484 findings.push(CredentialFinding::new(
485 CredentialKind::EmailAddress,
486 local_start,
487 domain_end,
488 ));
489 }
490
491 let next = abs_at + 1;
492 if next >= text.len() {
493 break;
494 }
495 search = &text[next..];
496 base = next;
497 }
498}
499
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `text` with a scanner built from the default configuration.
    fn scan_default(text: &str) -> ScanResult {
        CredentialScanner::new().scan(text)
    }

    /// Returns `true` when `result` contains at least one finding of `kind`.
    fn has_kind(result: &ScanResult, kind: CredentialKind) -> bool {
        result.findings.iter().any(|f| f.kind == kind)
    }

    /// Returns `true` when a default scan of `text` reports `kind`.
    fn detects(text: &str, kind: CredentialKind) -> bool {
        has_kind(&scan_default(text), kind)
    }

    // --- CredentialKind -----------------------------------------------------

    #[test]
    fn credential_kind_as_str_round_trips() {
        assert_eq!(CredentialKind::AnthropicKey.as_str(), "AnthropicKey");
        assert_eq!(CredentialKind::AwsAccessKey.as_str(), "AwsAccessKey");
        assert_eq!(CredentialKind::GenericHighEntropy.as_str(), "GenericHighEntropy");
    }

    // --- API keys and cloud credentials -------------------------------------

    #[test]
    fn detects_anthropic_key() {
        assert!(detects(
            "auth: sk-ant-api03-XXXXXXXXXXXXXXXXXXXX",
            CredentialKind::AnthropicKey
        ));
    }

    #[test]
    fn detects_openai_key_not_misclassified_as_anthropic() {
        let result = scan_default("key: sk-proj-XXXXXXXXXXXXXXXXXXXX");
        assert!(has_kind(&result, CredentialKind::OpenAiKey));
        assert!(!has_kind(&result, CredentialKind::AnthropicKey));
    }

    #[test]
    fn detects_aws_access_key() {
        assert!(detects(
            "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE",
            CredentialKind::AwsAccessKey
        ));
    }

    #[test]
    fn detects_gcp_service_account() {
        assert!(detects(
            r#"{"type": "service_account", "project_id": "my-project"}"#,
            CredentialKind::GcpServiceAccount
        ));
    }

    // --- GitHub and Slack tokens --------------------------------------------

    #[test]
    fn detects_github_pat() {
        assert!(detects(
            "token: ghp_1234567890abcdefghijklmnopqrstuvwxyz",
            CredentialKind::GitHubPat
        ));
    }

    #[test]
    fn detects_github_app_token() {
        assert!(detects(
            "token: ghs_1234567890abcdefghijklmnopqrstuvwxyz",
            CredentialKind::GitHubAppToken
        ));
    }

    #[test]
    fn detects_slack_bot_token() {
        assert!(detects(
            "SLACK_BOT_TOKEN=xoxb-123456789012-123456789012-XXXXXXXXXXXX",
            CredentialKind::SlackBotToken
        ));
    }

    #[test]
    fn detects_slack_user_token() {
        assert!(detects(
            "token=xoxp-123456789012-123456789012-XXXXXXXXXXXX",
            CredentialKind::SlackUserToken
        ));
    }

    #[test]
    fn detects_slack_oauth_token() {
        assert!(detects(
            "oauth=xoxa-123456789012-123456789012-XXXXXXXXXXXX",
            CredentialKind::SlackOAuthToken
        ));
    }

    // --- Connection strings -------------------------------------------------

    #[test]
    fn detects_azure_connection_string() {
        assert!(detects(
            "DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=XXXX",
            CredentialKind::AzureConnectionString
        ));
    }

    #[test]
    fn detects_postgres_url() {
        assert!(detects(
            "DATABASE_URL=postgres://user:password@host:5432/db",
            CredentialKind::PostgresUrl
        ));
    }

    #[test]
    fn detects_mysql_url() {
        assert!(detects(
            "db=mysql://user:secret@localhost/mydb",
            CredentialKind::MysqlUrl
        ));
    }

    #[test]
    fn detects_mongodb_url() {
        assert!(detects(
            "uri=mongodb://admin:pass@cluster0.mongodb.net/mydb",
            CredentialKind::MongodbUrl
        ));
    }

    // --- Private keys -------------------------------------------------------

    #[test]
    fn detects_rsa_private_key() {
        assert!(detects(
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----",
            CredentialKind::RsaPrivateKey
        ));
    }

    #[test]
    fn detects_ec_private_key() {
        assert!(detects(
            "-----BEGIN EC PRIVATE KEY-----\nMHQCAQEEI...\n-----END EC PRIVATE KEY-----",
            CredentialKind::EcPrivateKey
        ));
    }

    #[test]
    fn detects_openssh_private_key() {
        assert!(detects(
            "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXkAAAA=\n-----END OPENSSH PRIVATE KEY-----",
            CredentialKind::OpensshPrivateKey
        ));
    }

    #[test]
    fn detects_generic_private_key() {
        assert!(detects(
            "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgk=\n-----END PRIVATE KEY-----",
            CredentialKind::PrivateKey
        ));
    }

    #[test]
    fn detects_pgp_private_key() {
        assert!(detects(
            "-----BEGIN PGP PRIVATE KEY BLOCK-----\nlQOYBF...\n-----END PGP PRIVATE KEY BLOCK-----",
            CredentialKind::PgpPrivateKey
        ));
    }

    // --- Personal data heuristics -------------------------------------------

    #[test]
    fn detects_credit_card_luhn() {
        assert!(detects("card: 4532015112830366", CredentialKind::CreditCardLuhn));
    }

    #[test]
    fn detects_credit_card_with_spaces() {
        assert!(detects("card: 4532 0151 1283 0366", CredentialKind::CreditCardLuhn));
    }

    #[test]
    fn does_not_flag_invalid_luhn() {
        assert!(!detects("num: 4532015112830367", CredentialKind::CreditCardLuhn));
    }

    #[test]
    fn detects_ssn() {
        assert!(detects("SSN: 123-45-6789", CredentialKind::SsnPattern));
    }

    #[test]
    fn detects_email_address() {
        assert!(detects(
            "contact: user@example.com for support",
            CredentialKind::EmailAddress
        ));
    }

    // --- High-entropy tokens ------------------------------------------------

    #[test]
    fn detects_high_entropy_token() {
        assert!(detects(
            "secret: xK9mP2nQvR7sT4wY1aB6dF3hJ8lN0eC5",
            CredentialKind::GenericHighEntropy
        ));
    }

    #[test]
    fn does_not_flag_short_token_as_high_entropy() {
        assert!(!detects("word: hello", CredentialKind::GenericHighEntropy));
    }

    // --- luhn_valid ---------------------------------------------------------

    #[test]
    fn luhn_valid_visa_test_number() {
        assert!(luhn_valid("4532015112830366"));
    }

    #[test]
    fn luhn_valid_mastercard_test_number() {
        assert!(luhn_valid("5425233430109903"));
    }

    #[test]
    fn luhn_valid_amex_test_number() {
        assert!(luhn_valid("371449635398431"));
    }

    #[test]
    fn luhn_valid_discover_test_number() {
        assert!(luhn_valid("6011111111111117"));
    }

    #[test]
    fn luhn_invalid_altered_digit() {
        assert!(!luhn_valid("4532015112830367"));
    }

    #[test]
    fn luhn_rejects_too_short() {
        assert!(!luhn_valid("123456789012"));
    }

    #[test]
    fn luhn_rejects_too_long() {
        assert!(!luhn_valid("45320151128303661234"));
    }

    // --- shannon_entropy ----------------------------------------------------

    #[test]
    fn entropy_zero_for_empty() {
        assert_eq!(shannon_entropy(""), 0.0);
    }

    #[test]
    fn entropy_low_for_repeated_char() {
        assert!(shannon_entropy("aaaaaaaaaaaaaaaaaaaaaa") < 1.0);
    }

    #[test]
    fn entropy_high_for_random_base64() {
        assert!(shannon_entropy("xK9mP2nQvR7sT4wY1aB6dF3hJ8lN0") > 4.0);
    }

    #[test]
    fn entropy_moderate_for_english_text() {
        let bits = shannon_entropy("Thequickbrownfoxjumpsoverthelazydog");
        assert!(bits > 3.0);
        assert!(bits < 5.0);
    }

    // --- Redaction ----------------------------------------------------------

    #[test]
    fn redact_replaces_github_pat() {
        let input = "key: ghp_abc123XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end";
        let output = scan_default(input).redact(input);
        assert!(!output.contains("ghp_"));
        assert!(output.contains("[REDACTED:GitHubPat]"));
    }

    #[test]
    fn redact_is_deterministic() {
        let input = "key: ghp_abc123XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
        let scanned = scan_default(input);
        let first = scanned.redact(input);
        let second = scanned.redact(input);
        assert_eq!(first, second);
    }

    #[test]
    fn redact_clean_text_unchanged() {
        let input = "This is a normal sentence with no secrets.";
        let scanned = scan_default(input);
        assert!(scanned.is_clean());
        assert_eq!(scanned.redact(input), input);
    }

    #[test]
    fn redact_multiple_findings_in_one_pass() {
        let input = "a=ghp_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX b=postgres://u:p@host/db";
        let output = scan_default(input).redact(input);
        for leaked in ["ghp_", "postgres://"] {
            assert!(!output.contains(leaked));
        }
        for label in ["[REDACTED:GitHubPat]", "[REDACTED:PostgresUrl]"] {
            assert!(output.contains(label));
        }
    }

    #[test]
    fn is_clean_true_for_benign_text() {
        assert!(scan_default("Hello, world! No secrets here.").is_clean());
    }

    // --- Custom findings ----------------------------------------------------

    #[test]
    fn custom_kind_as_str_returns_custom() {
        assert_eq!(CredentialKind::Custom.as_str(), "Custom");
    }

    #[test]
    fn from_regex_match_creates_custom_finding() {
        let CredentialFinding {
            kind,
            offset,
            matched,
            ..
        } = CredentialFinding::from_regex_match(5, 20);
        assert_eq!(kind, CredentialKind::Custom);
        assert_eq!(offset, 5);
        assert_eq!(matched, "[REDACTED:Custom]");
    }

    // --- False positives ----------------------------------------------------

    #[test]
    fn false_positive_corpus_has_no_hard_credential_hits() {
        let scanner = CredentialScanner::new();
        let corpus = [
            "The quick brown fox jumps over the lazy dog.",
            "fn main() { println!(\"Hello, world!\"); }",
            "SELECT * FROM users WHERE id = 42;",
            "cargo build --release --features std",
            "version = \"1.0.0\" edition = \"2021\"",
            "2026-04-27T15:34:15.377+0800",
            "error[E0382]: borrow of moved value: `x`",
        ];
        for text in corpus.iter() {
            let result = scanner.scan(text);
            // High-entropy hits are heuristic and tolerated here.
            let mut hard = Vec::new();
            for finding in &result.findings {
                if finding.kind != CredentialKind::GenericHighEntropy {
                    hard.push(finding);
                }
            }
            assert!(hard.is_empty(), "false positive in: {:?} → {:?}", text, hard);
        }
    }

    // --- Configuration ------------------------------------------------------

    #[test]
    fn disabled_scanner_returns_empty_result() {
        let mut config = ScannerConfig::default();
        config.disabled = true;
        let scanner = CredentialScanner::with_config(config);
        let result = scanner.scan("sk-proj-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ghp_XXXXXXXXX");
        assert!(result.is_clean(), "disabled scanner must return no findings");
    }

    #[test]
    fn custom_pattern_detected_as_custom_kind() {
        let mut config = ScannerConfig::default();
        config.custom_patterns = vec!["INTERNAL_SECRET_".into()];
        let scanner = CredentialScanner::with_config(config);
        let result = scanner.scan("token=INTERNAL_SECRET_hello");
        let first_custom = result
            .findings
            .iter()
            .find(|f| f.kind == CredentialKind::Custom);
        assert!(first_custom.is_some(), "custom pattern must produce a Custom finding");
        assert!(first_custom.unwrap().matched.contains("[REDACTED:Custom]"));
    }

    #[test]
    fn custom_pattern_coexists_with_builtin() {
        let mut config = ScannerConfig::default();
        config.custom_patterns = vec!["MY_TOKEN_".into()];
        let scanner = CredentialScanner::with_config(config);
        let result = scanner.scan("a=ghp_XXXXXXXXX b=MY_TOKEN_secret123");
        assert!(has_kind(&result, CredentialKind::GitHubPat));
        assert!(has_kind(&result, CredentialKind::Custom));
    }

    #[test]
    fn default_config_matches_new() {
        let text = "key=ghp_XXXXXXXXX url=postgres://u:p@host/db";
        let from_new = CredentialScanner::new().scan(text);
        let from_config = CredentialScanner::with_config(ScannerConfig::default()).scan(text);
        assert_eq!(from_new.findings.len(), from_config.findings.len());
        for (lhs, rhs) in from_new.findings.iter().zip(from_config.findings.iter()) {
            assert_eq!(lhs.kind, rhs.kind);
            assert_eq!(lhs.offset, rhs.offset);
        }
    }
}