use aho_corasick::AhoCorasick;
/// Literal markers fed to the Aho-Corasick automaton.
///
/// Entry `i` is classified as `AC_KINDS[i]`, so both tables must keep the
/// same length and order. The automaton is built with leftmost-first
/// semantics, which means that when two patterns match at the same position
/// the one listed earlier wins.
const AC_PATTERNS: &[&str] = &[
    // Vendor API key prefixes. `sk-ant-` has to precede the shorter `sk-`,
    // otherwise the shorter prefix would win at the same position.
    "sk-ant-",
    "sk-",
    "AKIA",
    // Cloud credentials identified by a fixed marker.
    "\"type\": \"service_account\"",
    "DefaultEndpointsProtocol=",
    // Source-hosting and chat tokens.
    "ghp_",
    "ghs_",
    "xoxb-",
    "xoxp-",
    "xoxa-",
    // Database connection URLs.
    "postgres://",
    "mysql://",
    "mongodb://",
    // PEM / armored private key headers.
    "-----BEGIN RSA PRIVATE KEY-----",
    "-----BEGIN EC PRIVATE KEY-----",
    "-----BEGIN OPENSSH PRIVATE KEY-----",
    "-----BEGIN PRIVATE KEY-----",
    "-----BEGIN PGP PRIVATE KEY BLOCK-----",
];
/// Classification for each entry of `AC_PATTERNS`, index for index.
///
/// The scanner looks up `AC_KINDS[pattern_index]` for every automaton match,
/// so this table must stay aligned with `AC_PATTERNS`.
const AC_KINDS: &[CredentialKind] = &[
    CredentialKind::AnthropicKey,          // "sk-ant-"
    CredentialKind::OpenAiKey,             // "sk-"
    CredentialKind::AwsAccessKey,          // "AKIA"
    CredentialKind::GcpServiceAccount,     // "\"type\": \"service_account\""
    CredentialKind::AzureConnectionString, // "DefaultEndpointsProtocol="
    CredentialKind::GitHubPat,             // "ghp_"
    CredentialKind::GitHubAppToken,        // "ghs_"
    CredentialKind::SlackBotToken,         // "xoxb-"
    CredentialKind::SlackUserToken,        // "xoxp-"
    CredentialKind::SlackOAuthToken,       // "xoxa-"
    CredentialKind::PostgresUrl,           // "postgres://"
    CredentialKind::MysqlUrl,              // "mysql://"
    CredentialKind::MongodbUrl,            // "mongodb://"
    CredentialKind::RsaPrivateKey,         // "-----BEGIN RSA PRIVATE KEY-----"
    CredentialKind::EcPrivateKey,          // "-----BEGIN EC PRIVATE KEY-----"
    CredentialKind::OpensshPrivateKey,     // "-----BEGIN OPENSSH PRIVATE KEY-----"
    CredentialKind::PrivateKey,            // "-----BEGIN PRIVATE KEY-----"
    CredentialKind::PgpPrivateKey,         // "-----BEGIN PGP PRIVATE KEY BLOCK-----"
];
/// The category assigned to a scanner finding.
///
/// The first group of variants is produced by literal pattern matches, the
/// second by heuristic scanners, and `Custom` by caller-supplied patterns.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CredentialKind {
    // --- Vendor keys and cloud credentials (literal markers) ---
    AnthropicKey,
    AwsAccessKey,
    GcpServiceAccount,
    OpenAiKey,
    AzureConnectionString,
    // --- Source-hosting and chat tokens ---
    GitHubAppToken,
    GitHubPat,
    SlackBotToken,
    SlackOAuthToken,
    SlackUserToken,
    // --- Database connection URLs ---
    MongodbUrl,
    MysqlUrl,
    PostgresUrl,
    // --- Private key headers ---
    EcPrivateKey,
    OpensshPrivateKey,
    PgpPrivateKey,
    PrivateKey,
    RsaPrivateKey,
    // --- Heuristic detections ---
    /// A 13–19 digit sequence that passes the Luhn checksum.
    CreditCardLuhn,
    EmailAddress,
    /// Digits laid out as `ddd-dd-dddd`.
    SsnPattern,
    /// A whitespace-delimited token with high Shannon entropy.
    GenericHighEntropy,
    /// A match produced by a caller-supplied pattern.
    Custom,
}

impl CredentialKind {
    /// Returns the variant's name as a static string.
    ///
    /// The returned text is what appears inside `[REDACTED:...]` labels.
    pub fn as_str(&self) -> &'static str {
        // Arms follow the declaration order of the enum; the match is kept
        // exhaustive so adding a variant forces an update here.
        match *self {
            CredentialKind::AnthropicKey => "AnthropicKey",
            CredentialKind::AwsAccessKey => "AwsAccessKey",
            CredentialKind::GcpServiceAccount => "GcpServiceAccount",
            CredentialKind::OpenAiKey => "OpenAiKey",
            CredentialKind::AzureConnectionString => "AzureConnectionString",
            CredentialKind::GitHubAppToken => "GitHubAppToken",
            CredentialKind::GitHubPat => "GitHubPat",
            CredentialKind::SlackBotToken => "SlackBotToken",
            CredentialKind::SlackOAuthToken => "SlackOAuthToken",
            CredentialKind::SlackUserToken => "SlackUserToken",
            CredentialKind::MongodbUrl => "MongodbUrl",
            CredentialKind::MysqlUrl => "MysqlUrl",
            CredentialKind::PostgresUrl => "PostgresUrl",
            CredentialKind::EcPrivateKey => "EcPrivateKey",
            CredentialKind::OpensshPrivateKey => "OpensshPrivateKey",
            CredentialKind::PgpPrivateKey => "PgpPrivateKey",
            CredentialKind::PrivateKey => "PrivateKey",
            CredentialKind::RsaPrivateKey => "RsaPrivateKey",
            CredentialKind::CreditCardLuhn => "CreditCardLuhn",
            CredentialKind::EmailAddress => "EmailAddress",
            CredentialKind::SsnPattern => "SsnPattern",
            CredentialKind::GenericHighEntropy => "GenericHighEntropy",
            CredentialKind::Custom => "Custom",
        }
    }
}
/// One detected credential (or credential-like span) inside scanned text.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CredentialFinding {
    /// What the span was classified as.
    pub kind: CredentialKind,
    /// Byte offset in the scanned text where the span starts.
    pub offset: usize,
    /// The redaction label `[REDACTED:<kind>]` — never the secret itself.
    pub matched: String,
    /// Byte offset one past the end of the span. Not serialized when the
    /// `serde` feature is enabled.
    #[cfg_attr(feature = "serde", serde(skip))]
    end: usize,
}

impl CredentialFinding {
    /// Builds a finding for the byte range `offset..end`, deriving the
    /// redaction label from `kind`.
    fn new(kind: CredentialKind, offset: usize, end: usize) -> Self {
        Self {
            // Evaluated before `kind` is moved into the struct below.
            matched: format!("[REDACTED:{}]", kind.as_str()),
            kind,
            offset,
            end,
        }
    }

    /// Creates a [`CredentialKind::Custom`] finding covering `offset..end`,
    /// for callers that locate matches themselves.
    pub fn from_regex_match(offset: usize, end: usize) -> Self {
        Self::new(CredentialKind::Custom, offset, end)
    }
}
/// The set of findings produced by scanning one piece of text.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ScanResult {
    /// All findings. Findings may overlap one another (for example a
    /// database URL that also looks like an e-mail address).
    pub findings: Vec<CredentialFinding>,
}

impl ScanResult {
    /// Returns `true` when nothing was detected.
    pub fn is_clean(&self) -> bool {
        self.findings.is_empty()
    }

    /// Returns a copy of `text` with every finding's byte range replaced by
    /// its redaction label.
    ///
    /// `text` must be the same text the findings were computed from.
    /// Findings whose range does not fit `text` (out of bounds, inverted, or
    /// not on a character boundary) are ignored rather than causing a panic.
    ///
    /// Overlapping findings are merged into a single redacted region that
    /// spans their union and carries the label of the finding that starts
    /// first. Replacing them one at a time would apply stale offsets to an
    /// already-shortened string, which can leave part of a secret behind or
    /// slice through a multi-byte character.
    pub fn redact(&self, text: &str) -> String {
        // Validate every span against the ORIGINAL text up front.
        let mut spans: Vec<&CredentialFinding> = self
            .findings
            .iter()
            .filter(|f| {
                f.offset <= f.end
                    && f.end <= text.len()
                    && text.is_char_boundary(f.offset)
                    && text.is_char_boundary(f.end)
            })
            .collect();
        // Stable sort: findings sharing an offset keep their reported order.
        spans.sort_by_key(|f| f.offset);

        let mut redacted = String::with_capacity(text.len());
        let mut cursor = 0usize; // everything before this is already emitted
        let mut idx = 0usize;
        while idx < spans.len() {
            let leader = spans[idx];
            let mut region_end = leader.end;
            idx += 1;
            // Absorb every later span that starts inside the current region.
            while idx < spans.len() && spans[idx].offset < region_end {
                region_end = region_end.max(spans[idx].end);
                idx += 1;
            }
            redacted.push_str(&text[cursor..leader.offset]);
            redacted.push_str(&leader.matched);
            cursor = region_end;
        }
        redacted.push_str(&text[cursor..]);
        redacted
    }
}
/// Options for building a `CredentialScanner`.
///
/// The derived `Default` yields an enabled scanner with no custom patterns.
#[derive(Debug, Clone, Default)]
pub struct ScannerConfig {
/// When `true`, `scan` returns an empty result without inspecting the text.
pub disabled: bool,
/// Extra literal strings to search for; each match is reported as
/// `CredentialKind::Custom`.
pub custom_patterns: Vec<String>,
}
/// Finds credentials in text using literal pattern matching plus the
/// digit-sequence, e-mail, and entropy heuristics.
pub struct CredentialScanner {
    /// Automaton over the built-in literals followed by the custom ones.
    patterns: AhoCorasick,
    /// `kinds[i]` is the classification for pattern `i` of `patterns`.
    kinds: Vec<CredentialKind>,
    /// When set, `scan` reports nothing.
    disabled: bool,
}

impl Default for CredentialScanner {
    fn default() -> Self {
        Self::new()
    }
}

impl CredentialScanner {
    /// Creates a scanner with the default configuration.
    pub fn new() -> Self {
        Self::with_config(ScannerConfig::default())
    }

    /// Creates a scanner from `config`.
    ///
    /// Empty custom patterns are ignored: an empty literal matches at every
    /// position and would flag (and redact) the entire input.
    ///
    /// # Panics
    ///
    /// Panics if the automaton cannot be built from the combined patterns.
    pub fn with_config(config: ScannerConfig) -> Self {
        let custom: Vec<&str> = config
            .custom_patterns
            .iter()
            .map(String::as_str)
            .filter(|pattern| !pattern.is_empty())
            .collect();

        let mut all_patterns: Vec<&str> = AC_PATTERNS.to_vec();
        all_patterns.extend_from_slice(&custom);

        // Keep `kinds` aligned with `all_patterns`: one `Custom` per custom
        // pattern that was actually kept.
        let mut kinds: Vec<CredentialKind> = AC_KINDS.to_vec();
        kinds.extend(std::iter::repeat(CredentialKind::Custom).take(custom.len()));

        let ac = AhoCorasick::builder()
            .match_kind(aho_corasick::MatchKind::LeftmostFirst)
            .build(&all_patterns)
            .expect("AC patterns are always valid");
        Self {
            patterns: ac,
            kinds,
            disabled: config.disabled,
        }
    }

    /// Scans `text` and returns all findings sorted by start offset.
    ///
    /// Findings from different detectors may overlap. A disabled scanner
    /// returns an empty result.
    pub fn scan(&self, text: &str) -> ScanResult {
        if self.disabled {
            return ScanResult { findings: Vec::new() };
        }
        let mut findings = Vec::new();
        for mat in self.patterns.find_iter(text) {
            let kind = self.kinds[mat.pattern()].clone();
            // Short key prefixes such as `sk-` also occur inside ordinary
            // words ("task-", "disk-"); only accept them at a word start.
            if is_key_prefix_kind(&kind) && preceded_by_alphanumeric(text, mat.start()) {
                continue;
            }
            let end = credential_end(text, &kind, mat.end());
            findings.push(CredentialFinding::new(kind, mat.start(), end));
        }
        scan_digit_sequences(text, &mut findings);
        scan_emails(text, &mut findings);
        scan_high_entropy(text, &mut findings);
        findings.sort_by_key(|f| f.offset);
        ScanResult { findings }
    }
}

/// True for kinds whose built-in pattern is a short token prefix that can
/// appear by accident in the middle of a word.
fn is_key_prefix_kind(kind: &CredentialKind) -> bool {
    matches!(
        kind,
        CredentialKind::AnthropicKey
            | CredentialKind::OpenAiKey
            | CredentialKind::AwsAccessKey
            | CredentialKind::GitHubPat
            | CredentialKind::GitHubAppToken
            | CredentialKind::SlackBotToken
            | CredentialKind::SlackUserToken
            | CredentialKind::SlackOAuthToken
    )
}

/// True when the character immediately before byte offset `at` is a letter
/// or digit. `at` must lie on a character boundary of `text`.
fn preceded_by_alphanumeric(text: &str, at: usize) -> bool {
    text[..at]
        .chars()
        .next_back()
        .map_or(false, char::is_alphanumeric)
}

/// Computes where the secret that starts with a pattern match ends, given
/// the byte offset `match_end` just past the matched literal.
fn credential_end(text: &str, kind: &CredentialKind, match_end: usize) -> usize {
    match kind {
        // The match is only the header line; the key material follows it, so
        // extend through the footer. Fall back to the plain token rule when
        // the block is truncated and has no footer.
        CredentialKind::RsaPrivateKey
        | CredentialKind::EcPrivateKey
        | CredentialKind::OpensshPrivateKey
        | CredentialKind::PrivateKey
        | CredentialKind::PgpPrivateKey => {
            pem_block_end(text, match_end).unwrap_or_else(|| token_end(text, match_end))
        }
        // Connection strings are `;`-separated key/value pairs; stopping at
        // the first `;` would leave the `AccountKey=...` part uncovered.
        CredentialKind::AzureConnectionString => text[match_end..]
            .find(|c: char| c.is_whitespace() || matches!(c, '"' | '\''))
            .map_or(text.len(), |i| match_end + i),
        // Every other kind is a single token.
        _ => token_end(text, match_end),
    }
}

/// Finds the end of the `-----END <label>-----` footer that follows a PEM
/// header ending at `header_end`, or `None` when no complete footer exists.
fn pem_block_end(text: &str, header_end: usize) -> Option<usize> {
    const FOOTER_OPEN: &str = "-----END ";
    const DASHES: &str = "-----";
    let rest = &text[header_end..];
    let label_start = rest.find(FOOTER_OPEN)? + FOOTER_OPEN.len();
    let closing = rest[label_start..].find(DASHES)?;
    Some(header_end + label_start + closing + DASHES.len())
}
/// Returns the byte offset at which the token beginning at `from` stops.
///
/// A token runs until the first whitespace character or one of
/// `"` `'` `,` `;` `)` `]` `}`; if none occurs, it runs to the end of `text`.
/// `from` must be a character boundary no greater than `text.len()`.
fn token_end(text: &str, from: usize) -> usize {
    let tail = &text[from..];
    for (rel, ch) in tail.char_indices() {
        let is_delimiter =
            ch.is_whitespace() || matches!(ch, '"' | '\'' | ',' | ';' | ')' | ']' | '}');
        if is_delimiter {
            return from + rel;
        }
    }
    text.len()
}
/// Checks whether `s` is exactly eleven bytes shaped like `ddd-dd-dddd`.
fn is_ssn(s: &str) -> bool {
    let raw = s.as_bytes();
    if raw.len() != 11 {
        return false;
    }
    // Positions 3 and 6 hold the hyphens; every other position is a digit.
    raw.iter().enumerate().all(|(pos, byte)| match pos {
        3 | 6 => *byte == b'-',
        _ => byte.is_ascii_digit(),
    })
}
/// Returns `true` when `digits` is 13 to 19 decimal digits that satisfy the
/// Luhn checksum.
///
/// Any non-digit character makes the result `false`.
fn luhn_valid(digits: &str) -> bool {
    if !(13..=19).contains(&digits.len()) {
        return false;
    }
    let mut total = 0u32;
    // Walk from the rightmost digit; every second digit (odd position when
    // counted from the right, starting at zero) is doubled.
    for (pos, ch) in digits.chars().rev().enumerate() {
        let Some(value) = ch.to_digit(10) else {
            return false;
        };
        total += if pos % 2 == 1 {
            let doubled = value * 2;
            if doubled > 9 {
                doubled - 9
            } else {
                doubled
            }
        } else {
            value
        };
    }
    total % 10 == 0
}
/// Looks for SSN-shaped and credit-card-like digit runs in `text` and
/// appends a finding for each one.
///
/// A run starts at a digit and continues over digits, spaces, and hyphens
/// for at most 24 bytes. The reported span ends right after the run's last
/// digit: trailing separators are not part of it. (Including them made an
/// SSN followed by a space twelve bytes long, so it was never recognized,
/// and made card spans swallow the following space.)
fn scan_digit_sequences(text: &str, findings: &mut Vec<CredentialFinding>) {
    /// Maximum number of bytes examined for a single run.
    const MAX_RUN_BYTES: usize = 24;

    let bytes = text.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        if !bytes[i].is_ascii_digit() {
            i += 1;
            continue;
        }
        let start = i;
        let limit = (start + MAX_RUN_BYTES).min(bytes.len());
        let mut digits = String::new();
        let mut end = start; // one past the last digit seen so far
        let mut j = start;
        while j < limit {
            match bytes[j] {
                b if b.is_ascii_digit() => {
                    digits.push(b as char);
                    j += 1;
                    end = j;
                }
                // Separators extend the run but never terminate the span.
                b' ' | b'-' => j += 1,
                _ => break,
            }
        }
        // `start` and `end` both sit next to ASCII bytes, so slicing is safe.
        let segment = &text[start..end];
        if is_ssn(segment) {
            findings.push(CredentialFinding::new(CredentialKind::SsnPattern, start, end));
        } else if digits.len() >= 13 && digits.len() <= 19 && luhn_valid(&digits) {
            findings.push(CredentialFinding::new(CredentialKind::CreditCardLuhn, start, end));
        }
        // `bytes[start]` is a digit, so `j > start` and the loop advances.
        i = j;
    }
}
/// Computes the Shannon entropy of `s` in bits per byte, based on the
/// frequency of each byte value. Returns `0.0` for an empty string.
fn shannon_entropy(s: &str) -> f64 {
    let raw = s.as_bytes();
    if raw.is_empty() {
        return 0.0;
    }
    // Histogram over all 256 possible byte values.
    let mut counts = [0u32; 256];
    raw.iter().for_each(|&byte| counts[usize::from(byte)] += 1);

    let total = raw.len() as f64;
    counts
        .iter()
        .filter(|&&n| n != 0)
        .map(|&n| {
            let probability = f64::from(n) / total;
            -probability * probability.log2()
        })
        .sum()
}
/// Appends a `GenericHighEntropy` finding for every whitespace-delimited
/// token of `text` that is 20 to 64 bytes long and has a Shannon entropy
/// above 4.5 bits per byte.
fn scan_high_entropy(text: &str, findings: &mut Vec<CredentialFinding>) {
    // Each token is a sub-slice of `text`, so its byte offset is the
    // distance between the two slices' start addresses.
    let base = text.as_ptr() as usize;
    let suspicious = text
        .split_whitespace()
        .filter(|word| (20..=64).contains(&word.len()) && shannon_entropy(word) > 4.5);
    for word in suspicious {
        let begin = word.as_ptr() as usize - base;
        findings.push(CredentialFinding::new(
            CredentialKind::GenericHighEntropy,
            begin,
            begin + word.len(),
        ));
    }
}
/// Appends an `EmailAddress` finding for every `@` in `text` that has a
/// non-empty local part and a domain of at least three bytes containing a
/// dot.
///
/// The local part extends backwards to the nearest whitespace character or
/// one of `<` `,` `;` `"` `'` (or the start of the text); the domain extends
/// forwards to the end of the token as defined by `token_end`.
fn scan_emails(text: &str, findings: &mut Vec<CredentialFinding>) {
    for (at, _) in text.match_indices('@') {
        // Step past the delimiter by its encoded length: whitespace such as
        // U+00A0 occupies more than one byte, and a fixed `+ 1` would land
        // inside the character and make the slice below panic.
        let local_start = text[..at]
            .char_indices()
            .rev()
            .find(|&(_, c)| c.is_whitespace() || matches!(c, '<' | ',' | ';' | '"' | '\''))
            .map_or(0, |(idx, c)| idx + c.len_utf8());
        let domain_end = token_end(text, at + 1);
        let local = &text[local_start..at];
        let domain = &text[at + 1..domain_end];
        if !local.is_empty() && domain.contains('.') && domain.len() >= 3 {
            findings.push(CredentialFinding::new(
                CredentialKind::EmailAddress,
                local_start,
                domain_end,
            ));
        }
    }
}
// Unit tests covering pattern detection, the heuristic helpers, redaction,
// and scanner configuration.
#[cfg(test)]
mod tests {
use super::*;
// --- CredentialKind naming ---
#[test]
fn credential_kind_as_str_round_trips() {
assert_eq!(CredentialKind::AnthropicKey.as_str(), "AnthropicKey");
assert_eq!(CredentialKind::AwsAccessKey.as_str(), "AwsAccessKey");
assert_eq!(CredentialKind::GenericHighEntropy.as_str(), "GenericHighEntropy");
}
// --- Literal pattern detection: one test per built-in pattern ---
#[test]
fn detects_anthropic_key() {
let scanner = CredentialScanner::new();
let result = scanner.scan("auth: sk-ant-api03-XXXXXXXXXXXXXXXXXXXX");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::AnthropicKey));
}
// `sk-proj-` shares the `sk-` prefix but must not be reported as Anthropic.
#[test]
fn detects_openai_key_not_misclassified_as_anthropic() {
let scanner = CredentialScanner::new();
let result = scanner.scan("key: sk-proj-XXXXXXXXXXXXXXXXXXXX");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::OpenAiKey));
assert!(!result.findings.iter().any(|f| f.kind == CredentialKind::AnthropicKey));
}
#[test]
fn detects_aws_access_key() {
let scanner = CredentialScanner::new();
let result = scanner.scan("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::AwsAccessKey));
}
#[test]
fn detects_gcp_service_account() {
let scanner = CredentialScanner::new();
let result = scanner.scan(r#"{"type": "service_account", "project_id": "my-project"}"#);
assert!(result
.findings
.iter()
.any(|f| f.kind == CredentialKind::GcpServiceAccount));
}
#[test]
fn detects_github_pat() {
let scanner = CredentialScanner::new();
let result = scanner.scan("token: ghp_1234567890abcdefghijklmnopqrstuvwxyz");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::GitHubPat));
}
#[test]
fn detects_github_app_token() {
let scanner = CredentialScanner::new();
let result = scanner.scan("token: ghs_1234567890abcdefghijklmnopqrstuvwxyz");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::GitHubAppToken));
}
#[test]
fn detects_slack_bot_token() {
let scanner = CredentialScanner::new();
let result = scanner.scan("SLACK_BOT_TOKEN=xoxb-123456789012-123456789012-XXXXXXXXXXXX");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::SlackBotToken));
}
#[test]
fn detects_slack_user_token() {
let scanner = CredentialScanner::new();
let result = scanner.scan("token=xoxp-123456789012-123456789012-XXXXXXXXXXXX");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::SlackUserToken));
}
#[test]
fn detects_slack_oauth_token() {
let scanner = CredentialScanner::new();
let result = scanner.scan("oauth=xoxa-123456789012-123456789012-XXXXXXXXXXXX");
assert!(result
.findings
.iter()
.any(|f| f.kind == CredentialKind::SlackOAuthToken));
}
#[test]
fn detects_azure_connection_string() {
let scanner = CredentialScanner::new();
let result = scanner.scan("DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=XXXX");
assert!(result
.findings
.iter()
.any(|f| f.kind == CredentialKind::AzureConnectionString));
}
#[test]
fn detects_postgres_url() {
let scanner = CredentialScanner::new();
let result = scanner.scan("DATABASE_URL=postgres://user:password@host:5432/db");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::PostgresUrl));
}
#[test]
fn detects_mysql_url() {
let scanner = CredentialScanner::new();
let result = scanner.scan("db=mysql://user:secret@localhost/mydb");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::MysqlUrl));
}
#[test]
fn detects_mongodb_url() {
let scanner = CredentialScanner::new();
let result = scanner.scan("uri=mongodb://admin:pass@cluster0.mongodb.net/mydb");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::MongodbUrl));
}
#[test]
fn detects_rsa_private_key() {
let scanner = CredentialScanner::new();
let result =
scanner.scan("-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::RsaPrivateKey));
}
#[test]
fn detects_ec_private_key() {
let scanner = CredentialScanner::new();
let result = scanner.scan("-----BEGIN EC PRIVATE KEY-----\nMHQCAQEEI...\n-----END EC PRIVATE KEY-----");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::EcPrivateKey));
}
#[test]
fn detects_openssh_private_key() {
let scanner = CredentialScanner::new();
let result = scanner
.scan("-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXkAAAA=\n-----END OPENSSH PRIVATE KEY-----");
assert!(result
.findings
.iter()
.any(|f| f.kind == CredentialKind::OpensshPrivateKey));
}
#[test]
fn detects_generic_private_key() {
let scanner = CredentialScanner::new();
let result = scanner.scan("-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgk=\n-----END PRIVATE KEY-----");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::PrivateKey));
}
#[test]
fn detects_pgp_private_key() {
let scanner = CredentialScanner::new();
let result =
scanner.scan("-----BEGIN PGP PRIVATE KEY BLOCK-----\nlQOYBF...\n-----END PGP PRIVATE KEY BLOCK-----");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::PgpPrivateKey));
}
// --- Heuristic detection through the public scan API ---
#[test]
fn detects_credit_card_luhn() {
let scanner = CredentialScanner::new();
let result = scanner.scan("card: 4532015112830366");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::CreditCardLuhn));
}
#[test]
fn detects_credit_card_with_spaces() {
let scanner = CredentialScanner::new();
let result = scanner.scan("card: 4532 0151 1283 0366");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::CreditCardLuhn));
}
// Same number as above with the last digit altered, so the checksum fails.
#[test]
fn does_not_flag_invalid_luhn() {
let scanner = CredentialScanner::new();
let result = scanner.scan("num: 4532015112830367");
assert!(!result.findings.iter().any(|f| f.kind == CredentialKind::CreditCardLuhn));
}
#[test]
fn detects_ssn() {
let scanner = CredentialScanner::new();
let result = scanner.scan("SSN: 123-45-6789");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::SsnPattern));
}
#[test]
fn detects_email_address() {
let scanner = CredentialScanner::new();
let result = scanner.scan("contact: user@example.com for support");
assert!(result.findings.iter().any(|f| f.kind == CredentialKind::EmailAddress));
}
#[test]
fn detects_high_entropy_token() {
let scanner = CredentialScanner::new();
let result = scanner.scan("secret: xK9mP2nQvR7sT4wY1aB6dF3hJ8lN0eC5");
assert!(result
.findings
.iter()
.any(|f| f.kind == CredentialKind::GenericHighEntropy));
}
#[test]
fn does_not_flag_short_token_as_high_entropy() {
let scanner = CredentialScanner::new();
let result = scanner.scan("word: hello");
assert!(!result
.findings
.iter()
.any(|f| f.kind == CredentialKind::GenericHighEntropy));
}
// --- luhn_valid helper ---
#[test]
fn luhn_valid_visa_test_number() {
assert!(luhn_valid("4532015112830366"));
}
#[test]
fn luhn_valid_mastercard_test_number() {
assert!(luhn_valid("5425233430109903"));
}
#[test]
fn luhn_valid_amex_test_number() {
assert!(luhn_valid("371449635398431"));
}
#[test]
fn luhn_valid_discover_test_number() {
assert!(luhn_valid("6011111111111117"));
}
#[test]
fn luhn_invalid_altered_digit() {
assert!(!luhn_valid("4532015112830367"));
}
// 12 digits: below the accepted 13..=19 length range.
#[test]
fn luhn_rejects_too_short() {
assert!(!luhn_valid("123456789012"));
}
// 20 digits: above the accepted 13..=19 length range.
#[test]
fn luhn_rejects_too_long() {
assert!(!luhn_valid("45320151128303661234"));
}
// --- shannon_entropy helper ---
#[test]
fn entropy_zero_for_empty() {
assert_eq!(shannon_entropy(""), 0.0);
}
#[test]
fn entropy_low_for_repeated_char() {
assert!(shannon_entropy("aaaaaaaaaaaaaaaaaaaaaa") < 1.0);
}
#[test]
fn entropy_high_for_random_base64() {
assert!(shannon_entropy("xK9mP2nQvR7sT4wY1aB6dF3hJ8lN0") > 4.0);
}
#[test]
fn entropy_moderate_for_english_text() {
let e = shannon_entropy("Thequickbrownfoxjumpsoverthelazydog");
assert!(e > 3.0 && e < 5.0);
}
// --- Redaction ---
#[test]
fn redact_replaces_github_pat() {
let scanner = CredentialScanner::new();
let text = "key: ghp_abc123XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX end";
let result = scanner.scan(text);
let redacted = result.redact(text);
assert!(!redacted.contains("ghp_"));
assert!(redacted.contains("[REDACTED:GitHubPat]"));
}
#[test]
fn redact_is_deterministic() {
let scanner = CredentialScanner::new();
let text = "key: ghp_abc123XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
let result = scanner.scan(text);
assert_eq!(result.redact(text), result.redact(text));
}
#[test]
fn redact_clean_text_unchanged() {
let scanner = CredentialScanner::new();
let text = "This is a normal sentence with no secrets.";
let result = scanner.scan(text);
assert!(result.is_clean());
assert_eq!(result.redact(text), text);
}
#[test]
fn redact_multiple_findings_in_one_pass() {
let scanner = CredentialScanner::new();
let text = "a=ghp_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX b=postgres://u:p@host/db";
let result = scanner.scan(text);
let redacted = result.redact(text);
assert!(!redacted.contains("ghp_"));
assert!(!redacted.contains("postgres://"));
assert!(redacted.contains("[REDACTED:GitHubPat]"));
assert!(redacted.contains("[REDACTED:PostgresUrl]"));
}
#[test]
fn is_clean_true_for_benign_text() {
let scanner = CredentialScanner::new();
assert!(scanner.scan("Hello, world! No secrets here.").is_clean());
}
// --- Custom kind and externally constructed findings ---
#[test]
fn custom_kind_as_str_returns_custom() {
assert_eq!(CredentialKind::Custom.as_str(), "Custom");
}
#[test]
fn from_regex_match_creates_custom_finding() {
let finding = CredentialFinding::from_regex_match(5, 20);
assert_eq!(finding.kind, CredentialKind::Custom);
assert_eq!(finding.offset, 5);
assert_eq!(finding.matched, "[REDACTED:Custom]");
}
// --- False positives ---
// Only "hard" kinds are asserted absent; GenericHighEntropy hits are
// filtered out before the check.
#[test]
fn false_positive_corpus_has_no_hard_credential_hits() {
let scanner = CredentialScanner::new();
let corpus = [
"The quick brown fox jumps over the lazy dog.",
"fn main() { println!(\"Hello, world!\"); }",
"SELECT * FROM users WHERE id = 42;",
"cargo build --release --features std",
"version = \"1.0.0\" edition = \"2021\"",
"2026-04-27T15:34:15.377+0800",
"error[E0382]: borrow of moved value: `x`",
];
for text in &corpus {
let result = scanner.scan(text);
let hard: Vec<_> = result
.findings
.iter()
.filter(|f| f.kind != CredentialKind::GenericHighEntropy)
.collect();
assert!(hard.is_empty(), "false positive in: {:?} → {:?}", text, hard);
}
}
// --- Scanner configuration ---
#[test]
fn disabled_scanner_returns_empty_result() {
let config = ScannerConfig {
disabled: true,
..Default::default()
};
let scanner = CredentialScanner::with_config(config);
let result = scanner.scan("sk-proj-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ghp_XXXXXXXXX");
assert!(result.is_clean(), "disabled scanner must return no findings");
}
#[test]
fn custom_pattern_detected_as_custom_kind() {
let config = ScannerConfig {
custom_patterns: vec!["INTERNAL_SECRET_".into()],
..Default::default()
};
let scanner = CredentialScanner::with_config(config);
let result = scanner.scan("token=INTERNAL_SECRET_hello");
let custom: Vec<_> = result
.findings
.iter()
.filter(|f| f.kind == CredentialKind::Custom)
.collect();
assert!(!custom.is_empty(), "custom pattern must produce a Custom finding");
assert!(custom[0].matched.contains("[REDACTED:Custom]"));
}
#[test]
fn custom_pattern_coexists_with_builtin() {
let config = ScannerConfig {
custom_patterns: vec!["MY_TOKEN_".into()],
..Default::default()
};
let scanner = CredentialScanner::with_config(config);
let text = "a=ghp_XXXXXXXXX b=MY_TOKEN_secret123";
let result = scanner.scan(text);
let kinds: Vec<_> = result.findings.iter().map(|f| &f.kind).collect();
assert!(kinds.contains(&&CredentialKind::GitHubPat));
assert!(kinds.contains(&&CredentialKind::Custom));
}
// `new()` and `with_config(default)` must report the same kinds at the
// same offsets.
#[test]
fn default_config_matches_new() {
let default_scanner = CredentialScanner::new();
let config_scanner = CredentialScanner::with_config(ScannerConfig::default());
let text = "key=ghp_XXXXXXXXX url=postgres://u:p@host/db";
let r1 = default_scanner.scan(text);
let r2 = config_scanner.scan(text);
assert_eq!(r1.findings.len(), r2.findings.len());
for (a, b) in r1.findings.iter().zip(r2.findings.iter()) {
assert_eq!(a.kind, b.kind);
assert_eq!(a.offset, b.offset);
}
}
}