#![allow(dead_code)]
use regex::Regex;
use std::sync::LazyLock;
/// Outcome of a successful lookup against the known-credential format table.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged,
/// copied and compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormatMatch {
    /// Name of the table entry whose pattern matched, e.g. `"github_pat"`.
    pub format: &'static str,
    /// Result of the entry's checksum function, or `None` when the entry
    /// defines no checksum function.
    pub checksum_valid: Option<bool>,
}
// Lazily compiled patterns for well-known credential formats. Each regex is
// compiled once on first use; the `expect` can only fire if a literal pattern
// below is itself malformed, which is a programming error.

/// `AKIA` or `ASIA` followed by exactly 16 uppercase letters/digits.
static RE_AWS_ACCESS_KEY_ID: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b(AKIA|ASIA)[A-Z0-9]{16}\b").expect("valid regex"));

/// `gh` + one of `p`, `o`, `s`, `r`, `u` + `_` + 36 to 255 alphanumerics.
static RE_GITHUB_PAT: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\bgh[posru]_[A-Za-z0-9]{36,255}\b").expect("valid regex"));

/// `sk_live_` or `rk_live_` followed by 24 to 99 alphanumerics.
static RE_STRIPE_SECRET_KEY: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b(sk|rk)_live_[A-Za-z0-9]{24,99}\b").expect("valid regex"));

/// `xox` + one of `b`, `a`, `p`, `r`, `s` + `-` + at least 10 alphanumerics or dashes.
static RE_SLACK_TOKEN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\bxox[baprs]-[A-Za-z0-9\-]{10,}\b").expect("valid regex"));

/// `AIza` followed by exactly 35 characters from `[0-9A-Za-z_-]`.
///
/// The final character is matched separately: a trailing `\b` after a
/// fixed-length class that contains `-` can never succeed when the token ends
/// in `-` and is followed by a non-word character (a quote, whitespace, end of
/// input), so such keys used to be missed. A word-character ending still
/// requires a word boundary; a `-` ending is accepted as-is. Everything the
/// previous pattern matched is still matched.
static RE_GOOGLE_API_KEY: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\bAIza[0-9A-Za-z\-_]{34}(?:[0-9A-Za-z_]\b|-)").expect("valid regex")
});

/// `sk-` followed by at least 20 alphanumerics.
static RE_OPENAI_API_KEY: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\bsk-[A-Za-z0-9]{20,}\b").expect("valid regex"));

/// `SK` followed by exactly 32 hexadecimal digits.
static RE_TWILIO_API_KEY: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\bSK[0-9a-fA-F]{32}\b").expect("valid regex"));

/// `SG.` + 22 characters + `.` + 43 characters, all from `[A-Za-z0-9_-]`.
///
/// The last character is split out for the same reason as in
/// `RE_GOOGLE_API_KEY`: a token ending in `-` must not depend on a word
/// boundary that cannot exist there.
static RE_SENDGRID_API_KEY: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\bSG\.[A-Za-z0-9_\-]{22}\.[A-Za-z0-9_\-]{42}(?:[A-Za-z0-9_]\b|-)")
        .expect("valid regex")
});

/// `npm_` followed by exactly 36 alphanumerics.
static RE_NPM_TOKEN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\bnpm_[A-Za-z0-9]{36}\b").expect("valid regex"));

/// PEM / ASCII-armor header line of a private key.
///
/// The optional ` BLOCK` suffix is required for OpenPGP armor, whose header is
/// `-----BEGIN PGP PRIVATE KEY BLOCK-----`; without it the `PGP ` alternative
/// could never match a real armored key.
static RE_PRIVATE_KEY_PEM: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----")
        .expect("valid regex")
});

/// Three dot-separated base64url segments, the first two starting with `eyJ`.
static RE_JWT: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\beyJ[A-Za-z0-9_\-]+\.eyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b").expect("valid regex")
});
/// One row of `FORMAT_TABLE`: a named credential format and how to recognise it.
struct FormatEntry {
/// Identifier reported to callers in `FormatMatch::format`.
name: &'static str,
/// Lazily compiled pattern; an entry is a candidate when this matches the input.
re: &'static LazyLock<Regex>,
/// Optional validator called with the full input string. When it returns
/// `false`, `match_known_format` skips this entry and tries the next one.
checksum: Option<fn(&str) -> bool>,
}
static FORMAT_TABLE: &[FormatEntry] = &[
FormatEntry {
name: "aws_access_key_id",
re: &RE_AWS_ACCESS_KEY_ID,
checksum: None,
},
FormatEntry {
name: "github_pat",
re: &RE_GITHUB_PAT,
checksum: None,
},
FormatEntry {
name: "stripe_secret_key",
re: &RE_STRIPE_SECRET_KEY,
checksum: None,
},
FormatEntry {
name: "slack_token",
re: &RE_SLACK_TOKEN,
checksum: None,
},
FormatEntry {
name: "google_api_key",
re: &RE_GOOGLE_API_KEY,
checksum: None,
},
FormatEntry {
name: "openai_api_key",
re: &RE_OPENAI_API_KEY,
checksum: None,
},
FormatEntry {
name: "twilio_api_key",
re: &RE_TWILIO_API_KEY,
checksum: None,
},
FormatEntry {
name: "sendgrid_api_key",
re: &RE_SENDGRID_API_KEY,
checksum: None,
},
FormatEntry {
name: "npm_token",
re: &RE_NPM_TOKEN,
checksum: None,
},
FormatEntry {
name: "private_key_pem",
re: &RE_PRIVATE_KEY_PEM,
checksum: None,
},
FormatEntry {
name: "jwt",
re: &RE_JWT,
checksum: None,
},
];
/// Returns the first table entry whose pattern matches `s`.
///
/// Entries are tried in table order. An entry with a checksum function is
/// skipped when that function rejects `s`; otherwise the checksum result (or
/// `None` when the entry has no checksum) is reported in
/// `FormatMatch::checksum_valid`. Returns `None` when no entry qualifies.
pub fn match_known_format(s: &str) -> Option<FormatMatch> {
    FORMAT_TABLE
        .iter()
        .filter(|candidate| candidate.re.is_match(s))
        .find_map(|candidate| {
            let checksum_valid = candidate.checksum.map(|verify| verify(s));
            // A failed checksum disqualifies this entry; keep scanning.
            (checksum_valid != Some(false)).then_some(FormatMatch {
                format: candidate.name,
                checksum_valid,
            })
        })
}
/// Shannon entropy of `s`, in bits per character (Unicode scalar value).
///
/// Returns `0.0` for the empty string and for strings made of one repeated
/// character; the maximum for a string of `k` distinct, equally frequent
/// characters is `log2(k)`.
///
/// The result is deterministic: frequencies are summed in character order
/// (an ordered map rather than a randomly seeded hash map) and accumulated in
/// `f64`, so repeated calls on the same input return bit-identical values and
/// threshold comparisons cannot flip between runs.
pub fn shannon_entropy_bits_per_char(s: &str) -> f32 {
    let mut counts: std::collections::BTreeMap<char, u64> = std::collections::BTreeMap::new();
    let mut total: u64 = 0;
    // Single pass: tally each character and the overall length together.
    for c in s.chars() {
        *counts.entry(c).or_insert(0) += 1;
        total += 1;
    }
    if total == 0 {
        return 0.0;
    }
    let n = total as f64;
    // Subtract each `p * log2(p)` term (which is <= 0) instead of negating the
    // final sum, so a zero-entropy input yields `+0.0` rather than `-0.0`.
    let bits = counts.values().fold(0.0_f64, |acc, &count| {
        let p = count as f64 / n;
        acc - p * p.log2()
    });
    bits as f32
}
/// Entropy threshold, in the same unit `shannon_entropy_bits_per_char` returns.
/// Not referenced in this file; presumably the minimum entropy a literal must
/// reach to be treated as a generic secret — confirm against callers.
pub const GENERIC_ENTROPY_FLOOR: f32 = 4.0;
/// Length threshold. Not referenced in this file; presumably the minimum
/// literal length for the generic (entropy-based) check — confirm against callers.
pub const GENERIC_MIN_LEN: usize = 20;
/// Reports whether `needle` occurs in `haystack` as a standalone word.
///
/// An occurrence counts only when the bytes immediately before and after it
/// are not ASCII letters or digits (the start and end of `haystack` also count
/// as boundaries). The comparison is byte-wise and case-sensitive. An empty
/// `needle` never matches.
pub fn contains_as_word(haystack: &str, needle: &str) -> bool {
    /// A neighbouring byte is a word edge when it is absent or not ASCII alphanumeric.
    fn is_edge(neighbour: Option<&u8>) -> bool {
        !neighbour.is_some_and(|b| b.is_ascii_alphanumeric())
    }

    let text = haystack.as_bytes();
    let pattern = needle.as_bytes();
    if pattern.is_empty() {
        return false;
    }

    text.windows(pattern.len())
        .enumerate()
        .any(|(start, window)| {
            let before = start.checked_sub(1).and_then(|i| text.get(i));
            let after = text.get(start + pattern.len());
            window == pattern && is_edge(before) && is_edge(after)
        })
}
/// Heuristically decides whether a literal sits in a context where it is
/// unlikely to be a real credential.
///
/// Returns `true` when any of the following holds:
///
/// * `file` looks like documentation, an example, a sample, a test or a
///   fixture (by extension, directory component or file-name marker);
/// * `in_identifier_list` is set, or `surrounding_name` suggests a
///   redaction / filtering list (`sensitive`, `redact`, `filter`, ...);
/// * `literal` looks like a placeholder: it contains `<`, `${` or `...`,
///   contains a placeholder word (`your`, `example`, `test`, ...) as a
///   standalone word, or consists of one repeated character.
///
/// All path and name comparisons are case-insensitive.
///
/// # Parameters
/// * `file` - path of the file containing the literal; relative or absolute,
///   with `/` or `\` separators.
/// * `surrounding_name` - identifier the literal is assigned to or nested under.
/// * `literal` - the candidate string itself.
/// * `in_identifier_list` - caller-supplied flag that forces a `true` result.
pub fn is_non_credential_context(
    file: &str,
    surrounding_name: &str,
    literal: &str,
    in_identifier_list: bool,
) -> bool {
    const DOC_SUFFIXES: &[&str] = &[".md", ".rst", ".txt", ".sample"];
    // "example" also covers the "/examples/" directory marker.
    const PATH_MARKERS: &[&str] = &[
        "example",
        "/docs/",
        "/tests/",
        "/test/",
        "/__fixtures__/",
        "/fixtures/",
        "conftest.py",
        "_test.",
        ".test.",
        ".spec.",
    ];
    const NAME_MARKERS: &[&str] = &[
        "sensitive",
        "redact",
        "filter",
        "blocklist",
        "blacklist",
        "scrub",
        "sanitiz",
    ];
    const PLACEHOLDER_WORDS: &[&str] = &[
        "your",
        "placeholder",
        "example",
        "changeme",
        "xxxx",
        "dummy",
        "fake",
        "test",
        "sample",
    ];

    // Normalise separators and anchor the path with a leading '/', so that a
    // directory marker such as "/tests/" also matches a relative path whose
    // first component is the directory ("tests/auth.py") and Windows-style
    // paths ("src\tests\auth.py").
    let path = format!("/{}", file.to_lowercase().replace('\\', "/"));
    if DOC_SUFFIXES.iter().any(|&suffix| path.ends_with(suffix))
        || PATH_MARKERS.iter().any(|&marker| path.contains(marker))
    {
        return true;
    }

    if in_identifier_list {
        return true;
    }
    let name = surrounding_name.to_lowercase();
    if NAME_MARKERS.iter().any(|&marker| name.contains(marker)) {
        return true;
    }

    let lowered = literal.to_lowercase();
    if lowered.contains('<') || lowered.contains("${") || lowered.contains("...") {
        return true;
    }
    if PLACEHOLDER_WORDS
        .iter()
        .any(|&word| contains_as_word(&lowered, word))
    {
        return true;
    }

    // A literal made of one repeated character ("xxxxxxxx", "********") is
    // filler. Compare characters rather than bytes so repeated multi-byte
    // characters are recognised too. An empty literal is not filler.
    let mut chars = literal.chars();
    match chars.next() {
        Some(first) => chars.all(|c| c == first),
        None => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Representative literals map to the expected format name (or to none).
    #[test]
    fn recognizes_known_formats() {
        let cases = [
            ("AKIAIOSFODNN7EXAMPLE", Some("aws_access_key_id")),
            (
                "ghp_1234567890abcdefghijklmnopqrstuvwxyz",
                Some("github_pat"),
            ),
            ("-----BEGIN RSA PRIVATE KEY-----", Some("private_key_pem")),
            ("hello world", None),
        ];
        for (input, expected) in cases {
            // `assert_eq!` (rather than `assert!(a == b)`) prints both sides on failure.
            assert_eq!(
                match_known_format(input).map(|m| m.format),
                expected,
                "input: {input}"
            );
        }
    }

    /// Repeated characters score near zero; varied characters score high.
    #[test]
    fn entropy_gate() {
        assert!(shannon_entropy_bits_per_char("aaaaaaaaaaaaaaaaaaaa") < 1.0);
        assert!(shannon_entropy_bits_per_char("xR7$kP2!mZ9@qW4#vL8&") > 3.5);
    }

    /// A needle only counts when it is delimited by non-alphanumeric bytes.
    #[test]
    fn contains_as_word_respects_alphanumeric_boundaries() {
        assert!(contains_as_word("your-api-key-here", "your"));
        assert!(contains_as_word("sk_test_abc", "test"));
        assert!(!contains_as_word("yourself", "your"));
        assert!(!contains_as_word("latest", "test"));
        assert!(!contains_as_word("", "test"));
        assert!(!contains_as_word("test", ""));
    }

    #[test]
    fn context_guard_rejects_redaction_lists_and_fixtures() {
        // Identifier lists / redaction-style names are never credentials.
        assert!(is_non_credential_context(
            "filters.py",
            "SENSITIVE_FIELD_PATTERNS",
            "password",
            true
        ));
        // Fixture directories are ignored even for well-formed keys.
        assert!(is_non_credential_context(
            "tests/fixtures/auth.py",
            "",
            "AKIAIOSFODNN7EXAMPLE",
            false
        ));
        // The same key in ordinary source is NOT suppressed: "EXAMPLE" is
        // preceded by a digit, so it is not a standalone placeholder word.
        assert!(!is_non_credential_context(
            "src/aws.py",
            "",
            "AKIAIOSFODNN7EXAMPLE",
            false
        ));
        // Documentation files are ignored.
        assert!(is_non_credential_context("README.md", "", "AKIA...", false));
        // Placeholder wording in the literal itself.
        assert!(is_non_credential_context(
            "src/x.py",
            "",
            "your-api-key-here",
            false
        ));
    }
}