use regex::Regex;
use std::sync::OnceLock;
/// A single finding reported by [`scan_text`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretMatch {
/// Name of the detection pattern that produced this finding (e.g. "AWS Access Key").
pub pattern_name: String,
/// The exact text the pattern matched.
pub matched_value: String,
/// Offset in the scanned text where the match begins, as reported by the regex match.
pub start: usize,
/// Offset in the scanned text where the match ends, as reported by the regex match.
pub end: usize,
}
/// A detection rule: a display name paired with its compiled regular expression.
struct CompiledPattern {
/// Label copied into `SecretMatch::pattern_name` for every hit of this rule.
name: &'static str,
/// Compiled expression that is run against the scanned text.
regex: Regex,
}
/// Lazily initialised list of compiled patterns; `scan_text` fills it on first use
/// by calling `build_patterns`, and reuses it afterwards.
static PATTERNS: OnceLock<Vec<CompiledPattern>> = OnceLock::new();
/// Scans `content` with every compiled pattern and returns the findings.
///
/// Each distinct combination of pattern name and matched text is reported
/// only once, at the first position where that pattern found it. The result
/// is ordered by ascending `start`; findings that share a start offset keep
/// the order in which their patterns are defined (the sort is stable).
///
/// Returns an empty vector for empty input.
pub fn scan_text(content: &str) -> Vec<SecretMatch> {
    if content.is_empty() {
        return Vec::new();
    }

    let compiled = PATTERNS.get_or_init(build_patterns);
    let mut already_reported = std::collections::HashSet::<String>::new();

    let mut hits: Vec<SecretMatch> = compiled
        .iter()
        .flat_map(|entry| {
            entry
                .regex
                .find_iter(content)
                .map(move |found| (entry.name, found))
        })
        // `insert` returns false for a key that was already present, which
        // drops repeated occurrences of the same value under the same pattern.
        .filter(|(label, found)| {
            already_reported.insert(format!("{}:{}", label, found.as_str()))
        })
        .map(|(label, found)| SecretMatch {
            pattern_name: label.to_string(),
            matched_value: found.as_str().to_string(),
            start: found.start(),
            end: found.end(),
        })
        .collect();

    hits.sort_by_key(|hit| hit.start);
    hits
}
/// Reports whether `name` looks like a file that commonly holds credentials.
///
/// The comparison is ASCII case-insensitive. A name is considered sensitive
/// when it contains one of the known fragments anywhere (for example `.env`
/// or `id_rsa`), or when it ends with one of the known key/certificate
/// extensions (for example `.pem`).
pub fn is_sensitive_filename(name: &str) -> bool {
    // Matched anywhere inside the lowercased name.
    const NAME_FRAGMENTS: &[&str] = &[
        ".env",
        "credentials",
        "secrets",
        "keystore",
        "id_rsa",
        "id_ed25519",
        "id_ecdsa",
        "id_dsa",
        "known_hosts",
    ];
    // Matched only at the end of the lowercased name.
    const EXTENSIONS: &[&str] = &[
        ".pem",
        ".key",
        ".p12",
        ".pfx",
        ".jks",
        ".keystore",
        ".cer",
        ".crt",
    ];

    let lowered = name.to_ascii_lowercase();

    NAME_FRAGMENTS
        .iter()
        .any(|fragment| lowered.contains(*fragment))
        || EXTENSIONS.iter().any(|suffix| lowered.ends_with(*suffix))
}
/// Maps a detection pattern name to its risk score.
///
/// Names are compared exactly (case-sensitive). Any name that is not listed
/// in the table receives the fallback score of 10.
pub fn secret_risk_score(pattern_name: &str) -> u32 {
    const FALLBACK_SCORE: u32 = 10;
    const SCORES: &[(&str, u32)] = &[
        ("Private Key", 30),
        ("Anthropic API Key", 25),
        ("OpenAI API Key", 25),
        ("OpenAI Legacy Key", 25),
        ("AWS Access Key", 25),
        ("AWS Secret Key", 25),
        ("Stripe Live Key", 25),
        ("GitHub Token", 20),
        ("GitHub Fine-Grained Token", 20),
        ("Google API Key", 20),
        ("Connection String", 20),
        ("Password in URL", 20),
        ("Slack Token", 15),
        ("JWT Token", 15),
        ("Env File Secret", 15),
        ("Generic API Key", 15),
        ("Stripe Test Key", 10),
    ];

    SCORES
        .iter()
        .find(|(label, _)| *label == pattern_name)
        .map_or(FALLBACK_SCORE, |&(_, score)| score)
}
/// Heuristically decides whether `value` is a dummy/placeholder rather than a
/// real secret.
///
/// Returns `true` when either of the following holds:
///
/// 1. The ASCII-lowercased value contains one of the known placeholder
///    markers (for example `example`, `changeme`, `user:pass`).
/// 2. The value has at least 16 ASCII alphanumeric characters and a single
///    character accounts for more than 35% of them (for example long runs of
///    one repeated character).
///
/// Otherwise returns `false`, including for the empty string.
pub fn is_placeholder_value(value: &str) -> bool {
    // Markers are searched in the LOWERCASED input, so every entry must itself
    // be lowercase; an entry containing uppercase letters can never match.
    const MARKERS: &[&str] = &[
        "example",
        "your_key",
        "your-key",
        "placeholder",
        "changeme",
        "replace_me",
        "insert_here",
        "xxxxxxxx",
        "aaaaaaaa",
        "12345678",
        "abcdefgh",
        "1234567890",
        "testtest",
        "dummy",
        "fake",
        "sample",
        "demo",
        "password",
        "user:pass",
        "user:password",
        // Lowercased form of "MIIEowIBAAKCAQEA", the truncated key body used
        // in this file's own test fixture.
        // NOTE(review): this prefix may also begin real base64 key bodies;
        // confirm that treating it as a placeholder is intended.
        "miieowibaakcaqea",
    ];
    // The repetition check is skipped for short values.
    const MIN_ALNUM_FOR_REPETITION_CHECK: usize = 16;
    // Share of the alphanumeric characters one character may occupy before
    // the value is considered filler.
    const MAX_DOMINANT_CHAR_RATIO: f64 = 0.35;

    let lower = value.to_ascii_lowercase();
    if MARKERS.iter().any(|marker| lower.contains(*marker)) {
        return true;
    }

    // Count ASCII alphanumerics case-sensitively on the original value.
    // Bytes of multi-byte UTF-8 characters are all >= 0x80, so the filter
    // drops them and every index stays below 128.
    let mut freq = [0usize; 128];
    let mut total = 0usize;
    for byte in value.bytes().filter(u8::is_ascii_alphanumeric) {
        freq[usize::from(byte)] += 1;
        total += 1;
    }

    if total < MIN_ALNUM_FOR_REPETITION_CHECK {
        return false;
    }

    let dominant = freq.iter().copied().max().unwrap_or(0);
    dominant as f64 / total as f64 > MAX_DOMINANT_CHAR_RATIO
}
/// Compiles the built-in secret-detection patterns.
///
/// Each entry pairs a display name with a regular expression. An expression
/// that fails to compile is reported on stderr and skipped, so a single bad
/// pattern does not disable the others.
fn build_patterns() -> Vec<CompiledPattern> {
    let defs: &[(&'static str, &str)] = &[
        ("AWS Access Key", r"AKIA[0-9A-Z]{16}"),
        (
            "AWS Secret Key",
            r"(?i)aws_secret_access_key\s*[=:]\s*[A-Za-z0-9/+=]{40}",
        ),
        ("Google API Key", r"AIza[0-9A-Za-z_\-]{35}"),
        ("Anthropic API Key", r"sk-ant-[a-zA-Z0-9_\-]{20,}"),
        ("OpenAI API Key", r"sk-proj-[a-zA-Z0-9_\-]{20,}"),
        ("OpenAI Legacy Key", r"sk-[a-zA-Z0-9]{48}"),
        ("GitHub Token", r"ghp_[A-Za-z0-9]{36}"),
        ("GitHub Fine-Grained Token", r"github_pat_[A-Za-z0-9_]{82}"),
        ("GitLab Token", r"glpat-[A-Za-z0-9\-_]{20}"),
        ("Stripe Live Key", r"sk_live_[0-9a-zA-Z]{24,}"),
        ("Stripe Test Key", r"sk_test_[0-9a-zA-Z]{24,}"),
        (
            "Slack Token",
            r"xox[bpraos]-[0-9]{10,12}-[0-9]{10,12}-[a-zA-Z0-9]{24}",
        ),
        (
            "Slack Webhook",
            r"https://hooks\.slack\.com/services/T[A-Z0-9]{8,12}/B[A-Z0-9]{8,12}/[a-zA-Z0-9]{22,34}",
        ),
        (
            "JWT Token",
            r"eyJ[A-Za-z0-9_-]{10,}\.eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_\-]+",
        ),
        ("Bearer Token", r"(?i)bearer\s+[A-Za-z0-9\-_\.]{30,}"),
        (
            "Private Key",
            r"-----BEGIN (RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY( BLOCK)?-----",
        ),
        (
            "Connection String",
            r"(?i)(postgres(ql)?|mysql|mongodb(\+srv)?|redis|amqp|mssql|sqlserver)://[^\s]{10,}",
        ),
        (
            "Password in URL",
            r"://[^:\s]{1,64}:[^@\s]{8,128}@[^:\s]{1,253}",
        ),
        (
            "Generic API Key",
            r"(?i)(?:api[_\-]?key|apikey|api[_\-]?secret|access[_\-]?key)\s*[=:]\s*[A-Za-z0-9_\-\.]{16,64}",
        ),
        (
            "Env File Secret",
            // The `m` flag makes `^` match at the start of every line; without
            // it only a variable on the very first line of the scanned text
            // could match. Whitespace around `=` is limited to spaces/tabs so
            // an empty assignment cannot swallow the following line.
            r"(?im)^(ANTHROPIC_API_KEY|OPENAI_API_KEY|OPENAI_KEY|GEMINI_API_KEY|DATABASE_URL|DB_PASSWORD|SECRET_KEY|AUTH_SECRET|JWT_SECRET|GITHUB_TOKEN|STRIPE_SECRET|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|TWILIO_AUTH_TOKEN|SENDGRID_API_KEY|CLOUDINARY_SECRET)[ \t]*=[ \t]*\S{8,}",
        ),
    ];

    defs.iter()
        .filter_map(|&(name, pattern)| match Regex::new(pattern) {
            Ok(regex) => Some(CompiledPattern { name, regex }),
            Err(e) => {
                eprintln!("[sandspy] failed to compile pattern '{name}': {e}");
                None
            }
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `text` and asserts that at least one finding was reported under
    /// `expected_pattern`.
    ///
    /// Checking the pattern name (instead of "any finding at all") keeps a
    /// test from passing because an unrelated pattern matched the fixture.
    fn assert_detects(text: &str, expected_pattern: &str) {
        let matches = scan_text(text);
        assert!(
            matches.iter().any(|m| m.pattern_name == expected_pattern),
            "expected a '{expected_pattern}' finding, got: {matches:?}"
        );
    }

    #[test]
    fn anthropic_key_detected() {
        assert_detects(
            "ANTHROPIC_API_KEY=sk-ant-api01-abcdefghijklmnopqrstuvwxyz0123456789",
            "Anthropic API Key",
        );
    }

    #[test]
    fn openai_key_detected() {
        assert_detects(
            "OPENAI_API_KEY=sk-proj-abc1234567890abcdefghijklmnopqrstuvwxyz0123",
            "OpenAI API Key",
        );
    }

    #[test]
    fn aws_key_detected() {
        assert_detects("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE", "AWS Access Key");
    }

    #[test]
    fn private_key_detected() {
        assert_detects(
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA...",
            "Private Key",
        );
    }

    #[test]
    fn stripe_live_key_detected() {
        assert_detects(
            "STRIPE_SECRET=sk_live_abcdefghijklmnopqrstuvwxyz",
            "Stripe Live Key",
        );
    }

    #[test]
    fn env_file_secret_match() {
        // This fixture also matches "Connection String" and "Password in URL",
        // so the pattern name must be checked explicitly.
        assert_detects(
            "DATABASE_URL=postgres://user:password@host:5432/db",
            "Env File Secret",
        );
    }

    #[test]
    fn jwt_detected() {
        assert_detects(
            "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
            "JWT Token",
        );
    }

    #[test]
    fn no_false_positive_on_normal_text() {
        let text = "Hello world, this is a normal README file with no secrets.";
        let matches = scan_text(text);
        assert!(
            matches.is_empty(),
            "should not flag normal text: {:?}",
            matches
        );
    }
}