use std::collections::BTreeSet;
/// A secret-looking `NAME=value` assignment found in a shell command line.
///
/// NOTE(review): the derived `Debug` prints `value` verbatim, so formatting this
/// struct with `{:?}` exposes the secret itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DetectedSecret {
/// Name of the variable on the left-hand side of the assignment.
pub env_var: String,
/// Assigned value, with one pair of enclosing quotes removed when present.
pub value: String,
/// Provider label from the prefix table, or `"JWT"`; `None` for entropy-based hits.
pub provider: Option<&'static str>,
/// Confidence tier assigned by `classify`.
pub confidence: Confidence,
}
/// Confidence tier attached to a detection.
///
/// The derived ordering follows declaration order, so `High < Medium < Low`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Confidence {
/// The value matched a known provider prefix or has the shape of a JWT.
High,
/// The variable name looks secret-like and the value meets the medium entropy floor.
Medium,
/// The value is at least 24 bytes long and meets the low entropy floor.
Low,
}
/// Known credential prefixes, as `(prefix, provider label, minimum length after prefix)`.
///
/// `match_provider_prefix` picks the longest prefix that the value starts with, and
/// `classify` then requires `value.len() >= prefix.len() + minimum` (lengths in bytes).
/// Overlapping entries such as `sk-ant-` / `sk-proj-` / `sk-` are resolved by that
/// longest-match rule, not by table order.
const PROVIDER_PREFIXES: &[(&str, &str, usize)] = &[
("sk-ant-", "Anthropic", 20),
("sk-proj-", "OpenAI", 20),
("sk-", "OpenAI", 20),
("ghp_", "GitHub", 36),
("gho_", "GitHub", 36),
("ghu_", "GitHub", 36),
("ghs_", "GitHub", 36),
("ghr_", "GitHub", 36),
("github_pat_", "GitHub", 40),
("xoxa-", "Slack", 10),
("xoxb-", "Slack", 10),
("xoxp-", "Slack", 10),
("xoxr-", "Slack", 10),
("xoxs-", "Slack", 10),
("AKIA", "AWS", 16),
("ASIA", "AWS", 16),
("ANPA", "AWS", 16),
("AROA", "AWS", 16),
("AIDA", "AWS", 16),
("dp.st.", "Doppler", 20),
("dp.pt.", "Doppler", 20),
("dp.ct.", "Doppler", 20),
("dp.sa.", "Doppler", 20),
("glpat-", "GitLab", 20),
("sk_live_", "Stripe", 24),
("sk_test_", "Stripe", 24),
("rk_live_", "Stripe", 24),
("rk_test_", "Stripe", 24),
("psk_", "Postmark", 20),
("EAA", "Facebook", 20),
("ya29.", "Google OAuth", 20),
("AIza", "Google API", 35),
("hf_", "HuggingFace", 30),
];
/// Variable-name suffixes that hint at a secret.
///
/// `name_suggests_secret` compares these against the upper-cased variable name with
/// `ends_with`, so the leading underscore is part of the match.
const SECRET_NAME_SUFFIXES: &[&str] = &[
"_API_KEY",
"_APIKEY",
"_TOKEN",
"_SECRET",
"_PASSWORD",
"_PASSWD",
"_PWD",
"_KEY",
"_AUTH",
"_CREDENTIAL",
"_CREDENTIALS",
];
/// Variable names that must never count as secret-like, whatever their suffix.
///
/// An entry ending in `_` is matched as a prefix of the upper-cased name; every other
/// entry must equal the upper-cased name exactly.
///
/// NOTE(review): because non-prefix entries are exact matches, a name such as
/// `SSH_PUBLIC_KEY` is not covered by `PUBLIC_KEY` — confirm that is intended.
const NAME_DENYLIST: &[&str] = &[
"PUBLIC_KEY",
"AWS_ACCESS_KEY_ID",
"GPG_KEY_ID",
"DEPLOY_KEY_ID",
"TF_VAR_", ];
/// Minimum Shannon entropy (bits per byte) for a value whose variable name looks
/// secret-like to be reported with `Confidence::Medium`.
const MEDIUM_ENTROPY_FLOOR: f64 = 3.5;
/// Minimum Shannon entropy (bits per byte) for a value to be reported with
/// `Confidence::Low` on entropy alone; `classify` also requires at least 24 bytes.
const LOW_ENTROPY_FLOOR: f64 = 4.5;
/// Values shorter than this many bytes are skipped before classification.
const MIN_VALUE_LEN: usize = 16;
/// Scans a shell command line for `NAME=value` assignments that look like secrets.
///
/// Assignments come from `extract_assignments`. Values shorter than `MIN_VALUE_LEN`
/// bytes are dropped, repeated `(name, value)` pairs are reported once, and each
/// surviving pair is passed to `classify`. Results keep the order in which the
/// assignments appear in `cmd`.
pub fn detect_in_command(cmd: &str) -> Vec<DetectedSecret> {
    let mut already_seen = BTreeSet::new();

    extract_assignments(cmd)
        .into_iter()
        .filter(|(_, value)| value.len() >= MIN_VALUE_LEN)
        // `insert` returns false for a pair that was recorded earlier.
        .filter(|(name, value)| already_seen.insert((name.clone(), value.clone())))
        .filter_map(|(name, value)| classify(&name, &value))
        .collect()
}
/// Decides whether `value` looks like a secret and, if so, how confident we are.
///
/// Checks run from strongest to weakest signal:
/// 1. a known provider prefix followed by enough characters -> `High`, with provider;
/// 2. JWT shape -> `High`, provider `"JWT"`;
/// 3. secret-like variable name plus entropy at or above `MEDIUM_ENTROPY_FLOOR` -> `Medium`;
/// 4. entropy at or above `LOW_ENTROPY_FLOOR` and at least 24 bytes -> `Low`.
///
/// Returns `None` when no rule fires.
fn classify(env_var: &str, value: &str) -> Option<DetectedSecret> {
    // A prefix hit only counts when the value is long enough for that provider.
    let prefix_hit = match_provider_prefix(value)
        .filter(|&(_, prefix_len, min_after)| value.len() >= prefix_len + min_after)
        .map(|(provider, _, _)| provider);

    let (provider, confidence) = if let Some(provider) = prefix_hit {
        (Some(provider), Confidence::High)
    } else if looks_like_jwt(value) {
        (Some("JWT"), Confidence::High)
    } else {
        let entropy = shannon_bits_per_char(value);
        if name_suggests_secret(env_var) && entropy >= MEDIUM_ENTROPY_FLOOR {
            (None, Confidence::Medium)
        } else if entropy >= LOW_ENTROPY_FLOOR && value.len() >= 24 {
            (None, Confidence::Low)
        } else {
            return None;
        }
    };

    Some(DetectedSecret {
        env_var: env_var.to_string(),
        value: value.to_string(),
        provider,
        confidence,
    })
}
/// Finds the longest entry of `PROVIDER_PREFIXES` that `value` starts with.
///
/// Returns `(provider label, prefix length in bytes, minimum length after prefix)`,
/// or `None` when no prefix matches. The caller enforces the length requirement.
fn match_provider_prefix(value: &str) -> Option<(&'static str, usize, usize)> {
    PROVIDER_PREFIXES
        .iter()
        .filter(|(prefix, _, _)| value.starts_with(prefix))
        // `max_by_key` keeps the last maximum it sees; walking the table backwards
        // makes the earliest table entry win among equally long prefixes.
        .rev()
        .max_by_key(|(prefix, _, _)| prefix.len())
        .map(|&(prefix, provider, min_after)| (provider, prefix.len(), min_after))
}
/// Reports whether a variable name hints that its value is a secret.
///
/// The comparison is ASCII case-insensitive. A name on `NAME_DENYLIST` is never
/// secret-like; otherwise the name must end with one of `SECRET_NAME_SUFFIXES`.
fn name_suggests_secret(env_var: &str) -> bool {
    let name = env_var.to_ascii_uppercase();

    // Denylist entries ending in '_' are prefixes; the rest are exact names.
    let denied = NAME_DENYLIST.iter().any(|&entry| {
        if entry.ends_with('_') {
            name.starts_with(entry)
        } else {
            name == entry
        }
    });
    if denied {
        return false;
    }

    SECRET_NAME_SUFFIXES
        .iter()
        .any(|&suffix| name.ends_with(suffix))
}
/// Cheap structural check for a JSON Web Token.
///
/// Accepts a value that starts with `eyJ`, is at least 40 bytes long, and consists of
/// exactly three non-empty `.`-separated segments. The segments are not decoded.
fn looks_like_jwt(value: &str) -> bool {
    if value.len() < 40 || !value.starts_with("eyJ") {
        return false;
    }

    let mut segment_count = 0usize;
    for segment in value.split('.') {
        if segment.is_empty() {
            return false;
        }
        segment_count += 1;
    }
    segment_count == 3
}
/// Shannon entropy of `s`, in bits per byte.
///
/// The distribution is taken over the UTF-8 bytes of `s`, not over characters.
/// Returns `0.0` for the empty string.
// The usize -> f64 cast is only inexact for lengths above 2^53.
#[allow(clippy::cast_precision_loss)]
fn shannon_bits_per_char(s: &str) -> f64 {
    let bytes = s.as_bytes();
    if bytes.is_empty() {
        return 0.0;
    }

    let mut histogram = [0u32; 256];
    for &byte in bytes {
        histogram[usize::from(byte)] += 1;
    }

    let total = bytes.len() as f64;
    histogram
        .iter()
        .filter(|&&occurrences| occurrences != 0)
        .map(|&occurrences| f64::from(occurrences) / total)
        .fold(0.0, |bits, p| bits - p * p.log2())
}
fn extract_assignments(cmd: &str) -> Vec<(String, String)> {
let trimmed = cmd.trim();
if trimmed.is_empty() {
return Vec::new();
}
let tokens = tokenize_shell(trimmed);
let mut out = Vec::new();
let mut idx = 0usize;
while idx < tokens.len() {
let tok = &tokens[idx];
if matches!(tok.as_str(), "export" | "env" | "declare" | "typeset") {
idx += 1;
while idx < tokens.len() {
if let Some((k, v)) = parse_assignment(&tokens[idx]) {
out.push((k, v));
idx += 1;
} else {
return out;
}
}
return out;
}
if let Some((k, v)) = parse_assignment(tok) {
out.push((k, v));
idx += 1;
} else {
break;
}
}
out
}
/// Parses one shell token of the form `NAME=value`.
///
/// The token is split at its first `=`. Returns `None` when there is no `=` or when
/// the left-hand side is not a valid variable name (this includes an empty name).
/// One pair of enclosing quotes is stripped from the value.
fn parse_assignment(tok: &str) -> Option<(String, String)> {
    let (name, raw_value) = tok.split_once('=')?;
    is_valid_env_name(name).then(|| (name.to_string(), strip_quotes(raw_value).to_string()))
}
/// Checks that `s` is a shell-style identifier: an ASCII letter or `_`, followed by
/// any number of ASCII letters, digits or `_`. The empty string is rejected.
fn is_valid_env_name(s: &str) -> bool {
    match s.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            (head.is_ascii_alphabetic() || *head == b'_')
                && tail
                    .iter()
                    .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'_')
        }
    }
}
/// Removes one pair of matching enclosing quotes (`"..."` or `'...'`) from `value`.
///
/// Anything else is returned unchanged, including a lone quote character and a
/// value whose opening and closing quotes differ.
fn strip_quotes(value: &str) -> &str {
    let unwrap_with = |quote: char| {
        value
            .strip_prefix(quote)
            .and_then(|inner| inner.strip_suffix(quote))
    };

    unwrap_with('"')
        .or_else(|| unwrap_with('\''))
        .unwrap_or(value)
}
/// Splits a command line into whitespace-separated tokens, honouring shell quoting.
///
/// Whitespace inside single or double quotes, or right after a backslash, does not
/// split. Quote characters and backslashes are kept in the tokens; nothing is
/// unescaped. A backslash is literal inside single quotes. An unterminated quote
/// runs to the end of the input.
fn tokenize_shell(s: &str) -> Vec<String> {
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut quote = Quote::None;
    let mut after_backslash = false;

    for ch in s.chars() {
        // The character following a backslash is always taken literally.
        if after_backslash {
            after_backslash = false;
            current.push(ch);
            continue;
        }

        match (ch, quote) {
            ('\\', Quote::None | Quote::Double) => after_backslash = true,
            ('\'', Quote::None) => quote = Quote::Single,
            ('\'', Quote::Single) => quote = Quote::None,
            ('"', Quote::None) => quote = Quote::Double,
            ('"', Quote::Double) => quote = Quote::None,
            (c, Quote::None) if c.is_whitespace() => {
                // Unquoted whitespace ends the current token and is itself dropped.
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
                continue;
            }
            _ => {}
        }
        current.push(ch);
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
// Unit tests for the detector: provider prefixes, JWT shape, quoting, name/entropy
// heuristics, and where assignment scanning stops.
#[cfg(test)]
mod tests {
use super::*;
// Shorthand for the public entry point.
fn detect(cmd: &str) -> Vec<DetectedSecret> {
detect_in_command(cmd)
}
#[test]
fn detects_openai_prefix() {
let d = detect("OPENAI_API_KEY=sk-abc123def456ghi789jklmnopqrs node app.js");
assert_eq!(d.len(), 1);
assert_eq!(d[0].env_var, "OPENAI_API_KEY");
assert_eq!(d[0].provider, Some("OpenAI"));
assert_eq!(d[0].confidence, Confidence::High);
}
// `sk-ant-` and `sk-` both match; the longer prefix must win.
#[test]
fn detects_anthropic_more_specific_than_openai() {
let d = detect("KEY=sk-ant-api03-abcdefghijklmnopqrstuvwxyz1234567890 cmd");
assert_eq!(d.len(), 1);
assert_eq!(d[0].provider, Some("Anthropic"));
}
#[test]
fn detects_github_token() {
let d = detect("export GITHUB_TOKEN=ghp_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789");
assert_eq!(d.len(), 1);
assert_eq!(d[0].provider, Some("GitHub"));
assert_eq!(d[0].confidence, Confidence::High);
}
// The prefix rule fires on the value even though the name is on the denylist.
#[test]
fn detects_aws_access_key() {
let d = detect("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE python deploy.py");
assert_eq!(d.len(), 1);
assert_eq!(d[0].provider, Some("AWS"));
}
// `d` covers `AUTH=Bearer <jwt>`: the JWT is a separate token there, not the
// assigned value, so it must not produce a High hit. `d2` is the direct case.
#[test]
fn detects_jwt() {
let jwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c";
let d = detect(&format!("AUTH=Bearer {jwt}"));
let d2 = detect(&format!("TOKEN={jwt}"));
assert_eq!(d2.len(), 1);
assert_eq!(d2[0].provider, Some("JWT"));
assert!(d.is_empty() || d[0].confidence != Confidence::High);
}
#[test]
fn ignores_short_values() {
let d = detect("FOO=bar make build");
assert!(d.is_empty());
}
#[test]
fn ignores_pure_filenames() {
let d = detect("cargo run --bin envseal");
assert!(d.is_empty());
}
#[test]
fn handles_double_quotes() {
let d = detect(r#"export OPENAI_API_KEY="sk-abc123def456ghi789jklmnopqrs""#);
assert_eq!(d.len(), 1);
assert!(!d[0].value.starts_with('"'));
}
#[test]
fn handles_single_quotes() {
let d = detect("export GITHUB_TOKEN='ghp_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789'");
assert_eq!(d.len(), 1);
assert_eq!(d[0].provider, Some("GitHub"));
}
// Results must come back in command-line order.
#[test]
fn multiple_assignments_in_one_command() {
let d = detect(
"OPENAI_API_KEY=sk-aaaaaaaaaaaaaaaaaaaaaa GITHUB_TOKEN=ghp_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789 node app.js",
);
assert_eq!(d.len(), 2);
assert_eq!(d[0].provider, Some("OpenAI"));
assert_eq!(d[1].provider, Some("GitHub"));
}
#[test]
fn name_match_with_high_entropy() {
let d = detect("MY_SECRET=A8b7C9d1E2f3G4h5I6j7K8l9M0n1O2p3 node");
assert_eq!(d.len(), 1);
assert_eq!(d[0].confidence, Confidence::Medium);
}
// NOTE(review): despite the name, this value is not skipped with the current
// 3.5-bit floor (its entropy is about 4.06 bits/byte, so it is reported as
// Medium); the assertion only rules out a High hit.
#[test]
fn name_match_low_entropy_skipped() {
let d = detect("DB_PASSWORD=password1234567890 node");
assert!(d.is_empty() || d[0].confidence != Confidence::High);
}
#[test]
fn aws_access_key_id_is_denylisted_for_name_match() {
let d = detect("AWS_ACCESS_KEY_ID=not-a-real-id-just-public-info node");
assert!(d.is_empty() || d[0].confidence != Confidence::Medium);
}
// An empty variable name is not an assignment.
#[test]
fn ignores_malformed_assignment() {
let d = detect("=sk-abc123def456ghi789jklmnopqrs cmd");
assert!(d.is_empty());
}
// `KEY=...` after the command word is an argument to `node`, not an assignment.
#[test]
fn stops_scanning_at_command_word() {
let d = detect("FOO=value1 node KEY=sk-abc123def456ghi789jklmnopqrs");
assert!(d.iter().all(|x| x.env_var != "KEY"));
}
#[test]
fn empty_command_returns_empty() {
assert!(detect("").is_empty());
assert!(detect(" ").is_empty());
}
#[test]
fn entropy_calculation_basic() {
assert!(shannon_bits_per_char("aaaaaaaa") < 0.5);
assert!(shannon_bits_per_char("A8b7C9d1E2f3G4h5I6j7K8l9M0n1O2p3") > 4.0);
}
#[test]
fn jwt_must_have_three_segments() {
assert!(looks_like_jwt(
"eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.signaturepartheresignaturehere"
));
assert!(!looks_like_jwt(
"eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
));
assert!(!looks_like_jwt("notajwt.evenclose.tothis"));
}
}