use crate::error::{RuntimeError, RuntimeResult};
/// Result of a secret scan: which detector fired and a masked excerpt of the
/// text that triggered it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretMatch {
/// Name of the detector that matched (for example `"aws-access-key-id"`).
pub detector: &'static str,
/// Masked excerpt of the matching text, in the format produced by `build_match`.
pub masked: String,
}
/// Human-readable rendering that mentions only the detector name and the
/// masked excerpt stored in the match.
impl std::fmt::Display for SecretMatch {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { detector, masked } = self;
        write!(
            f,
            "content matches secret pattern {} at masked excerpt {}",
            detector, masked
        )
    }
}
/// Scans `content` for secrets.
///
/// # Errors
/// Returns `RuntimeError::SecretDetected` carrying the match when `scan`
/// reports a hit; otherwise returns `Ok(())`.
pub fn check(content: &str) -> RuntimeResult<()> {
    match scan(content) {
        Some(hit) => Err(RuntimeError::SecretDetected(hit)),
        None => Ok(()),
    }
}
/// Checks the strings contained in a JSON value for secrets.
///
/// Delegates to `scan_json_value`, which recursively visits string values,
/// array elements, and both the keys and values of objects.
///
/// # Errors
/// Propagates the error returned by `check` for the first offending string.
pub fn check_json(value: &serde_json::Value) -> RuntimeResult<()> {
scan_json_value(value)
}
pub fn check_tags(tags: &[String]) -> RuntimeResult<()> {
for tag in tags {
check(tag)?;
}
Ok(())
}
fn scan_json_value(value: &serde_json::Value) -> RuntimeResult<()> {
match value {
serde_json::Value::String(s) => check(s),
serde_json::Value::Array(arr) => {
for v in arr {
scan_json_value(v)?;
}
Ok(())
}
serde_json::Value::Object(map) => {
for (k, v) in map {
check(k)?;
scan_json_value(v)?;
}
Ok(())
}
_ => Ok(()),
}
}
/// Runs the format-based detectors first and falls back to the entropy
/// heuristic only when none of them fire.
fn scan(text: &str) -> Option<SecretMatch> {
    check_known_patterns(text).or_else(|| check_entropy_heuristic(text))
}
/// Vendor token prefixes tried by `check_known_patterns`.
///
/// Each entry is `(detector name, literal prefix, minimum token length)`.
/// The length is compared against the byte length of the whole
/// whitespace-delimited token starting at the prefix, prefix included (see
/// `find_prefix_token`). Entries are tried in order and the first hit wins.
const PREFIX_DETECTORS: &[(&str, &str, usize)] = &[
("aws-access-key-id", "AKIA", 20),
("aws-access-key-id", "ASIA", 20),
("github-token", "ghp_", 36),
("github-token", "gho_", 36),
("github-token", "github_pat_", 20),
("openai-api-key", "sk-proj-", 40),
("anthropic-api-key", "sk-ant-", 20),
("stripe-secret-key", "sk_live_", 30),
("stripe-restricted-key", "rk_live_", 30),
("fly-token", "fm2_", 20),
("vercel-token", "vercel_", 20),
("slack-token", "xoxb-", 40),
("slack-token", "xoxa-", 40),
("slack-token", "xoxp-", 40),
("slack-token", "xoxr-", 40),
("slack-token", "xoxs-", 40),
("age-secret-key", "AGE-SECRET-KEY-", 60),
];
/// Prefixes treated as benign by the generic `sk-` detector in
/// `check_known_patterns`: a candidate token that starts with one of these is
/// not reported as an `openai-api-key`.
const SK_SAFE_PREFIXES: &[&str] = &["sk-learn", "sk-image", "sk-lego", "sk-base", "sk-misc"];
/// Runs every format-based detector against `text`.
///
/// Detectors are tried in a fixed order and the first hit wins:
/// 1. the vendor prefixes listed in `PREFIX_DETECTORS`,
/// 2. generic `sk-` keys, excluding tokens that start with an entry of
///    `SK_SAFE_PREFIXES`,
/// 3. `FlyV1 <payload>` tokens,
/// 4. PEM private-key blocks,
/// 5. JWTs (`find_jwt`),
/// 6. URLs carrying `user:password@` userinfo (`find_url_userinfo`).
///
/// Returns `None` when no detector matches.
fn check_known_patterns(text: &str) -> Option<SecretMatch> {
    const SK_PREFIX: &str = "sk-";
    const FLY_PREFIX: &str = "FlyV1 ";

    for &(name, needle, min_len) in PREFIX_DETECTORS {
        if let Some(token) = find_prefix_token(text, needle, min_len) {
            return Some(build_match(name, token));
        }
    }

    // Generic `sk-` keys. Every candidate is examined: a benign `sk-learn…`
    // token must not hide a real key that appears later in the same text.
    let mut rest = text;
    while let Some(token) = find_prefix_token(rest, SK_PREFIX, 30) {
        if !SK_SAFE_PREFIXES.iter().any(|safe| token.starts_with(safe)) {
            return Some(build_match("openai-api-key", token));
        }
        // `token` is a sub-slice of `rest`, so the pointer difference is its
        // byte offset. Resume right after this occurrence's `sk-`; the byte
        // before the new start is '-', so position 0 still counts as a boundary.
        let offset = token.as_ptr() as usize - rest.as_ptr() as usize;
        rest = &rest[offset + SK_PREFIX.len()..];
    }

    // `FlyV1 <payload>`: look at every occurrence, not only the first one.
    for (pos, _) in text.match_indices(FLY_PREFIX) {
        let at_boundary = text[..pos]
            .chars()
            .next_back()
            .is_none_or(|c| !c.is_alphanumeric());
        if !at_boundary {
            continue;
        }
        let payload_start = pos + FLY_PREFIX.len();
        let payload = extract_token(&text[payload_start..]);
        if payload.len() >= 4 {
            let candidate = &text[pos..payload_start + payload.len()];
            return Some(build_match("fly-token", candidate));
        }
    }

    // PEM private key: both markers must be present somewhere in the text.
    if text.contains("PRIVATE KEY-----") {
        if let Some(pos) = text.find("-----BEGIN") {
            // The excerpt runs through the end of the `-----END …` line, or to
            // the end of the text when there is no END marker or no newline.
            let block_end = text[pos..].find("-----END").map_or(text.len(), |rel| {
                let end_marker = pos + rel;
                text[end_marker..]
                    .find('\n')
                    .map_or(text.len(), |nl| end_marker + nl + 1)
            });
            return Some(build_match("pem-private-key", &text[pos..block_end]));
        }
    }

    if let Some(jwt) = find_jwt(text) {
        return Some(build_match("jwt", jwt));
    }
    if let Some(url) = find_url_userinfo(text) {
        return Some(build_match("url-userinfo", url));
    }
    None
}
/// Finds the first token in `text` that starts with `needle` and is at least
/// `min_len` bytes long.
///
/// An occurrence only counts when it is at the start of `text` or the
/// preceding character is not alphanumeric. The token runs from the start of
/// `needle` up to the next whitespace (see `extract_token`). Occurrences that
/// fail either test are skipped and the search resumes after them.
fn find_prefix_token<'a>(text: &'a str, needle: &str, min_len: usize) -> Option<&'a str> {
    let step = needle.len().max(1);
    let mut cursor = 0;
    loop {
        // `?` ends the search as soon as no further occurrence exists.
        let hit = cursor + text[cursor..].find(needle)?;
        let glued_to_word = text[..hit]
            .chars()
            .next_back()
            .is_some_and(char::is_alphanumeric);
        if !glued_to_word {
            let candidate = extract_token(&text[hit..]);
            if candidate.len() >= min_len {
                return Some(candidate);
            }
        }
        cursor = hit + step;
    }
}
/// Finds the first JWT-shaped token in `text`.
///
/// A candidate starts at `eyJ` and runs to the next space, tab, CR or LF (or
/// the end of the text). It matches when it has at least three dot-separated
/// segments and the first two both start with `eyJ` and are at least 10 bytes
/// long. A candidate that fails is skipped in full, together with the
/// whitespace byte that ended it.
fn find_jwt(text: &str) -> Option<&str> {
    let raw = text.as_bytes();
    let is_break = |b: &u8| matches!(*b, b' ' | b'\n' | b'\r' | b'\t');

    let mut cursor = 0;
    while cursor + 4 < raw.len() {
        if !raw[cursor..].starts_with(b"eyJ") {
            cursor += 1;
            continue;
        }

        let stop = raw[cursor..]
            .iter()
            .position(is_break)
            .map_or(raw.len(), |off| cursor + off);
        // `cursor` sits on an ASCII 'e' and `stop` on ASCII whitespace or the
        // end of the text, so both are valid char boundaries.
        let candidate = &text[cursor..stop];

        // Three pieces from `splitn(3, '.')` means the candidate has two or more dots.
        let mut segments = candidate.splitn(3, '.');
        if let (Some(header), Some(payload), Some(_signature)) =
            (segments.next(), segments.next(), segments.next())
        {
            let shaped_like_jwt = header.starts_with("eyJ")
                && payload.starts_with("eyJ")
                && header.len() >= 10
                && payload.len() >= 10;
            if shaped_like_jwt {
                return Some(candidate);
            }
        }
        cursor = stop + 1;
    }
    None
}
/// Finds the first URL in `text` that embeds credentials as
/// `scheme://user:password@…`.
///
/// For each `://` the text up to the next `@` is taken as userinfo. It
/// matches when it contains a `:` with a non-empty user before it and at
/// least 4 bytes after it, and holds neither a space nor a newline. The
/// returned slice runs from the start of the scheme (ASCII alphanumerics and
/// `+`, `-`, `.` directly before `://`) to the first space, CR or LF after the
/// `@`, or to the end of the text.
///
/// NOTE(review): userinfo containing `/`, `?` or `#` is accepted, so
/// `https://host:8080/p?mail=a@b.c` is reported as well — confirm that this
/// over-approximation is intended.
fn find_url_userinfo(text: &str) -> Option<&str> {
    /// Evaluates the single `://` occurrence located at byte offset `sep`.
    fn url_with_userinfo_at(text: &str, sep: usize) -> Option<&str> {
        let rest_start = sep + "://".len();
        let rest = &text[rest_start..];
        let at_pos = rest.find('@')?;
        let userinfo = &rest[..at_pos];
        let (user, pass) = userinfo.split_once(':')?;
        if user.is_empty() || pass.len() < 4 {
            return None;
        }
        if userinfo.contains(' ') || userinfo.contains('\n') {
            return None;
        }

        // Walk back over the scheme characters. The first non-scheme character
        // may be multi-byte, so step past it by its UTF-8 width; a fixed
        // `+ 1` would land inside the character and make the slice panic.
        let scheme_start = text[..sep]
            .char_indices()
            .rev()
            .find(|&(_, c)| !c.is_ascii_alphanumeric() && c != '+' && c != '-' && c != '.')
            .map_or(0, |(idx, c)| idx + c.len_utf8());

        let after_at = &rest[at_pos + 1..];
        let tail_len = after_at.find([' ', '\n', '\r']).unwrap_or(after_at.len());
        let end = rest_start + at_pos + 1 + tail_len;
        Some(&text[scheme_start..end])
    }

    // `://` cannot overlap itself, so `match_indices` visits the same
    // occurrences as a repeated `find` that restarts after each one.
    text.match_indices("://")
        .find_map(|(sep, _)| url_with_userinfo_at(text, sep))
}
/// Lower-case substrings that mark the text around a token as credential
/// context for `check_entropy_heuristic`.
///
/// Matching is a plain substring test against the ASCII-lowercased window, so
/// an entry also matches inside longer words (`auth` inside `author`, `key`
/// inside `keyboard`). The word `token` is not listed here; it is handled by
/// `has_standalone_token` and `has_token_assignment`.
const TRIGGER_WORDS: &[&str] = &[
"key",
"secret",
"password",
"passwd",
"credential",
"bearer",
"auth",
"apikey",
"api_key",
"access_key",
"private_key",
];
/// Minimum byte length (after `strip_delimiters`) for a token to be examined
/// by the entropy heuristic.
const MIN_ENTROPY_LEN: usize = 24;
/// Minimum Shannon entropy, in bits per byte, for a token near a trigger to
/// be reported as `high-entropy-token`.
const ENTROPY_THRESHOLD: f64 = 4.5;
/// Number of bytes of context inspected on each side of a token when looking
/// for trigger words.
const TRIGGER_WINDOW: usize = 120;
/// Heuristic detector for opaque credentials that have no known prefix.
///
/// Each whitespace-delimited token is stripped of surrounding delimiters and
/// skipped when it is shorter than `MIN_ENTROPY_LEN`, a canonical UUID, or a
/// `sha<N>-<base64>` content hash. A remaining token is reported only when a
/// trigger appears within `TRIGGER_WINDOW` bytes on either side:
///
/// * pure hex of length 32, 40, 64 or 128 -> `hex-credential-token`;
/// * otherwise, Shannon entropy of at least `ENTROPY_THRESHOLD` ->
///   `high-entropy-token`.
///
/// Tokens with no trigger nearby are never reported.
fn check_entropy_heuristic(text: &str) -> Option<SecretMatch> {
    /// Lengths at which bare hex strings are treated as credentials.
    const HEX_CREDENTIAL_LENGTHS: &[usize] = &[32, 40, 64, 128];

    for raw_token in text.split_ascii_whitespace() {
        let token = strip_delimiters(raw_token);
        if token.len() < MIN_ENTROPY_LEN {
            continue;
        }
        if is_uuid_canonical(token) || is_base64_content_hash(token) {
            continue;
        }

        // `raw_token` is a sub-slice of `text`, so the pointer difference is
        // its byte offset.
        let tok_offset = raw_token.as_ptr() as usize - text.as_ptr() as usize;

        // The window bounds are byte offsets and can land inside a multi-byte
        // character. Widen them outward to the nearest char boundary so the
        // slice below cannot panic on non-ASCII text. Offsets 0 and
        // `text.len()` are always boundaries, so both loops terminate.
        let mut window_start = tok_offset.saturating_sub(TRIGGER_WINDOW);
        while !text.is_char_boundary(window_start) {
            window_start -= 1;
        }
        let mut window_end = (tok_offset + raw_token.len() + TRIGGER_WINDOW).min(text.len());
        while !text.is_char_boundary(window_end) {
            window_end += 1;
        }

        let low_window = text[window_start..window_end].to_ascii_lowercase();
        let near_trigger = TRIGGER_WORDS.iter().any(|tw| low_window.contains(tw))
            || has_standalone_token(&low_window)
            || has_token_assignment(&low_window);
        if !near_trigger {
            // Without credential context nothing is reported, hex or not.
            continue;
        }

        if is_pure_hex(token) && HEX_CREDENTIAL_LENGTHS.contains(&token.len()) {
            return Some(build_match("hex-credential-token", token));
        }

        let entropy = shannon_entropy(token.as_bytes());
        if entropy < ENTROPY_THRESHOLD {
            continue;
        }
        return Some(build_match("high-entropy-token", token));
    }
    None
}
/// Reports whether `low_window` contains `token` as a standalone word.
///
/// An occurrence is standalone when neither the character before it nor the
/// character after it is alphanumeric or `_`. The start and end of the window
/// count as word boundaries. The caller is expected to pass lower-cased text.
fn has_standalone_token(low_window: &str) -> bool {
    const WORD: &str = "token";

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    low_window.match_indices(WORD).any(|(at, _)| {
        let before = low_window[..at].chars().next_back();
        let after = low_window[at + WORD.len()..].chars().next();
        !before.is_some_and(is_ident_char) && !after.is_some_and(is_ident_char)
    })
}
/// Reports whether `low_window` contains `token` immediately followed by `=`
/// or `:`, with no alphanumeric character or `_` directly before it.
///
/// The caller is expected to pass lower-cased text.
fn has_token_assignment(low_window: &str) -> bool {
    const WORD: &str = "token";

    low_window.match_indices(WORD).any(|(at, _)| {
        let glued_before = low_window[..at]
            .chars()
            .next_back()
            .is_some_and(|c| c.is_alphanumeric() || c == '_');
        let followed_by_assign = low_window[at + WORD.len()..].starts_with(['=', ':']);
        !glued_before && followed_by_assign
    })
}
/// Returns `true` when `token`, after removing one optional `0x`/`0X`
/// prefix, consists of 8 to 128 ASCII hex digits.
fn is_pure_hex(token: &str) -> bool {
    let digits = ["0x", "0X"]
        .iter()
        .find_map(|&prefix| token.strip_prefix(prefix))
        .unwrap_or(token);
    (8..=128).contains(&digits.len()) && digits.bytes().all(|b| b.is_ascii_hexdigit())
}
/// Recognises `sha<digits>-<base64>` tokens so the entropy heuristic can skip
/// them.
///
/// The token must start with `sha`, one or more ASCII digits and a `-`. The
/// remainder may end in at most two `=`; without them it must be 43, 64 or
/// 86 to 88 bytes long and consist only of ASCII alphanumerics, `+`, `/`, `-`
/// and `_`. Tokens that start with a known vendor credential prefix are
/// rejected first.
fn is_base64_content_hash(token: &str) -> bool {
    const VENDOR_PREFIXES: &[&str] = &[
        "sk-",
        "rk_live_",
        "fm2_",
        "vercel_",
        "xoxb-",
        "xoxa-",
        "xoxp-",
        "xoxr-",
        "xoxs-",
        "ghp_",
        "gho_",
        "github_pat_",
        "AKIA",
        "ASIA",
        "AGE-SECRET-KEY-",
        "FlyV1",
    ];
    if VENDOR_PREFIXES.iter().any(|p| token.starts_with(p)) {
        return false;
    }

    // Split `sha<digits>-<body>` at the first dash after the `sha` marker.
    let Some((algo_digits, body)) = token
        .strip_prefix("sha")
        .and_then(|rest| rest.split_once('-'))
    else {
        return false;
    };
    if algo_digits.is_empty() || !algo_digits.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }

    let digest = body.trim_end_matches('=');
    if body.len() - digest.len() > 2 {
        return false;
    }
    if !matches!(digest.len(), 43 | 64 | 86..=88) {
        return false;
    }
    digest
        .bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'/' | b'-' | b'_'))
}
/// Returns `true` when `s` has the 36-byte `8-4-4-4-12` hyphenated hex layout
/// of a canonical UUID (either letter case).
fn is_uuid_canonical(s: &str) -> bool {
    s.len() == 36
        && s.bytes().enumerate().all(|(idx, byte)| match idx {
            // Hyphen positions between the five hex groups.
            8 | 13 | 18 | 23 => byte == b'-',
            _ => byte.is_ascii_hexdigit(),
        })
}
/// Trims quote and separator characters (`"`, `'`, `` ` ``, `:`, `=`, `,`,
/// `;`) from both ends of `s`; characters in the interior are left alone.
fn strip_delimiters(s: &str) -> &str {
    const DELIMITERS: &[char] = &['"', '\'', '`', ':', '=', ',', ';'];
    s.trim_matches(DELIMITERS)
}
/// Returns the prefix of `s` that ends just before its first whitespace
/// character, or all of `s` when it contains none.
fn extract_token(s: &str) -> &str {
    // `char::is_whitespace` already covers '\n' and '\r'.
    match s.find(char::is_whitespace) {
        Some(cut) => &s[..cut],
        None => s,
    }
}
/// Shannon entropy of `bytes`, in bits per byte, computed from the byte-value
/// histogram. Returns `0.0` for empty input.
fn shannon_entropy(bytes: &[u8]) -> f64 {
    if bytes.is_empty() {
        return 0.0;
    }

    let mut histogram = [0u32; 256];
    for &byte in bytes {
        histogram[usize::from(byte)] += 1;
    }

    let total = bytes.len() as f64;
    let mut bits = 0.0;
    for &count in histogram.iter().filter(|&&n| n > 0) {
        let p = f64::from(count) / total;
        bits -= p * p.log2();
    }
    bits
}
/// Builds a `SecretMatch` whose excerpt shows only the first six characters
/// of `candidate`, followed by `...` and its total character count.
fn build_match(detector: &'static str, candidate: &str) -> SecretMatch {
    let preview: String = candidate.chars().take(6).collect();
    let total_chars = candidate.chars().count();
    SecretMatch {
        detector,
        masked: format!("{}...{}chars", preview, total_chars),
    }
}
// Unit tests for the secret scanner. All credential-looking values below are
// fabricated fixtures.
#[cfg(test)]
mod tests {
use super::*;
// --- Format-based detectors: each fake credential must be reported, and where
// --- asserted, under the expected detector name.
#[test]
fn blocks_aws_akia() {
let fake = "AKIAFAKEKEY1234567890";
assert!(scan(fake).is_some(), "AKIA must be caught");
let m = scan(fake).unwrap();
assert_eq!(m.detector, "aws-access-key-id");
assert!(
!m.masked.contains("FAKEKEY1234567890"),
"must not echo the secret: {}",
m.masked
);
}
#[test]
fn blocks_aws_asia() {
let fake = "ASIAFAKEKEY00000000000";
let m = scan(fake);
assert!(m.is_some(), "ASIA must be caught");
assert_eq!(m.unwrap().detector, "aws-access-key-id");
}
#[test]
fn blocks_github_ghp() {
let fake = "ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
assert!(scan(fake).is_some(), "ghp_ must be caught");
}
#[test]
fn blocks_github_gho() {
let fake = "gho_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB";
assert!(scan(fake).is_some(), "gho_ must be caught");
}
#[test]
fn blocks_github_pat() {
let fake = "github_pat_AAAAAABBBBBBCCCCCC";
assert!(scan(fake).is_some(), "github_pat_ must be caught");
}
#[test]
fn blocks_openai_sk() {
let fake = "sk-aaaaaabbbbbbccccccddddddeeeeeeffffgg";
assert!(scan(fake).is_some(), "sk- must be caught");
}
#[test]
fn blocks_anthropic_sk_ant() {
let fake = "sk-ant-api03-AAAAAAAAAAAAAAA";
assert!(scan(fake).is_some(), "sk-ant- must be caught");
assert_eq!(scan(fake).unwrap().detector, "anthropic-api-key");
}
#[test]
fn blocks_stripe_live() {
let fake = "sk_live_FAKESTRIPE0000000000000"; assert!(scan(fake).is_some(), "sk_live_ must be caught");
assert_eq!(scan(fake).unwrap().detector, "stripe-secret-key");
}
#[test]
fn blocks_stripe_restricted() {
let fake = "rk_live_FAKESTRIPE0000000000000"; assert!(scan(fake).is_some(), "rk_live_ must be caught");
assert_eq!(scan(fake).unwrap().detector, "stripe-restricted-key");
}
#[test]
fn blocks_fly_flyv1() {
let fake = "FlyV1 FAKEFLYTOKEN000000000000000000";
assert!(scan(fake).is_some(), "FlyV1 must be caught");
assert_eq!(scan(fake).unwrap().detector, "fly-token");
}
#[test]
fn blocks_fly_fm2() {
let fake = "fm2_FAKEFLYTOKEN00000000000000000";
assert!(scan(fake).is_some(), "fm2_ must be caught");
assert_eq!(scan(fake).unwrap().detector, "fly-token");
}
#[test]
fn blocks_vercel_token() {
let fake = "vercel_FAKETOKEN00000000000000000";
assert!(scan(fake).is_some(), "vercel_ must be caught");
assert_eq!(scan(fake).unwrap().detector, "vercel-token");
}
#[test]
fn blocks_slack_xoxb() {
let fake = "xoxb-FAKE-SLACKTOKEN-000000000000000000000000";
assert!(scan(fake).is_some(), "xoxb- must be caught");
assert_eq!(scan(fake).unwrap().detector, "slack-token");
}
// The PEM header is assembled with `concat` so the complete header string
// never appears literally in this file.
#[test]
fn blocks_pem_private_key() {
let header = ["-----BEGIN RSA", " PRIVATE KEY-----"].concat(); let fake = format!("{}\nMIIEo\u{2026}\n-----END RSA PRIVATE KEY-----", header);
assert!(scan(&fake).is_some(), "PEM private key must be caught");
assert_eq!(scan(&fake).unwrap().detector, "pem-private-key");
}
#[test]
fn blocks_pem_ec_private_key() {
let header = ["-----BEGIN EC", " PRIVATE KEY-----"].concat(); let fake = format!("{}\nMHQCAQEE\u{2026}\n-----END EC PRIVATE KEY-----", header);
assert!(scan(&fake).is_some(), "EC PEM must be caught");
}
#[test]
fn blocks_age_secret_key() {
let fake = "AGE-SECRET-KEY-1QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ";
assert!(scan(fake).is_some(), "AGE-SECRET-KEY- must be caught");
assert_eq!(scan(fake).unwrap().detector, "age-secret-key");
}
#[test]
fn blocks_jwt_triple() {
let fake = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.FAKE_SIG_XXXXXXXXXXXX"; assert!(scan(fake).is_some(), "JWT triple must be caught");
assert_eq!(scan(fake).unwrap().detector, "jwt");
}
#[test]
fn blocks_url_userinfo() {
let fake = "postgresql://dbuser:S3cr3tP4ss@db.example.com:5432/mydb";
assert!(scan(fake).is_some(), "URL userinfo must be caught");
assert_eq!(scan(fake).unwrap().detector, "url-userinfo");
}
// --- Entropy heuristic: high-entropy values next to a trigger word.
#[test]
fn blocks_high_entropy_near_bearer_word() {
let fake = "Bearer token: Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM"; assert!(
scan(fake).is_some(),
"high-entropy value near 'bearer' must be caught"
);
assert_eq!(scan(fake).unwrap().detector, "high-entropy-token");
}
#[test]
fn blocks_high_entropy_near_secret_word() {
let fake = "secret=Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM"; assert!(
scan(fake).is_some(),
"high-entropy value near 'secret' must be caught"
);
}
// --- Masking: the excerpt shows a short preview, never the full value.
#[test]
fn error_message_masks_secret() {
let fake = "ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
let m = scan(fake).unwrap();
let masked = &m.masked;
assert!(
!masked.contains("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
"mask must not echo the full secret value; got: {masked}"
);
assert!(
masked.starts_with("ghp_AA"),
"mask must show first 6 chars; got: {masked}"
);
}
// --- Benign content that must not be reported.
#[test]
fn allows_sha256_hex() {
let sha = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
assert!(
scan(sha).is_none(),
"sha256 hex must pass (allowlisted); fired: {:?}",
scan(sha)
);
}
#[test]
fn allows_uuid() {
let uuid = "550e8400-e29b-41d4-a716-446655440000";
assert!(
scan(uuid).is_none(),
"UUID must pass; fired: {:?}",
scan(uuid)
);
}
#[test]
fn allows_git_sha() {
let sha = "d362950a3c9b1a4cb47d97f1623e38f1a1e6bcdf";
assert!(
scan(sha).is_none(),
"git SHA must pass; fired: {:?}",
scan(sha)
);
}
#[test]
fn allows_normal_prose() {
let prose =
"The FlashAttention paper introduces IO-aware tiling for transformer self-attention.";
assert!(scan(prose).is_none(), "normal prose must pass");
}
#[test]
fn allows_code_snippet() {
let code = r#"fn create_entity(name: &str, kind: &str) -> RuntimeResult<Entity> {
self.validate_entity_kind(kind)?;
Ok(Entity::new("local", kind, name))
}"#;
assert!(
scan(code).is_none(),
"code snippet must pass; fired: {:?}",
scan(code)
);
}
#[test]
fn allows_long_url_without_credentials() {
let url = "https://docs.example.com/api/v2/entities?kind=concept&limit=100";
assert!(scan(url).is_none(), "URL without userinfo must pass");
}
#[test]
fn allows_base64_image_stub() {
let b64 = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAC0lEQVQI12NgAAIABQ";
assert!(
scan(b64).is_none(),
"base64 image stub without trigger word must pass; fired: {:?}",
scan(b64)
);
}
#[test]
fn allows_long_plain_url() {
let url = "https://api.github.com/repos/ohdearquant/khive/pulls/76/comments?per_page=100";
assert!(
scan(url).is_none(),
"plain URL must pass; fired: {:?}",
scan(url)
);
}
#[test]
fn allows_manifest_content_hash() {
let line =
"checksum = \"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\"";
assert!(
scan(line).is_none(),
"manifest content hash line must pass; fired: {:?}",
scan(line)
);
}
// --- Public API surface: masked format and `check` results.
#[test]
fn masked_excerpt_format() {
let fake = "AKIAFAKEKEY1234567890";
let m = scan(fake).unwrap();
assert!(m.masked.contains("..."), "masked must contain '...'");
assert!(m.masked.ends_with("chars"), "masked must end with 'chars'");
}
#[test]
fn check_returns_ok_for_safe_content() {
assert!(check("A normal memory note about LoRA.").is_ok());
}
#[test]
fn check_returns_err_for_secret() {
let fake = "AKIAFAKEKEY1234567890";
let result = check(fake);
assert!(result.is_err(), "check must fail for AKIA key");
let err = result.unwrap_err();
assert!(
matches!(err, RuntimeError::SecretDetected(_)),
"error variant must be SecretDetected"
);
}
// --- Direct tests of the small helpers.
#[test]
fn entropy_of_uniform_string_is_zero() {
let s = "aaaaaaaaaaaaaaaa";
assert!(shannon_entropy(s.as_bytes()) < 0.01);
}
#[test]
fn entropy_of_random_bytes_is_high() {
let s = b"X9kZ2vQpLrT8nJwYuAeHfBsDcGiONvM1"; assert!(shannon_entropy(s) > 4.5, "entropy={}", shannon_entropy(s));
}
#[test]
fn allowlist_passes_sha256() {
let sha = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
assert!(is_pure_hex(sha));
}
#[test]
fn allowlist_passes_uuid_canonical() {
assert!(is_uuid_canonical("550e8400-e29b-41d4-a716-446655440000"));
}
#[test]
fn allowlist_does_not_pass_mixed_token() {
assert!(!is_pure_hex("sk-aaaaaabbbbbbccccccddddddeeeeeeffffgg"));
}
// --- JSON and tag entry points.
#[test]
fn check_json_blocks_secret_in_object_value() {
let props = serde_json::json!({ "api_key": "AKIAFAKEKEY1234567890" });
assert!(
check_json(&props).is_err(),
"secret in properties object value must be blocked"
);
}
#[test]
fn check_json_blocks_secret_in_nested_object() {
let props = serde_json::json!({ "credentials": { "token": "sk-proj-FAKEKEY00000000000000000000000000000000" } }); assert!(
check_json(&props).is_err(),
"secret in nested properties object must be blocked"
);
}
#[test]
fn check_json_blocks_secret_in_array() {
let props = serde_json::json!(["normal", "AKIAFAKEKEY1234567890"]);
assert!(
check_json(&props).is_err(),
"secret in JSON array must be blocked"
);
}
#[test]
fn check_json_passes_safe_properties() {
let props = serde_json::json!({
"domain": "attention",
"status": "researched",
"year": 2024
});
assert!(
check_json(&props).is_ok(),
"normal properties must pass; fired: {:?}",
check_json(&props).err()
);
}
#[test]
fn check_tags_blocks_credential_tag() {
let tags = vec![
"type:concept".to_string(),
"AKIAFAKEKEY1234567890".to_string(),
];
assert!(
check_tags(&tags).is_err(),
"credential-shaped tag must be blocked"
);
}
#[test]
fn check_tags_passes_normal_tags() {
let tags = vec!["type:concept".to_string(), "domain:attention".to_string()];
assert!(
check_tags(&tags).is_ok(),
"normal tags must pass; fired: {:?}",
check_tags(&tags).err()
);
}
// --- `sk-` safe-prefix exemption.
#[test]
fn allows_sk_learn_prose() {
let texts = &[
"sk-learn is a Python machine learning library",
"sk-learn-compatible transformer pipeline reference",
"sk-learn scikit-learn estimator interface",
];
for t in texts {
assert!(
scan(t).is_none(),
"sk-learn prose must pass; fired: {:?} on {:?}",
scan(t),
t
);
}
}
#[test]
fn blocks_openai_sk_proj_not_confused_with_sk_learn() {
let fake = "sk-proj-FAKEKEY00000000000000000000000000000000"; assert!(
scan(fake).is_some(),
"sk-proj- key must still be caught after sk-learn exemption"
);
}
// --- Content hashes and hash-like metadata that must pass.
#[test]
fn allows_sri_hash() {
let line = "integrity key: sha384-oqVuAfXRKap7fdgcCY5uykM6+R9GqQ8K/uxy9rx7HNQlGYl1kPzQho1wx4JwY8wC";
assert!(
scan(line).is_none(),
"SRI hash must pass; fired: {:?}",
scan(line)
);
}
#[test]
fn allows_base64_tokenizer_hash_metadata() {
let line = "tokenizer_vocab_hash: Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM"; assert!(
scan(line).is_none(),
"tokenizer hash metadata must pass; fired: {:?}",
scan(line)
);
}
#[test]
fn allows_npm_lockfile_integrity() {
let body_86 = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM1234567890abcdefghijklmnopqrstuvwxABCDEFGHIJKLMNOPQRST";
assert_eq!(body_86.len(), 86, "test body must be exactly 86 chars");
let line = format!(
"resolved: https://registry.npmjs.org/foo/-/foo-1.0.0.tgz\nintegrity: sha512-{body_86}=="
);
assert!(
scan(&line).is_none(),
"npm lockfile integrity must pass; fired: {:?}",
scan(&line)
);
}
#[test]
fn allows_tokenizer_vocab_hash_no_block() {
let line = "tokenizer_vocab_hash = Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM"; assert!(
scan(line).is_none(),
"tokenizer_vocab_hash must pass; 'token' is only standalone-word matched; fired: {:?}",
scan(line)
);
}
// --- Bare base64url values and `token` triggers that must be reported.
#[test]
fn blocks_bare_base64url_43chars_near_key() {
let token_43 = "wJalrXUtnFEMI-K7MDENGbPxRfiCYEXAMPLEKEYX123"; assert_eq!(token_43.len(), 43, "test token must be exactly 43 chars");
let line = format!("api key {token_43}");
assert!(
scan(&line).is_some(),
"43-char base64url token near 'key' must be caught (no sha-prefix = not a hash); fired: {:?}",
scan(&line)
);
}
#[test]
fn blocks_bare_base64url_64chars_near_secret() {
let token_64 = "wJalrXUtnFEMI-K7MDENGbPxRfiCYEXAMPLEKEYX123wJalrXUtnFEMI-K7MDENa"; assert_eq!(token_64.len(), 64, "test token must be exactly 64 chars");
let line = format!("secret: {token_64}");
assert!(
scan(&line).is_some(),
"64-char base64url token near 'secret' must be caught; got: {:?}",
scan(&line)
);
}
#[test]
fn blocks_bare_base64url_86chars_near_auth() {
let token_86 = "wJalrXUtnFEMI-K7MDENGbPxRfiCYEXAMPLEKEYX123wJalrXUtnFEMI-K7MDENwJalrXUtnFEMI-K7MDENabc"; assert_eq!(token_86.len(), 86, "test token must be exactly 86 chars");
let line = format!("auth header {token_86}");
assert!(
scan(&line).is_some(),
"86-char base64url token near 'auth' must be caught; got: {:?}",
scan(&line)
);
}
#[test]
fn blocks_service_token_opaque_value() {
let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef"; assert!(
opaque.len() >= 24,
"opaque must be long enough for entropy check"
);
let line = format!("service token {opaque}");
assert!(
scan(&line).is_some(),
"service token <opaque> must be caught by standalone 'token' check; got: {:?}",
scan(&line)
);
}
#[test]
fn blocks_token_equals_credential() {
let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef"; let line = format!("token={opaque}");
assert!(
scan(&line).is_some(),
"token=<value> must be caught via token= trigger; got: {:?}",
scan(&line)
);
}
#[test]
fn blocks_token_colon_credential() {
let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef"; let line = format!("token: {opaque}");
assert!(
scan(&line).is_some(),
"token: <value> must be caught via token: trigger; got: {:?}",
scan(&line)
);
}
// --- Compound identifiers containing "token" must not act as triggers.
#[test]
fn allows_next_token_technical_context() {
let line = "next_token: cursor-page-2-abcdef12345678";
assert!(
scan(line).is_none(),
"next_token technical context must not be blocked; fired: {:?}",
scan(line)
);
}
#[test]
fn allows_next_token_high_entropy_cursor() {
let cursor = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef"; let line = format!("next_token: {cursor}");
assert!(
scan(&line).is_none(),
"next_token with high-entropy cursor must pass (compound identifier); fired: {:?}",
scan(&line)
);
}
#[test]
fn allows_token_count_high_entropy() {
let opaque = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvMabcdef"; let line = format!("token_count: {opaque}");
assert!(
scan(&line).is_none(),
"token_count with high-entropy value must pass; fired: {:?}",
scan(&line)
);
}
// --- Pure-hex handling: reported only near a trigger word.
#[test]
fn hex_near_key_blocked_in_credential_context() {
let hex32 = "4f9c2e8a1d3b5c7e9f0a2b4d6e8c0a2b";
assert_eq!(hex32.len(), 32);
let line = format!("api key {hex32}");
assert!(
scan(&line).is_some(),
"32-char pure hex near 'api key' must be blocked; got None"
);
}
#[test]
fn hex_credential_lengths_blocked_near_trigger() {
let hex40 = "a3f5c2e9d1b8047e63a1f4c2d5b6e8f1a9c3d2e4";
let hex64 = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b";
let hex128 = format!("{hex64}{hex64}");
assert_eq!(hex40.len(), 40);
assert_eq!(hex64.len(), 64);
assert_eq!(hex128.len(), 128);
for (label, hex) in &[
("hex40", hex40),
("hex64", hex64),
("hex128", hex128.as_str()),
] {
let line = format!("secret key: {hex}");
assert!(
scan(&line).is_some(),
"{label} near 'secret key' must be blocked; got None"
);
}
}
#[test]
fn hex_blocked_when_trigger_and_hash_word_coexist() {
let hex32 = "4f9c2e8a1d3b5c7e9f0a2b4d6e8c0a2b";
let key_hash_line = format!("api key hash {hex32}");
let secret_sha_line = format!("secret sha {hex32}");
assert!(
scan(&key_hash_line).is_some(),
"'api key hash <hex32>' must be blocked; got None"
);
assert!(
scan(&secret_sha_line).is_some(),
"'secret sha <hex32>' must be blocked; got None"
);
}
#[test]
fn hex_near_sha_context_word_allowed() {
let hex40 = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
let sha_line = format!("sha1: {hex40}");
let commit_line = format!("commit sha {hex40}");
assert!(
scan(&sha_line).is_none(),
"hex40 near 'sha1' context must be allowed; fired: {:?}",
scan(&sha_line)
);
assert!(
scan(&commit_line).is_none(),
"hex40 near 'commit sha' context must be allowed; fired: {:?}",
scan(&commit_line)
);
}
#[test]
fn hex64_near_hash_context_allowed() {
let hex64 = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b";
let sha_line = format!("sha256: {hex64}");
let hash_line = format!("hash value {hex64}");
assert!(
scan(&sha_line).is_none(),
"hex64 near 'sha256' must be allowed; fired: {:?}",
scan(&sha_line)
);
assert!(
scan(&hash_line).is_none(),
"hex64 near 'hash' must be allowed; fired: {:?}",
scan(&hash_line)
);
}
#[test]
fn blocks_high_entropy_hex_like_token_near_key() {
let mixed = "Xk9mZ2vQpLrT8nJwYuAeHfBsDcGiONvM"; assert!(!is_pure_hex(mixed), "test token must not be pure hex");
let line = format!("api key {mixed}");
assert!(
scan(&line).is_some(),
"mixed-charset high-entropy token near 'api key' must be caught; got: {:?}",
scan(&line)
);
}
#[test]
fn allows_hex40_without_trigger() {
let hex40 = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
let line = format!("commit: {hex40}");
assert!(
scan(&line).is_none(),
"40-char hex without trigger word must pass; fired: {:?}",
scan(&line)
);
}
// --- JSON object keys are scanned as well as values.
#[test]
fn check_json_blocks_secret_in_object_key() {
let props = serde_json::json!({ "ghp_FakeGitHubToken0000000000000000000": "redacted" }); assert!(
check_json(&props).is_err(),
"credential as JSON object key must be blocked"
);
}
#[test]
fn check_json_blocks_nested_secret_key() {
let props = serde_json::json!({
"metadata": {
"AKIAFAKEKEY000000000": "value" }
});
assert!(
check_json(&props).is_err(),
"nested credential as JSON object key must be blocked"
);
}
// The length reported in the masked excerpt must cover only the PEM block,
// not the text that follows it.
#[test]
fn pem_masked_excerpt_reflects_block_length_not_rest_of_string() {
let header = ["-----BEGIN RSA", " PRIVATE KEY-----"].concat(); let fake = format!(
"{}\nMIIEo\u{2026}\n-----END RSA PRIVATE KEY-----\nsome trailing text that is very long",
header
);
let m = scan(&fake).unwrap();
assert_eq!(m.detector, "pem-private-key");
let full_len = fake.chars().count();
// Parse the number between "..." and the trailing "chars"; if parsing fails
// the fallback value makes the assertion below fail.
let reported_len: usize = m
.masked
.trim_end_matches("chars")
.rsplit("...")
.next()
.and_then(|s| s.parse().ok())
.unwrap_or(full_len + 1);
assert!(
reported_len < full_len,
"masked length ({reported_len}) should be less than full string length ({full_len})"
);
}
}