/// Cheap pre-filter: reports whether `data` contains any of a fixed set of
/// literal credential markers (e.g. `ghp_`, `xoxb-`, `sk_live_`).
///
/// Matching is case-sensitive and done with a lazily built Aho-Corasick
/// automaton shared across calls. If the automaton cannot be built, the
/// function returns `true` for every input, so a broken pre-filter never
/// reports "no marker present".
#[cfg(feature = "simd")]
pub(super) fn has_secret_keyword_fast(data: &[u8]) -> bool {
    use aho_corasick::AhoCorasick;
    use std::sync::LazyLock;

    // Literal markers searched for, verbatim.
    const MARKERS: &[&str] = &[
        "sk-proj-",
        "sk-svcacct-",
        "sk-admin-",
        "sk_live_",
        "sk_test_",
        "rk_live_",
        "pk_live_",
        "ghp_",
        "ghs_",
        "gho_",
        "ghu_",
        "ghr_",
        "github_pat_",
        "xoxb-",
        "xoxp-",
        "xoxa-",
        "xoxr-",
        "xoxs-",
        "xapp-",
        "sk-ant-",
        "hf_",
        ".iam.gserviceaccount.com",
        "glpat-",
        "npm_",
        "HRKU-",
    ];

    // Built on first use; `None` records a construction failure.
    static MATCHER: LazyLock<Option<AhoCorasick>> =
        LazyLock::new(|| AhoCorasick::new(MARKERS).ok());

    match MATCHER.as_ref() {
        Some(matcher) => matcher.find(data).is_some(),
        // No automaton available: answer "maybe" rather than "no".
        None => true,
    }
}
/// Cheap pre-filter: reports whether `data` contains any generic
/// secret-related keyword such as `password`, `token` or `api_key`.
///
/// Matching ignores ASCII case and uses a lazily built Aho-Corasick
/// automaton shared across calls. If the automaton cannot be built, the
/// function returns `true` for every input, so a broken pre-filter never
/// reports "no keyword present".
#[cfg(feature = "simd")]
pub(super) fn has_generic_assignment_keyword(data: &[u8]) -> bool {
    use aho_corasick::AhoCorasick;
    use std::sync::LazyLock;

    // Keywords searched for (ASCII case-insensitively).
    const KEYWORDS: &[&str] = &[
        "secret",
        "password",
        "passwd",
        "token",
        "api_key",
        "apikey",
        "auth_token",
        "private_key",
        "client_secret",
        "access_key",
    ];

    // Built on first use; `None` records a construction failure.
    static MATCHER: LazyLock<Option<AhoCorasick>> = LazyLock::new(|| {
        AhoCorasick::builder()
            .ascii_case_insensitive(true)
            .build(KEYWORDS)
            .ok()
    });

    match MATCHER.as_ref() {
        Some(matcher) => matcher.find(data).is_some(),
        // No automaton available: answer "maybe" rather than "no".
        None => true,
    }
}
/// Reports whether `data` contains at least 32 consecutive ASCII
/// alphanumeric bytes.
///
/// Despite the name, no entropy is computed here; the check is purely on the
/// length of an uninterrupted `[A-Za-z0-9]` run.
#[cfg(feature = "simd")]
pub(super) fn has_high_entropy_run_fast(data: &[u8]) -> bool {
    const MIN_ENTROPY_RUN: usize = 32;

    // Splitting on every non-alphanumeric byte yields exactly the maximal
    // alphanumeric runs (possibly empty ones between adjacent separators).
    data.split(|byte| !byte.is_ascii_alphanumeric())
        .any(|run| run.len() >= MIN_ENTROPY_RUN)
}
/// Returns the minimum entropy a candidate credential must reach to be kept,
/// chosen by detector id and, for `generic-api-key`, by credential length.
///
/// | detector               | length    | floor |
/// |------------------------|-----------|-------|
/// | `generic-api-key`      | `<= 24`   | 3.0   |
/// | `generic-api-key`      | `25..=40` | 2.8   |
/// | `generic-api-key`      | `> 40`    | 3.5   |
/// | `generic-password`     | any       | 2.5   |
/// | `generic-database-url` | any       | 2.0   |
/// | anything else          | any       | 3.5   |
///
/// The length tiers are written as disjoint ranges. Previously the `<= 40`
/// guard was tested before the `<= 24` guard, which made the `3.0` tier
/// unreachable.
pub(super) fn generic_entropy_floor(detector_id: &str, credential_len: usize) -> f64 {
    match (detector_id, credential_len) {
        ("generic-api-key", 0..=24) => 3.0,
        ("generic-api-key", 25..=40) => 2.8,
        ("generic-api-key", _) => 3.5,
        ("generic-password", _) => 2.5,
        ("generic-database-url", _) => 2.0,
        // Unknown detectors get the strictest floor.
        _ => 3.5,
    }
}
/// Returns `true` when `s` is 1 to 64 bytes long and consists solely of
/// ASCII letters, ASCII digits and underscores.
///
/// A leading digit is accepted; this is a character-set check, not a full
/// identifier grammar.
pub(super) fn looks_like_variable_name(s: &str) -> bool {
    const MAX_LEN: usize = 64;

    let is_name_byte =
        |byte: u8| matches!(byte, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z');

    (1..=MAX_LEN).contains(&s.len()) && s.bytes().all(is_name_byte)
}
/// Widens a matched credential to the right so that trailing token bytes and
/// base64 padding that the match stopped short of are included.
///
/// * `data` - the full text the match was found in.
/// * `credential` - the matched credential text.
/// * `match_start` / `match_end` - byte offsets of the match within `data`.
///
/// When `crate::confidence::known_prefix_confidence_floor` returns `Some`
/// for `credential` (presumably: it carries a recognised provider prefix —
/// confirm against that function), the end offset is first advanced over
/// every following byte accepted by `is_provider_token_byte`. The result,
/// extended or not, is then handed to `extend_base64_padding`.
///
/// Returns the (possibly longer) credential slice and its new end offset.
pub(super) fn extend_known_prefix_credential<'a>(
    data: &'a str,
    credential: &'a str,
    match_start: usize,
    match_end: usize,
) -> (&'a str, usize) {
    let mut widened = (credential, match_end);

    if crate::confidence::known_prefix_confidence_floor(credential).is_some() {
        // Count token bytes that directly follow the match; an out-of-range
        // `match_end` simply yields zero extra bytes.
        let extra = data.as_bytes().get(match_end..).map_or(0, |tail| {
            tail.iter()
                .take_while(|&&byte| is_provider_token_byte(byte))
                .count()
        });
        let end = match_end + extra;

        // Only re-slice when something was gained and the new end is a
        // valid place to cut a `str`.
        if extra > 0 && data.is_char_boundary(end) {
            widened = (&data[match_start..end], end);
        }
    }

    extend_base64_padding(data, match_start, widened.0, widened.1)
}
/// Appends up to two `=` padding characters that immediately follow the
/// match to the credential.
///
/// The extension is attempted only when every character of `credential` is
/// an ASCII letter, digit, or one of `+ / - _ =`. If that is not the case,
/// or no `=` follows `match_end` in `data`, the inputs are returned
/// unchanged.
///
/// Returns the (possibly longer) credential slice and its new end offset.
fn extend_base64_padding<'a>(
    data: &'a str,
    match_start: usize,
    credential: &'a str,
    match_end: usize,
) -> (&'a str, usize) {
    const MAX_PADDING: usize = 2;

    let untouched = (credential, match_end);

    let is_base64_like =
        |c: char| c.is_ascii_alphanumeric() || matches!(c, '+' | '/' | '-' | '_' | '=');
    if credential.chars().any(|c| !is_base64_like(c)) {
        return untouched;
    }

    // Number of `=` bytes (capped at two) directly after the match; an
    // out-of-range `match_end` yields zero.
    let padding = data.as_bytes().get(match_end..).map_or(0, |tail| {
        tail.iter()
            .take(MAX_PADDING)
            .take_while(|&&byte| byte == b'=')
            .count()
    });
    if padding == 0 {
        return untouched;
    }

    let end = match_end + padding;
    if data.is_char_boundary(end) {
        (&data[match_start..end], end)
    } else {
        untouched
    }
}
/// Returns `true` for bytes treated as part of a provider token: ASCII
/// letters, ASCII digits, `_`, `-` and `.`.
fn is_provider_token_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-' | b'.'
    )
}