pub(crate) mod keywords;
mod scanner;
#[cfg(test)]
mod entropy_tests;
pub use scanner::{find_entropy_secrets, find_entropy_secrets_with_threshold, is_sensitive_file};
/// Smallest of the entropy cut-offs declared in this section.
/// NOTE(review): presumably compared against `shannon_entropy` output; the
/// code that consumes these thresholds is not visible here — confirm.
pub const LOW_ENTROPY_THRESHOLD: f64 = 3.0;
/// Cut-off labelled "high" entropy.
/// NOTE(review): usage not visible in this section — confirm against the scanner.
pub const HIGH_ENTROPY_THRESHOLD: f64 = 4.5;
/// Cut-off labelled "very high" entropy; the largest of the thresholds declared here.
pub const VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.8;
/// "Very high" cut-off variant for sensitive files; lower than
/// `VERY_HIGH_ENTROPY_THRESHOLD` (5.5 vs 5.8).
/// NOTE(review): presumably applied when `is_sensitive_file` matches — confirm.
pub const SENSITIVE_FILE_VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.5;
/// Returns the entropy of `data`, memoised in a per-thread cache.
///
/// The value itself comes from `shannon_entropy_uncached`. Results are stored
/// in a thread-local map keyed by the 64-bit FNV-1a hash of `data`, so a
/// repeated input on the same thread skips the recomputation.
///
/// Notes on the cache:
/// - Entries are keyed by the hash alone; two different inputs that happen to
///   share a hash would also share a cached value.
/// - Once the map holds `MAX_CACHE_ENTRIES` entries it is emptied completely
///   before the next insertion, which bounds its size.
pub fn shannon_entropy(data: &[u8]) -> f64 {
    use std::cell::RefCell;
    use std::collections::HashMap;

    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;
    const MAX_CACHE_ENTRIES: usize = 4096;

    thread_local! {
        static CACHE: RefCell<HashMap<u64, f64>> = RefCell::new(HashMap::with_capacity(256));
    }

    // FNV-1a: XOR each byte into the state, then multiply by the prime.
    let key = data.iter().fold(FNV_OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    CACHE.with(|cell| {
        let mut memo = cell.borrow_mut();
        match memo.get(&key).copied() {
            Some(hit) => hit,
            None => {
                let computed = shannon_entropy_uncached(data);
                // Wholesale reset keeps the map bounded without tracking recency.
                if memo.len() >= MAX_CACHE_ENTRIES {
                    memo.clear();
                }
                memo.insert(key, computed);
                computed
            }
        }
    })
}
/// Computes the entropy of `data` directly, bypassing the cache used by
/// `shannon_entropy`.
///
/// This is a thin wrapper that forwards to
/// `crate::entropy_fast::shannon_entropy_simd` and returns its result unchanged.
fn shannon_entropy_uncached(data: &[u8]) -> f64 {
crate::entropy_fast::shannon_entropy_simd(data)
}
/// Returns `shannon_entropy(data)` divided by `log2(k)`, where `k` is the
/// number of distinct byte values present in `data`.
///
/// Returns `0.0` when `data` contains fewer than two distinct byte values
/// (this includes empty input), since the divisor would be zero or undefined.
///
/// NOTE(review): assuming `shannon_entropy` reports bits per byte, the result
/// falls in `[0.0, 1.0]` — confirm against the entropy implementation.
pub fn normalized_entropy(data: &[u8]) -> f64 {
    // Presence table: one flag per possible byte value.
    let mut present = [false; 256];
    for &byte in data {
        present[usize::from(byte)] = true;
    }
    let distinct = present.iter().filter(|&&flag| flag).count();

    // Zero or one distinct byte: nothing to normalise against
    // (log2(1) == 0 would make the division meaningless).
    if distinct < 2 {
        return 0.0;
    }

    // With at least two distinct bytes the divisor is >= 1.0, never zero.
    let ceiling = (distinct as f64).log2();
    shannon_entropy(data) / ceiling
}
/// One result record for entropy-based secret detection.
///
/// NOTE(review): the code that fills this struct is not visible in this
/// section; field descriptions below are inferred from names and types and
/// should be confirmed against the scanner.
#[derive(Debug, Clone)]
pub struct EntropyMatch {
/// The matched text (presumably the candidate secret string).
pub value: String,
/// Entropy score recorded for `value`.
pub entropy: f64,
/// Keyword associated with the match (presumably the nearby trigger word).
pub keyword: String,
/// Line position of the match — TODO confirm whether 0- or 1-based.
pub line: usize,
/// Offset of the match — TODO confirm unit (bytes vs chars) and origin.
pub offset: usize,
}
/// Decides whether entropy-based scanning should be applied to `path`.
///
/// Matching is case-insensitive (the path is lowercased first). The rules are
/// evaluated in this order:
///
/// 1. `None` path: always `true`.
/// 2. Path ends with `.json`, `.lock`, `.map`, `.min.js` or `.min.css`:
///    `false`, regardless of `allow_source_files`.
/// 3. `allow_source_files` is set: `true`.
/// 4. Path ends with one of the configuration-style suffixes listed in
///    `CONFIG_SUFFIXES`: `true`.
/// 5. The final path component (split on `/` or `\`) starts with one of the
///    names in `SENSITIVE_NAME_PREFIXES`: `true`.
/// 6. Otherwise `false`.
pub fn is_entropy_appropriate(path: Option<&str>, allow_source_files: bool) -> bool {
    // Suffixes that are never scanned, even when source files are allowed.
    const EXCLUDED_SUFFIXES: &[&str] = &[".json", ".lock", ".map", ".min.js", ".min.css"];
    // Suffixes that qualify a file even when source files are not allowed.
    const CONFIG_SUFFIXES: &[&str] = &[
        ".env",
        ".yaml",
        ".yml",
        ".toml",
        ".properties",
        ".cfg",
        ".conf",
        ".ini",
        ".config",
        ".secrets",
        ".pem",
        ".key",
        ".tfvars",
        ".hcl",
    ];
    // File-name prefixes that qualify a file; a prefix match also covers an
    // exact match.
    const SENSITIVE_NAME_PREFIXES: &[&str] = &[
        ".env",
        "credentials",
        "secrets",
        "apikeys",
        "docker-compose",
        ".npmrc",
        ".pypirc",
        ".netrc",
    ];

    let lower = match path {
        Some(raw) => raw.to_lowercase(),
        None => return true,
    };

    if EXCLUDED_SUFFIXES.iter().any(|&suffix| lower.ends_with(suffix)) {
        return false;
    }

    if allow_source_files || CONFIG_SUFFIXES.iter().any(|&suffix| lower.ends_with(suffix)) {
        return true;
    }

    // Last path component, accepting both Unix and Windows separators.
    let base_name = lower
        .rsplit(|ch: char| ch == '/' || ch == '\\')
        .next()
        .unwrap_or(lower.as_str());

    SENSITIVE_NAME_PREFIXES
        .iter()
        .any(|&prefix| base_name.starts_with(prefix))
}