pub mod keywords;
mod scanner;
pub use scanner::{find_entropy_secrets, find_entropy_secrets_with_threshold, is_sensitive_file};
/// Lowest of the entropy cut-offs exported by this module.
/// NOTE(review): the comparison sites are not in this part of the file —
/// presumably the scanner compares `shannon_entropy` results against it; confirm.
pub const LOW_ENTROPY_THRESHOLD: f64 = 3.0;
/// Cut-off above which a value counts as "high" entropy.
/// NOTE(review): exact usage is outside this part of the file; confirm with the scanner.
pub const HIGH_ENTROPY_THRESHOLD: f64 = 4.5;
/// Cut-off above which a value counts as "very high" entropy.
/// NOTE(review): exact usage is outside this part of the file; confirm with the scanner.
pub const VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.8;
/// "Very high" cut-off variant for sensitive files; note that it is lower than
/// `VERY_HIGH_ENTROPY_THRESHOLD`.
/// NOTE(review): presumably paired with `is_sensitive_file`; confirm with the scanner.
pub const SENSITIVE_FILE_VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.5;
/// Returns the Shannon entropy of `data`, memoising results for short inputs.
///
/// Inputs longer than 1024 bytes bypass the memo and are computed directly.
/// Shorter inputs are hashed with 64-bit FNV-1a and looked up in a per-thread
/// table; on a miss the value is computed, stored, and returned.
///
/// The table is keyed on the 64-bit hash alone, so two different inputs that
/// hash to the same value share one entry. Once the table holds 4096 entries
/// it is emptied before the next insertion.
pub fn shannon_entropy(data: &[u8]) -> f64 {
    use std::cell::RefCell;
    use std::collections::HashMap;

    /// Inputs longer than this many bytes are never memoised.
    const MAX_MEMOISED_LEN: usize = 1024;
    /// Entry count at which the memo is wiped before inserting.
    const MAX_CACHE_ENTRIES: usize = 4096;
    /// 64-bit FNV-1a offset basis.
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    /// 64-bit FNV prime.
    const FNV_PRIME: u64 = 0x100000001b3;

    thread_local! {
        static CACHE: RefCell<HashMap<u64, f64>> = RefCell::new(HashMap::with_capacity(256));
    }

    if data.len() > MAX_MEMOISED_LEN {
        return shannon_entropy_uncached(data);
    }

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let key = data.iter().fold(FNV_OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    CACHE.with(|cell| {
        let mut memo = cell.borrow_mut();
        match memo.get(&key).copied() {
            Some(hit) => hit,
            None => {
                let computed = shannon_entropy_uncached(data);
                // Crude bound on memory: drop everything once the cap is reached.
                if memo.len() >= MAX_CACHE_ENTRIES {
                    memo.clear();
                }
                memo.insert(key, computed);
                computed
            }
        }
    })
}
/// Computes the entropy of `data` with no memoisation by forwarding the slice
/// unchanged to `crate::entropy_fast::shannon_entropy_simd`.
fn shannon_entropy_uncached(data: &[u8]) -> f64 {
crate::entropy_fast::shannon_entropy_simd(data)
}
/// Returns `shannon_entropy(data)` divided by `log2` of the number of distinct
/// byte values that occur in `data`.
///
/// Yields `0.0` when `data` is empty or contains only one distinct byte value.
/// NOTE(review): the result presumably lies in `[0, 1]`, assuming
/// `shannon_entropy` reports bits per byte — confirm against the SIMD backend.
pub fn normalized_entropy(data: &[u8]) -> f64 {
    // Count distinct byte values in one pass over the input.
    let mut present = [false; 256];
    let mut distinct = 0usize;
    for &byte in data {
        let slot = &mut present[usize::from(byte)];
        if !*slot {
            *slot = true;
            distinct += 1;
        }
    }

    // Covers both the empty input (zero symbols) and the single-symbol input.
    if distinct < 2 {
        return 0.0;
    }

    // With at least two symbols log2(distinct) >= 1, so the divisor is never zero.
    let ceiling = (distinct as f64).log2();
    shannon_entropy(data) / ceiling
}
/// A single entropy-based finding.
///
/// NOTE(review): the code that builds these values is not in this part of the
/// file, so the field descriptions below are inferred from names and types —
/// confirm against the scanner.
#[derive(Debug, Clone)]
pub struct EntropyMatch {
/// Presumably the candidate string that was flagged.
pub value: String,
/// Presumably the entropy measured for `value`.
pub entropy: f64,
/// Presumably the keyword associated with the finding.
pub keyword: String,
/// Presumably the line of the finding; whether it is 0- or 1-based is not
/// visible here.
pub line: usize,
/// Presumably the position of the finding; whether it is a byte or character
/// offset, and relative to what, is not visible here.
pub offset: usize,
}
/// Decides whether entropy-based detection should run for the file at `path`.
///
/// All comparisons are ASCII case-insensitive. Rules are applied in order and
/// the first one that matches wins:
///
/// 1. No path (`None`) → `true`.
/// 2. Path ends in `.json`, `.lock`, `.map`, `.min.js` or `.min.css` → `false`.
/// 3. `allow_source_files` is set → `true`.
/// 4. The file name (text after the last `/` or `\`) is a known build/package
///    manifest such as `Cargo.toml` or `pom.xml` → `false`.
/// 5. Path ends in a configuration/secret extension (`.env`, `.yaml`, `.pem`, …)
///    → `true`.
/// 6. File name starts with `.env`, `.npmrc`, `.pypirc` or `.netrc` → `true`.
/// 7. File name is exactly `credentials`, `secrets`, `apikeys` or
///    `docker-compose`, or starts with one of those and ends in `.enc`,
///    `.vault`, `.prod` or `.txt` → `true`.
/// 8. Anything else → `false`.
pub fn is_entropy_appropriate(path: Option<&str>, allow_source_files: bool) -> bool {
    /// Path suffixes for which entropy scanning is always disabled.
    const NEVER_SCANNED_SUFFIXES: &[&[u8]] =
        &[b".json", b".lock", b".map", b".min.js", b".min.css"];
    /// File names of build/package manifests, which are skipped.
    const MANIFEST_FILE_NAMES: &[&[u8]] = &[
        b"Cargo.toml",
        b"package.json",
        b"pyproject.toml",
        b"composer.json",
        b"Pipfile",
        b"Gemfile",
        b"pom.xml",
        b"build.gradle",
        b"build.gradle.kts",
        b"build.sbt",
        b"mix.exs",
    ];
    /// Path suffixes that mark configuration or secret files.
    const CONFIG_SUFFIXES: &[&[u8]] = &[
        b".env",
        b".yaml",
        b".yml",
        b".toml",
        b".properties",
        b".cfg",
        b".conf",
        b".ini",
        b".config",
        b".secrets",
        b".pem",
        b".key",
        b".tfvars",
        b".hcl",
    ];
    /// File-name prefixes of well-known credential dotfiles.
    const DOTFILE_PREFIXES: &[&[u8]] = &[b".env", b".npmrc", b".pypirc", b".netrc"];
    /// File-name stems that suggest secret material.
    const SENSITIVE_STEMS: &[&[u8]] =
        &[b"credentials", b"secrets", b"apikeys", b"docker-compose"];
    /// Suffixes accepted only after a sensitive stem. Every suffix in
    /// `CONFIG_SUFFIXES` has already produced `true` by the time the stem rule
    /// runs, so only these additional ones can still decide the outcome.
    const STEM_ONLY_SUFFIXES: &[&[u8]] = &[b".enc", b".vault", b".prod", b".txt"];

    /// ASCII case-insensitive `ends_with` for byte slices.
    fn ends_with_ci(haystack: &[u8], suffix: &[u8]) -> bool {
        haystack
            .len()
            .checked_sub(suffix.len())
            .map_or(false, |start| haystack[start..].eq_ignore_ascii_case(suffix))
    }

    /// ASCII case-insensitive `starts_with` for byte slices.
    fn starts_with_ci(haystack: &[u8], prefix: &[u8]) -> bool {
        haystack
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    }

    let full_path = match path {
        Some(text) => text.as_bytes(),
        None => return true,
    };

    if NEVER_SCANNED_SUFFIXES
        .iter()
        .any(|suffix| ends_with_ci(full_path, suffix))
    {
        return false;
    }

    if allow_source_files {
        return true;
    }

    // Final path component; both `/` and `\` count as separators.
    let file_name = full_path
        .rsplit(|&byte| byte == b'/' || byte == b'\\')
        .next()
        .unwrap_or(full_path);

    // Manifests are rejected before the extension rule so that e.g.
    // `Cargo.toml` is not accepted through `.toml`.
    if MANIFEST_FILE_NAMES
        .iter()
        .any(|manifest| file_name.eq_ignore_ascii_case(manifest))
    {
        return false;
    }

    if CONFIG_SUFFIXES
        .iter()
        .any(|suffix| ends_with_ci(full_path, suffix))
    {
        return true;
    }

    if DOTFILE_PREFIXES
        .iter()
        .any(|prefix| starts_with_ci(file_name, prefix))
    {
        return true;
    }

    SENSITIVE_STEMS.iter().any(|stem| {
        if file_name.eq_ignore_ascii_case(stem) {
            return true;
        }
        if !starts_with_ci(file_name, stem) {
            return false;
        }
        let remainder = &file_name[stem.len()..];
        STEM_ONLY_SUFFIXES
            .iter()
            .any(|suffix| ends_with_ci(remainder, suffix))
    })
}