keyhog_scanner/entropy/
mod.rs1pub(crate) mod keywords;
7mod scanner;
8
9#[cfg(test)]
10mod entropy_tests;
11
12pub use scanner::{find_entropy_secrets, find_entropy_secrets_with_threshold, is_sensitive_file};
13
/// Lowest of the entropy cut-offs exported by this module.
///
/// NOTE(review): presumably compared against `shannon_entropy` results by the
/// scanner; the comparison sites are outside this file section, so confirm there.
pub const LOW_ENTROPY_THRESHOLD: f64 = 3.0;
/// Cut-off labelled "high" entropy; sits between the low and very-high values.
pub const HIGH_ENTROPY_THRESHOLD: f64 = 4.5;
/// Cut-off labelled "very high" entropy; the largest threshold defined here.
pub const VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.8;
/// "Very high" cut-off for sensitive files; lower than
/// [`VERY_HIGH_ENTROPY_THRESHOLD`] (5.5 versus 5.8).
///
/// NOTE(review): presumably paired with `is_sensitive_file` — verify in the scanner.
pub const SENSITIVE_FILE_VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.5;
21
22pub fn shannon_entropy(data: &[u8]) -> f64 {
30 use std::cell::RefCell;
31 use std::collections::HashMap;
32
33 const MAX_CACHE_ENTRIES: usize = 4096;
34
35 thread_local! {
36 static CACHE: RefCell<HashMap<u64, f64>> = RefCell::new(HashMap::with_capacity(256));
37 }
38
39 let mut hash: u64 = 0xcbf29ce484222325;
41 for &byte in data {
42 hash ^= u64::from(byte);
43 hash = hash.wrapping_mul(0x100000001b3);
44 }
45
46 CACHE.with(|cache| {
47 let mut cache = cache.borrow_mut();
48 if let Some(&cached) = cache.get(&hash) {
49 return cached;
50 }
51 let entropy = shannon_entropy_uncached(data);
52 if cache.len() >= MAX_CACHE_ENTRIES {
53 cache.clear(); }
55 cache.insert(hash, entropy);
56 entropy
57 })
58}
59
60fn shannon_entropy_uncached(data: &[u8]) -> f64 {
61 crate::entropy_fast::shannon_entropy_simd(data)
62}
63
64pub fn normalized_entropy(data: &[u8]) -> f64 {
66 if data.is_empty() {
67 return 0.0;
68 }
69
70 let unique_chars = {
71 let mut seen = [false; 256];
72 for &byte in data {
73 seen[byte as usize] = true;
74 }
75 seen.iter().filter(|&&value| value).count()
76 };
77
78 if unique_chars <= 1 {
79 return 0.0;
80 }
81
82 let max_entropy = (unique_chars as f64).log2();
83 if max_entropy == 0.0 {
84 return 0.0;
85 }
86
87 shannon_entropy(data) / max_entropy
88}
89
/// A single finding produced by entropy-based scanning.
///
/// NOTE(review): the field notes below are inferred from the field names; the
/// code that fills this struct lives outside this section — confirm there.
#[derive(Clone, Debug)]
pub struct EntropyMatch {
    /// The matched text (presumably the candidate secret).
    pub value: String,
    /// Entropy score recorded for the match.
    pub entropy: f64,
    /// Keyword associated with the match (presumably the nearby trigger word).
    pub keyword: String,
    /// Line position of the match; whether it is 0- or 1-based is not visible here.
    pub line: usize,
    /// Offset of the match; unit and origin (bytes vs chars, file vs line) to be confirmed.
    pub offset: usize,
}
104
/// Decides from a file path whether entropy-based detection should run.
///
/// The path is lower-cased first, so every comparison is case-insensitive.
/// Rules are applied in this order:
///
/// 1. No path (`None`) is always accepted.
/// 2. Paths ending in `.json`, `.lock`, `.map`, `.min.js` or `.min.css` are
///    always rejected, regardless of `allow_source_files`.
/// 3. If `allow_source_files` is `true`, every remaining path is accepted.
/// 4. Otherwise the path is accepted when it ends in a known configuration or
///    secret-bearing suffix (`.env`, `.yaml`, `.pem`, ...), or when its final
///    component (split on `/` or `\`) starts with a known sensitive name
///    (`.env`, `credentials`, `docker-compose`, ...).
/// 5. Anything else is rejected.
pub fn is_entropy_appropriate(path: Option<&str>, allow_source_files: bool) -> bool {
    const ALWAYS_SKIPPED_SUFFIXES: &[&str] = &[".json", ".lock", ".map", ".min.js", ".min.css"];
    const CONFIG_SUFFIXES: &[&str] = &[
        ".env",
        ".yaml",
        ".yml",
        ".toml",
        ".properties",
        ".cfg",
        ".conf",
        ".ini",
        ".config",
        ".secrets",
        ".pem",
        ".key",
        ".tfvars",
        ".hcl",
    ];
    const SENSITIVE_NAME_PREFIXES: &[&str] = &[
        ".env",
        "credentials",
        "secrets",
        "apikeys",
        "docker-compose",
        ".npmrc",
        ".pypirc",
        ".netrc",
    ];

    /// True when `text` ends with at least one of `suffixes`.
    fn has_any_suffix(text: &str, suffixes: &[&str]) -> bool {
        suffixes.iter().any(|suffix| text.ends_with(*suffix))
    }

    let normalized = match path {
        Some(raw) => raw.to_lowercase(),
        None => return true,
    };

    if has_any_suffix(&normalized, ALWAYS_SKIPPED_SUFFIXES) {
        return false;
    }
    if allow_source_files || has_any_suffix(&normalized, CONFIG_SUFFIXES) {
        return true;
    }

    // Last path component, accepting both Unix and Windows separators.
    let file_name = normalized
        .rsplit(|c: char| c == '/' || c == '\\')
        .next()
        .unwrap_or(normalized.as_str());

    // A prefix match also covers the exact-name case.
    SENSITIVE_NAME_PREFIXES
        .iter()
        .any(|prefix| file_name.starts_with(*prefix))
}