use std::collections::HashMap;
/// Stateless namespace for Shannon-entropy measurements over text.
///
/// Every operation is an associated function; the type itself carries no data.
pub struct EntropyAnalyzer;

impl EntropyAnalyzer {
    /// Entropy (bits per character) at or above which
    /// [`EntropyAnalyzer::analyze_detailed`] sets `is_high_entropy`.
    const HIGH_ENTROPY_THRESHOLD: f64 = 4.0;

    /// Returns `true` for characters of the standard base64 alphabet,
    /// including the `=` padding character.
    fn is_base64_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || matches!(c, '+' | '/' | '=')
    }

    /// Returns `true` for ASCII hexadecimal digits in either case.
    fn is_hex_char(c: char) -> bool {
        c.is_ascii_hexdigit()
    }

    /// Computes the Shannon entropy, in bits per character, of a character stream.
    ///
    /// An empty stream yields `0.0`.
    fn entropy_of(chars: impl Iterator<Item = char>) -> f64 {
        let mut frequency: HashMap<char, usize> = HashMap::new();
        let mut total: usize = 0;
        for ch in chars {
            *frequency.entry(ch).or_insert(0) += 1;
            total += 1;
        }
        if total == 0 {
            return 0.0;
        }

        // `HashMap` iteration order differs from map to map, and floating-point
        // addition is not associative. Summing in sorted order makes the result
        // bit-for-bit reproducible for a given input.
        let mut counts: Vec<usize> = frequency.values().copied().collect();
        counts.sort_unstable();

        let length = total as f64;
        let mut entropy = 0.0;
        for count in counts {
            let probability = count as f64 / length;
            entropy -= probability * probability.log2();
        }
        entropy
    }

    /// Calculates the Shannon entropy of `text` in bits per character.
    ///
    /// Frequencies are counted over Unicode scalar values (`char`), not bytes.
    /// Returns `0.0` for an empty string and for text made of a single repeated
    /// character. The result is deterministic for a given input.
    pub fn calculate(text: &str) -> f64 {
        Self::entropy_of(text.chars())
    }

    /// Returns `true` when the entropy of `text` is at least `threshold`.
    pub fn is_high_entropy(text: &str, threshold: f64) -> bool {
        Self::calculate(text) >= threshold
    }

    /// Produces an [`EntropyAnalysis`] describing `text`.
    ///
    /// * `entropy` is the value of [`EntropyAnalyzer::calculate`].
    /// * `is_high_entropy` is set when that entropy is at least 4.0.
    /// * `is_base64_like` is set when the text contains at least one base64
    ///   alphabet character and nothing other than base64 characters and
    ///   whitespace.
    /// * `is_hex_like` is set when the text is non-empty and consists solely of
    ///   hexadecimal digits.
    /// * `length` is the length of `text` in bytes.
    pub fn analyze_detailed(text: &str) -> EntropyAnalysis {
        let total_entropy = Self::calculate(text);

        // `Iterator::all` is vacuously true on an empty iterator, so also require
        // actual payload; otherwise "" and whitespace-only text would be
        // classified as encoded data.
        let has_base64_payload = text.chars().any(Self::is_base64_char);
        let is_base64_like = has_base64_payload
            && text
                .chars()
                .all(|c| Self::is_base64_char(c) || c.is_whitespace());
        let is_hex_like = !text.is_empty() && text.chars().all(Self::is_hex_char);

        EntropyAnalysis {
            entropy: total_entropy,
            is_high_entropy: total_entropy >= Self::HIGH_ENTROPY_THRESHOLD,
            is_base64_like,
            is_hex_like,
            length: text.len(),
        }
    }

    /// Calculates the entropy of only the base64-alphabet characters in `text`.
    ///
    /// All other characters are ignored. Returns `0.0` when none are present.
    pub fn base64_entropy(text: &str) -> f64 {
        Self::entropy_of(text.chars().filter(|&c| Self::is_base64_char(c)))
    }

    /// Calculates the entropy of only the hexadecimal digits in `text`.
    ///
    /// All other characters are ignored. Returns `0.0` when none are present.
    pub fn hex_entropy(text: &str) -> f64 {
        Self::entropy_of(text.chars().filter(|&c| Self::is_hex_char(c)))
    }
}
/// Result record returned by `EntropyAnalyzer::analyze_detailed`.
#[derive(Debug, Clone)]
pub struct EntropyAnalysis {
/// Entropy of the analyzed text as computed by `EntropyAnalyzer::calculate`.
pub entropy: f64,
/// Whether `entropy` is at least 4.0, the threshold `analyze_detailed` compares against.
pub is_high_entropy: bool,
/// Set only when every character of the text is in the base64 alphabet
/// (`A-Z`, `a-z`, `0-9`, `+`, `/`, `=`) or is whitespace.
pub is_base64_like: bool,
/// Set only when every character of the text is a hexadecimal digit
/// (`0-9`, `a-f`, `A-F`).
pub is_hex_like: bool,
/// Length of the analyzed text in bytes (`str::len`), not in characters.
pub length: usize,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A string made of one repeated character carries almost no information.
    #[test]
    fn test_entropy_low() {
        let score = EntropyAnalyzer::calculate("aaaaaaaaaa");
        assert!(score < 1.0);
    }

    /// Twelve mixed-class characters should exceed three bits per character.
    #[test]
    fn test_entropy_high() {
        let score = EntropyAnalyzer::calculate("aB3$xY9#mK2@");
        assert!(score > 3.0);
    }

    /// A typical weak password sits strictly between the low and high bands.
    #[test]
    fn test_entropy_medium() {
        let score = EntropyAnalyzer::calculate("password123");
        assert!(2.0 < score && score < 4.0);
    }

    /// Empty input is defined to have zero entropy.
    #[test]
    fn test_entropy_empty() {
        let score = EntropyAnalyzer::calculate("");
        assert_eq!(score, 0.0);
    }

    /// A single character has exactly zero entropy.
    #[test]
    fn test_entropy_single_char() {
        assert_eq!(EntropyAnalyzer::calculate("a"), 0.0);
    }

    /// Non-ASCII text is handled and yields a positive value.
    #[test]
    fn test_entropy_unicode_chars() {
        let score = EntropyAnalyzer::calculate("héllo wörld ñ");
        assert!(score > 0.0, "Unicode entropy should be positive");
    }

    /// A realistic-looking secret key must land above four bits per character.
    #[test]
    fn test_entropy_realistic_api_key() {
        let score =
            EntropyAnalyzer::calculate("sk_live_51H4gJkLmNoPqRsTuVwXyZ0123456789AbCdEfGh");
        assert!(
            score > 4.0,
            "Realistic API key should have high entropy, got: {}",
            score
        );
    }

    /// The threshold helper accepts random-looking strings and rejects a plain word.
    #[test]
    fn test_is_high_entropy() {
        let short_random = EntropyAnalyzer::is_high_entropy("Xy9#mK2@qL5&", 3.5);
        assert!(short_random);

        let long_random = EntropyAnalyzer::is_high_entropy("aB3$xY9#mK2@qL5&pN7!", 4.0);
        assert!(long_random);

        let plain_word = EntropyAnalyzer::is_high_entropy("password", 4.0);
        assert!(!plain_word);
    }

    /// Padded base64 text is flagged as base64-like.
    #[test]
    fn test_base64_detection() {
        let report = EntropyAnalyzer::analyze_detailed("SGVsbG8gV29ybGQ=");
        assert!(report.is_base64_like);
    }

    /// A lowercase hex string is flagged as hex-like.
    #[test]
    fn test_hex_detection() {
        let report = EntropyAnalyzer::analyze_detailed("deadbeef1234567890abcdef");
        assert!(report.is_hex_like);
    }

    /// Punctuation outside the base64 alphabet disqualifies the text.
    #[test]
    fn test_not_base64() {
        let report = EntropyAnalyzer::analyze_detailed("hello world $pecial!");
        assert!(!report.is_base64_like);
    }

    /// The reported length matches the input length for ASCII text.
    #[test]
    fn test_detailed_analysis_length() {
        let report = EntropyAnalyzer::analyze_detailed("abc123def456");
        assert_eq!(report.length, 12);
    }
}