use crate::{
types::{EntropyAnalysis, EntropyRegion, ObfuscationLevel, PackingIndicators},
BinaryFile, Result,
};
use std::collections::HashMap;
pub fn analyze_binary(binary: &BinaryFile) -> Result<EntropyAnalysis> {
let data = binary.data();
let overall_entropy = calculate_entropy(data);
let mut section_entropy = HashMap::new();
for section in binary.sections() {
let start = section.offset as usize;
let end = (section.offset + section.size) as usize;
if start < data.len() && end <= data.len() && start < end {
let section_data = &data[start..end];
let entropy = calculate_entropy(section_data);
section_entropy.insert(section.name.clone(), entropy);
}
}
let high_entropy_regions = find_high_entropy_regions(data)?;
let packing_indicators = analyze_packing_indicators(data, §ion_entropy);
Ok(EntropyAnalysis {
overall_entropy,
section_entropy,
high_entropy_regions,
packing_indicators,
})
}
fn calculate_entropy(data: &[u8]) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut freq = [0u32; 256];
for &byte in data {
freq[byte as usize] += 1;
}
let len = data.len() as f64;
let mut entropy = 0.0;
for &count in &freq {
if count > 0 {
let p = count as f64 / len;
entropy -= p * p.log2();
}
}
entropy
}
fn find_high_entropy_regions(data: &[u8]) -> Result<Vec<EntropyRegion>> {
let mut regions = Vec::new();
let chunk_size = 1024; let high_entropy_threshold = 7.5;
for (i, chunk) in data.chunks(chunk_size).enumerate() {
let entropy = calculate_entropy(chunk);
if entropy > high_entropy_threshold {
let start = i * chunk_size;
let end = std::cmp::min(start + chunk_size, data.len());
let description = classify_high_entropy_region(chunk, entropy);
regions.push(EntropyRegion {
start: start as u64,
end: end as u64,
entropy,
description,
});
}
}
Ok(regions)
}
fn classify_high_entropy_region(data: &[u8], entropy: f64) -> String {
if entropy > 7.9 {
"Likely encrypted or compressed data".to_string()
} else if entropy > 7.5 {
if has_crypto_constants(data) {
"Possible cryptographic constants".to_string()
} else if has_compression_signature(data) {
"Possible compressed data".to_string()
} else {
"High entropy region - possible obfuscation".to_string()
}
} else {
"Moderately high entropy".to_string()
}
}
fn has_crypto_constants(data: &[u8]) -> bool {
#[allow(clippy::type_complexity)]
const CRYPTO_CONSTANTS: &[&[u8]] = &[
b"\x67\x45\x23\x01", b"\x01\x23\x45\x67", b"\x89\xab\xcd\xef", ];
for &constant in CRYPTO_CONSTANTS {
if data.windows(constant.len()).any(|w| w == constant) {
return true;
}
}
false
}
fn has_compression_signature(data: &[u8]) -> bool {
if data.len() < 4 {
return false;
}
matches!(&data[0..2], b"\x1f\x8b") || matches!(&data[0..4], b"PK\x03\x04") || matches!(&data[0..3], b"BZh") || matches!(&data[0..4], b"\xfd7zXZ") }
fn analyze_packing_indicators(
data: &[u8],
section_entropy: &HashMap<String, f64>,
) -> PackingIndicators {
let mut indicators = PackingIndicators::default();
let overall_entropy = calculate_entropy(data);
if overall_entropy > 7.5 {
indicators.is_packed = true;
}
let mut high_entropy_code = false;
for (name, &entropy) in section_entropy {
if (name.contains("text") || name.contains("code")) && entropy > 7.0 {
high_entropy_code = true;
break;
}
}
if high_entropy_code {
indicators.is_packed = true;
}
if indicators.is_packed {
indicators.compression_ratio = Some(overall_entropy / 8.0);
}
indicators.obfuscation_level = if overall_entropy > 7.8 {
ObfuscationLevel::High
} else if overall_entropy > 7.5 {
ObfuscationLevel::Medium
} else if overall_entropy > 7.0 {
ObfuscationLevel::Low
} else {
ObfuscationLevel::None
};
indicators.packer_name = detect_packer(data);
indicators
}
fn detect_packer(data: &[u8]) -> Option<String> {
if data.len() < 1024 {
return None;
}
let data_str = String::from_utf8_lossy(&data[..std::cmp::min(1024, data.len())]);
if data_str.contains("UPX") {
Some("UPX".to_string())
} else if data_str.contains("VMProtect") {
Some("VMProtect".to_string())
} else if data_str.contains("Themida") {
Some("Themida".to_string())
} else if data_str.contains("ASPack") {
Some("ASPack".to_string())
} else {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_entropy_calculation() {
let uniform_data = vec![0u8; 1024];
let entropy = calculate_entropy(&uniform_data);
assert!(entropy < 1.0);
let random_data: Vec<u8> = (0..1024).map(|i| (i * 7 + 13) as u8).collect();
let entropy = calculate_entropy(&random_data);
assert!(entropy > 7.0);
}
#[test]
fn test_crypto_constants_detection() {
let data = b"\x67\x45\x23\x01some other data";
assert!(has_crypto_constants(data));
let data = b"no crypto constants here";
assert!(!has_crypto_constants(data));
}
#[test]
fn test_compression_signature_detection() {
let gzip_data = b"\x1f\x8b\x08\x00";
assert!(has_compression_signature(gzip_data));
let zip_data = b"PK\x03\x04";
assert!(has_compression_signature(zip_data));
let normal_data = b"normal data";
assert!(!has_compression_signature(normal_data));
}
}