use std::fs;
use std::path::PathBuf;
use crate::entropy::build_entropy_report;
use tempfile::tempdir;
use tokmd_analysis_types::AnalysisLimits;
use tokmd_analysis_types::{EntropyClass, EntropyReport};
use tokmd_types::{ChildIncludeMode, ExportData, FileKind, FileRow};
fn export_for_paths(paths: &[&str]) -> ExportData {
let rows = paths
.iter()
.map(|p| FileRow {
path: (*p).to_string(),
module: "(root)".to_string(),
lang: "Text".to_string(),
kind: FileKind::Parent,
code: 1,
comments: 0,
blanks: 0,
lines: 1,
bytes: 10,
tokens: 2,
})
.collect();
ExportData {
rows,
module_roots: vec![],
module_depth: 1,
children: ChildIncludeMode::Separate,
}
}
fn export_with_modules(entries: &[(&str, &str)]) -> ExportData {
let rows = entries
.iter()
.map(|(p, m)| FileRow {
path: (*p).to_string(),
module: (*m).to_string(),
lang: "Text".to_string(),
kind: FileKind::Parent,
code: 1,
comments: 0,
blanks: 0,
lines: 1,
bytes: 10,
tokens: 2,
})
.collect();
ExportData {
rows,
module_roots: vec![],
module_depth: 1,
children: ChildIncludeMode::Separate,
}
}
fn write_repeated(path: &std::path::Path, byte: u8, len: usize) {
fs::write(path, vec![byte; len]).unwrap();
}
fn write_pseudorandom(path: &std::path::Path, len: usize) {
let mut data = Vec::with_capacity(len);
let mut x = 0x12345678u32;
for _ in 0..len {
x = x.wrapping_mul(1664525).wrapping_add(1013904223);
data.push((x & 0xFF) as u8);
}
fs::write(path, data).unwrap();
}
#[test]
fn empty_file_list_empty_report() {
let dir = tempdir().unwrap();
let export = export_for_paths(&[]);
let report =
build_entropy_report(dir.path(), &[], &export, &AnalysisLimits::default()).unwrap();
assert!(report.suspects.is_empty());
}
#[test]
fn low_entropy_detected() {
let dir = tempdir().unwrap();
write_repeated(&dir.path().join("low.txt"), b'A', 1024);
let export = export_for_paths(&["low.txt"]);
let files = vec![PathBuf::from("low.txt")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].class, EntropyClass::Low);
assert!(report.suspects[0].entropy_bits_per_byte < 2.0);
}
#[test]
fn high_entropy_detected() {
let dir = tempdir().unwrap();
write_pseudorandom(&dir.path().join("high.bin"), 1024);
let export = export_for_paths(&["high.bin"]);
let files = vec![PathBuf::from("high.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].class, EntropyClass::High);
assert!(report.suspects[0].entropy_bits_per_byte > 7.5);
}
#[test]
fn normal_entropy_excluded() {
let dir = tempdir().unwrap();
let text = "fn main() {\n println!(\"Hello, world!\");\n}\n".repeat(30);
fs::write(dir.path().join("main.rs"), &text).unwrap();
let export = export_for_paths(&["main.rs"]);
let files = vec![PathBuf::from("main.rs")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
for s in &report.suspects {
assert_ne!(s.class, EntropyClass::Normal);
}
}
#[test]
fn sorting_entropy_desc_path_asc() {
let dir = tempdir().unwrap();
write_repeated(&dir.path().join("b_low.txt"), b'B', 1024);
write_repeated(&dir.path().join("a_low.txt"), b'A', 1024);
let export = export_for_paths(&["b_low.txt", "a_low.txt"]);
let files = vec![PathBuf::from("b_low.txt"), PathBuf::from("a_low.txt")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
for pair in report.suspects.windows(2) {
if (pair[0].entropy_bits_per_byte - pair[1].entropy_bits_per_byte).abs() < f32::EPSILON {
assert!(
pair[0].path <= pair[1].path,
"tied entropy should sort by path asc: {} vs {}",
pair[0].path,
pair[1].path
);
} else {
assert!(
pair[0].entropy_bits_per_byte >= pair[1].entropy_bits_per_byte,
"should sort by entropy desc"
);
}
}
}
#[test]
fn max_suspects_capped_at_50() {
let dir = tempdir().unwrap();
let mut paths = Vec::new();
let mut files = Vec::new();
for i in 0..60 {
let name = format!("low_{i:03}.txt");
write_repeated(&dir.path().join(&name), b'X', 1024);
paths.push(name.clone());
files.push(PathBuf::from(name));
}
let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect();
let export = export_for_paths(&path_refs);
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert!(
report.suspects.len() <= 50,
"suspects should be capped at 50, got {}",
report.suspects.len()
);
}
#[test]
fn max_bytes_budget_stops_scanning() {
let dir = tempdir().unwrap();
for i in 0..10 {
write_repeated(&dir.path().join(format!("f{i}.txt")), b'Z', 1024);
}
let paths: Vec<String> = (0..10).map(|i| format!("f{i}.txt")).collect();
let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect();
let export = export_for_paths(&path_refs);
let files: Vec<PathBuf> = paths.iter().map(PathBuf::from).collect();
let limits = AnalysisLimits {
max_bytes: Some(3000),
..Default::default()
};
let report = build_entropy_report(dir.path(), &files, &export, &limits).unwrap();
assert!(
report.suspects.len() <= 3,
"budget should limit scanning: got {} suspects",
report.suspects.len()
);
}
#[test]
fn module_mapping_from_export() {
let dir = tempdir().unwrap();
write_repeated(&dir.path().join("low.txt"), b'A', 1024);
let export = export_with_modules(&[("low.txt", "my-module")]);
let files = vec![PathBuf::from("low.txt")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].module, "my-module");
}
#[test]
fn unknown_path_maps_to_unknown_module() {
let dir = tempdir().unwrap();
write_repeated(&dir.path().join("orphan.txt"), b'A', 1024);
let export = export_for_paths(&[]);
let files = vec![PathBuf::from("orphan.txt")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].module, "(unknown)");
}
#[test]
fn serde_roundtrip_preserves_all_fields() {
let dir = tempdir().unwrap();
write_repeated(&dir.path().join("low.txt"), b'A', 1024);
write_pseudorandom(&dir.path().join("high.bin"), 1024);
let export = export_for_paths(&["low.txt", "high.bin"]);
let files = vec![PathBuf::from("low.txt"), PathBuf::from("high.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
let json = serde_json::to_string(&report).unwrap();
let deser: EntropyReport = serde_json::from_str(&json).unwrap();
assert_eq!(deser.suspects.len(), report.suspects.len());
for (orig, rt) in report.suspects.iter().zip(deser.suspects.iter()) {
assert_eq!(orig.path, rt.path);
assert_eq!(orig.module, rt.module);
assert_eq!(orig.class, rt.class);
assert_eq!(orig.sample_bytes, rt.sample_bytes);
assert!((orig.entropy_bits_per_byte - rt.entropy_bits_per_byte).abs() < f32::EPSILON);
}
}
#[test]
fn empty_file_no_suspects() {
let dir = tempdir().unwrap();
fs::write(dir.path().join("empty.txt"), b"").unwrap();
let export = export_for_paths(&["empty.txt"]);
let files = vec![PathBuf::from("empty.txt")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert!(
report.suspects.is_empty(),
"empty file should produce no suspects"
);
}
mod properties {
use super::*;
use proptest::prelude::*;
proptest! {
#[test]
fn entropy_class_serde_roundtrip(class_idx in 0u8..4) {
let class = match class_idx {
0 => EntropyClass::Low,
1 => EntropyClass::Normal,
2 => EntropyClass::Suspicious,
_ => EntropyClass::High,
};
let json = serde_json::to_string(&class).unwrap();
let rt: EntropyClass = serde_json::from_str(&json).unwrap();
prop_assert_eq!(class, rt);
}
}
}