use std::fs;
use std::path::{Path, PathBuf};
use crate::entropy::build_entropy_report;
use tempfile::tempdir;
use tokmd_analysis_types::AnalysisLimits;
use tokmd_analysis_types::EntropyClass;
use tokmd_types::{ChildIncludeMode, ExportData, FileKind, FileRow};
fn export_for(rows: Vec<FileRow>) -> ExportData {
ExportData {
rows,
module_roots: vec![],
module_depth: 1,
children: ChildIncludeMode::Separate,
}
}
fn parent_row(path: &str, module: &str) -> FileRow {
FileRow {
path: path.to_string(),
module: module.to_string(),
lang: "Text".to_string(),
kind: FileKind::Parent,
code: 1,
comments: 0,
blanks: 0,
lines: 1,
bytes: 10,
tokens: 2,
}
}
fn child_row(path: &str) -> FileRow {
FileRow {
path: path.to_string(),
module: "(root)".to_string(),
lang: "Text".to_string(),
kind: FileKind::Child,
code: 1,
comments: 0,
blanks: 0,
lines: 1,
bytes: 10,
tokens: 2,
}
}
fn write_repeated(path: &Path, byte: u8, len: usize) {
fs::write(path, vec![byte; len]).unwrap();
}
fn write_pseudorandom(path: &Path, seed: u32, len: usize) {
let mut data = Vec::with_capacity(len);
let mut x = seed;
for _ in 0..len {
x = x.wrapping_mul(1664525).wrapping_add(1013904223);
data.push((x >> 16) as u8);
}
fs::write(path, data).unwrap();
}
#[test]
fn all_zeros_classified_low() {
let dir = tempdir().unwrap();
let f = dir.path().join("zeros.bin");
write_repeated(&f, 0x00, 1024);
let export = export_for(vec![parent_row("zeros.bin", "(root)")]);
let files = vec![PathBuf::from("zeros.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].class, EntropyClass::Low);
assert!(report.suspects[0].entropy_bits_per_byte < 0.01);
}
#[test]
fn pseudorandom_classified_high() {
let dir = tempdir().unwrap();
let f = dir.path().join("rand.bin");
write_pseudorandom(&f, 0xDEADBEEF, 4096);
let export = export_for(vec![parent_row("rand.bin", "(root)")]);
let files = vec![PathBuf::from("rand.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].class, EntropyClass::High);
assert!(report.suspects[0].entropy_bits_per_byte > 7.5);
}
#[test]
fn empty_file_not_a_suspect() {
let dir = tempdir().unwrap();
fs::write(dir.path().join("empty.txt"), b"").unwrap();
let export = export_for(vec![parent_row("empty.txt", "(root)")]);
let files = vec![PathBuf::from("empty.txt")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert!(report.suspects.is_empty(), "empty files should be skipped");
}
#[test]
fn single_byte_file_classified_low() {
let dir = tempdir().unwrap();
fs::write(dir.path().join("one.bin"), [0xAB]).unwrap();
let export = export_for(vec![parent_row("one.bin", "(root)")]);
let files = vec![PathBuf::from("one.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].class, EntropyClass::Low);
}
#[test]
fn normal_english_text_not_in_suspects() {
let dir = tempdir().unwrap();
let text = "Hello world. This is a perfectly normal sentence. ".repeat(40);
fs::write(dir.path().join("prose.txt"), text).unwrap();
let export = export_for(vec![parent_row("prose.txt", "(root)")]);
let files = vec![PathBuf::from("prose.txt")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert!(
report.suspects.is_empty(),
"normal text should not be a suspect"
);
}
#[test]
fn module_from_export_propagated_to_finding() {
let dir = tempdir().unwrap();
let f = dir.path().join("key.bin");
write_pseudorandom(&f, 0x1234, 2048);
let export = export_for(vec![parent_row("key.bin", "secrets/keys")]);
let files = vec![PathBuf::from("key.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].module, "secrets/keys");
}
#[test]
fn unmapped_file_gets_unknown_module() {
let dir = tempdir().unwrap();
let f = dir.path().join("orphan.bin");
write_pseudorandom(&f, 0xCAFE, 2048);
let export = export_for(vec![]); let files = vec![PathBuf::from("orphan.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(report.suspects[0].module, "(unknown)");
}
#[test]
fn child_rows_not_used_for_module_lookup() {
let dir = tempdir().unwrap();
let f = dir.path().join("data.bin");
write_pseudorandom(&f, 0xBEEF, 2048);
let export = export_for(vec![child_row("data.bin")]);
let files = vec![PathBuf::from("data.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(
report.suspects[0].module, "(unknown)",
"child rows should be filtered out"
);
}
#[test]
fn no_files_yields_empty_report() {
let dir = tempdir().unwrap();
let export = export_for(vec![]);
let report =
build_entropy_report(dir.path(), &[], &export, &AnalysisLimits::default()).unwrap();
assert!(report.suspects.is_empty());
}
#[test]
fn suspects_ordered_highest_entropy_first() {
let dir = tempdir().unwrap();
let lo = dir.path().join("lo.bin");
write_repeated(&lo, 0xFF, 1024);
let hi = dir.path().join("hi.bin");
write_pseudorandom(&hi, 0xAAAA, 4096);
let export = export_for(vec![
parent_row("lo.bin", "(root)"),
parent_row("hi.bin", "(root)"),
]);
let files = vec![PathBuf::from("lo.bin"), PathBuf::from("hi.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert!(report.suspects.len() >= 2);
assert!(
report.suspects[0].entropy_bits_per_byte >= report.suspects[1].entropy_bits_per_byte,
"first suspect should have higher entropy"
);
}
#[test]
fn same_entropy_tiebreaks_by_path_ascending() {
let dir = tempdir().unwrap();
let a = dir.path().join("beta.bin");
let b = dir.path().join("alpha.bin");
write_repeated(&a, 0x00, 512);
write_repeated(&b, 0x00, 512);
let export = export_for(vec![
parent_row("alpha.bin", "(root)"),
parent_row("beta.bin", "(root)"),
]);
let files = vec![PathBuf::from("beta.bin"), PathBuf::from("alpha.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 2);
assert_eq!(report.suspects[0].path, "alpha.bin");
assert_eq!(report.suspects[1].path, "beta.bin");
}
#[test]
fn suspects_capped_at_fifty() {
let dir = tempdir().unwrap();
let mut rows = Vec::new();
let mut files = Vec::new();
for i in 0..60 {
let name = format!("f{i:03}.bin");
let f = dir.path().join(&name);
write_pseudorandom(&f, i as u32, 2048);
rows.push(parent_row(&name, "(root)"));
files.push(PathBuf::from(name));
}
let export = export_for(rows);
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert!(
report.suspects.len() <= 50,
"should truncate to MAX_SUSPECTS=50, got {}",
report.suspects.len()
);
}
#[test]
fn max_bytes_budget_limits_scanning() {
let dir = tempdir().unwrap();
let a = dir.path().join("first.bin");
let b = dir.path().join("second.bin");
write_pseudorandom(&a, 1, 512);
write_pseudorandom(&b, 2, 512);
let export = export_for(vec![
parent_row("first.bin", "(root)"),
parent_row("second.bin", "(root)"),
]);
let files = vec![PathBuf::from("first.bin"), PathBuf::from("second.bin")];
let limits = AnalysisLimits {
max_bytes: Some(512),
..AnalysisLimits::default()
};
let report = build_entropy_report(dir.path(), &files, &export, &limits).unwrap();
assert!(
report.suspects.len() <= 1,
"budget should stop after first file, got {} suspects",
report.suspects.len()
);
}
#[test]
fn max_file_bytes_limits_sample_size() {
let dir = tempdir().unwrap();
let f = dir.path().join("big.bin");
write_pseudorandom(&f, 0x7777, 8192);
let export = export_for(vec![parent_row("big.bin", "(root)")]);
let files = vec![PathBuf::from("big.bin")];
let limits = AnalysisLimits {
max_file_bytes: Some(128),
..AnalysisLimits::default()
};
let report = build_entropy_report(dir.path(), &files, &export, &limits).unwrap();
assert_eq!(report.suspects.len(), 1);
assert!(
report.suspects[0].sample_bytes <= 128,
"sample_bytes should respect max_file_bytes, got {}",
report.suspects[0].sample_bytes
);
}
#[test]
fn sample_bytes_reflects_file_size_when_smaller_than_limit() {
let dir = tempdir().unwrap();
let f = dir.path().join("small.bin");
write_repeated(&f, 0x00, 64);
let export = export_for(vec![parent_row("small.bin", "(root)")]);
let files = vec![PathBuf::from("small.bin")];
let report =
build_entropy_report(dir.path(), &files, &export, &AnalysisLimits::default()).unwrap();
assert_eq!(report.suspects.len(), 1);
assert_eq!(
report.suspects[0].sample_bytes, 64,
"sample_bytes should equal actual file size for small files"
);
}