use std::fs::File;
use std::io::Write;
use crate::content::io::{
count_tags, entropy_bits_per_byte, hash_bytes, hash_file, is_text_like, read_head,
read_head_tail, read_lines, read_text_capped,
};
/// Test fixture: creates a fresh temporary directory and writes `content`
/// into a file called `name` inside it.
///
/// Returns the directory handle together with the path of the new file.
/// The handle is handed back so the caller can keep it alive while using the
/// path (presumably `tempfile::TempDir` removes the directory on drop —
/// confirm against the `tempfile` docs).
///
/// # Panics
///
/// Panics if the directory cannot be created or the file cannot be
/// created/written.
fn tmp_file(name: &str, content: &[u8]) -> (tempfile::TempDir, std::path::PathBuf) {
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join(name);
    // Create and fill the file in one fallible chain; any I/O error panics.
    File::create(&path)
        .and_then(|mut file| file.write_all(content))
        .unwrap();
    (dir, path)
}
/// With a cap (1000) larger than the file, the whole text is expected back.
#[test]
fn read_text_capped_returns_full_content_when_under_limit() {
    let content = "Hello, World!";
    let (_guard, file) = tmp_file("small.txt", content.as_bytes());
    assert_eq!(read_text_capped(&file, 1000).unwrap(), content);
}
/// With a cap of 9, only the first nine bytes ("The quick") are expected.
#[test]
fn read_text_capped_truncates_at_limit() {
    let sentence = "The quick brown fox jumps";
    let (_guard, file) = tmp_file("long.txt", sentence.as_bytes());
    let capped = read_text_capped(&file, 9).unwrap();
    assert_eq!(capped, "The quick");
}
/// Reading an empty file is expected to succeed and produce an empty string.
#[test]
fn read_text_capped_empty_file() {
    let (_guard, file) = tmp_file("empty.txt", &[]);
    assert_eq!(read_text_capped(&file, 100).unwrap(), "");
}
/// Non-UTF-8 input must not make the read fail: the result is expected to
/// contain either the trailing ASCII `A` (0x41) or a U+FFFD replacement
/// character.
#[test]
fn read_text_capped_binary_content_uses_lossy() {
    let raw: [u8; 4] = [0xFF, 0xFE, 0x00, 0x41];
    let (_guard, file) = tmp_file("bin.dat", &raw);
    let decoded = read_text_capped(&file, 100).unwrap();
    let has_ascii_a = decoded.contains('A');
    let has_replacement = decoded.contains('\u{FFFD}');
    // NOTE(review): either condition alone satisfies the test; a stricter
    // check may be possible once the exact decoding behavior is confirmed.
    assert!(has_ascii_a || has_replacement);
}
/// An empty buffer is expected to report exactly 0.0 bits per byte.
#[test]
fn entropy_empty_is_zero() {
    let empty: [u8; 0] = [];
    assert_eq!(entropy_bits_per_byte(&empty), 0.0);
}
/// A buffer holding one byte value repeated 1000 times should have ~0 entropy.
#[test]
fn entropy_single_repeated_byte_is_zero() {
    let entropy = entropy_bits_per_byte(&[0x42u8; 1000]);
    assert!(entropy.abs() < 1e-6, "expected ~0.0, got {e}", e = entropy);
}
/// Two equally frequent byte values (0, 1 alternating) should give ~1 bit.
#[test]
fn entropy_two_values_is_one_bit() {
    let alternating: Vec<u8> = (0u8..2).cycle().take(2000).collect();
    let entropy = entropy_bits_per_byte(&alternating);
    assert!(
        (entropy - 1.0).abs() < 0.02,
        "expected ~1.0, got {e}",
        e = entropy
    );
}
/// Four equally frequent byte values (0..=3 repeating) should give ~2 bits.
#[test]
fn entropy_four_values_is_two_bits() {
    let repeating: Vec<u8> = (0u8..4).cycle().take(2000).collect();
    let entropy = entropy_bits_per_byte(&repeating);
    assert!(
        (entropy - 2.0).abs() < 0.02,
        "expected ~2.0, got {e}",
        e = entropy
    );
}
/// All 256 byte values, each appearing ten times, should give ~8 bits.
#[test]
fn entropy_full_byte_range_is_eight_bits() {
    // Ten back-to-back passes over 0..=255 yield the same 2560-byte buffer.
    let uniform: Vec<u8> = (0..10).flat_map(|_| 0u8..=255).collect();
    let entropy = entropy_bits_per_byte(&uniform);
    assert!(
        (entropy - 8.0).abs() < 0.02,
        "expected ~8.0, got {e}",
        e = entropy
    );
}
/// Entropy should strictly grow as the number of distinct, equally frequent
/// byte values goes from 1 to 4 to 16 (1000 bytes per sample).
#[test]
fn entropy_monotonically_increases_with_diversity() {
    // Entropy of a 1000-byte buffer cycling through 0..distinct.
    let entropy_of_cycle = |distinct: u8| {
        let buf: Vec<u8> = (0..distinct).cycle().take(1000).collect();
        entropy_bits_per_byte(&buf)
    };

    let e1 = entropy_bits_per_byte(&[0xAA_u8; 1000]);
    let e2 = entropy_of_cycle(4);
    let e3 = entropy_of_cycle(16);

    assert!(e1 < e2, "1 value < 4 values: {e1} < {e2}");
    assert!(e2 < e3, "4 values < 16 values: {e2} < {e3}");
}
/// Plain ASCII bytes are expected to be classified as text.
#[test]
fn is_text_like_on_ascii() {
    let sample = b"Hello, World!";
    let verdict = is_text_like(sample);
    assert!(verdict);
}
/// Valid multi-byte UTF-8 (accented Latin and CJK) is expected to count as text.
#[test]
fn is_text_like_on_utf8() {
    let sample = "café résumé 日本語";
    let verdict = is_text_like(sample.as_bytes());
    assert!(verdict);
}
/// An empty byte slice is expected to be treated as text.
#[test]
fn is_text_like_on_empty() {
    let nothing: &[u8] = &[];
    assert!(is_text_like(nothing));
}
/// Input containing a NUL byte is expected to be rejected as non-text.
#[test]
fn is_text_like_rejects_null_bytes() {
    // Same four bytes as [0x48, 0x65, 0x00, 0x6C]: 'H', 'e', NUL, 'l'.
    let with_nul = b"He\0l";
    let verdict = is_text_like(with_nul);
    assert!(!verdict);
}
/// A buffer containing every byte value 0..=255 once is expected to be
/// rejected as non-text.
#[test]
fn is_text_like_rejects_binary_blob() {
    let every_byte = (u8::MIN..=u8::MAX).collect::<Vec<u8>>();
    let verdict = is_text_like(&every_byte);
    assert!(!verdict);
}
/// Hashing the same input twice must give the same digest.
#[test]
fn hash_bytes_deterministic() {
    let input = b"hello";
    assert_eq!(hash_bytes(input), hash_bytes(input));
}
/// Two different inputs ("hello" vs "world") must not hash to the same digest.
#[test]
fn hash_bytes_different_inputs_differ() {
    assert_ne!(hash_bytes(b"hello"), hash_bytes(b"world"));
}
/// The digest is expected to be a 64-character string of ASCII hex digits
/// (the assertion message describes it as BLAKE3 hex output).
#[test]
fn hash_bytes_is_64_hex_chars() {
    let digest = hash_bytes(b"test");
    assert_eq!(digest.len(), 64, "BLAKE3 hex output should be 64 chars");
    let only_hex = digest.chars().all(|ch| ch.is_ascii_hexdigit());
    assert!(only_hex);
}
/// Hashing a file with a generous byte limit must equal hashing its content
/// directly with `hash_bytes`.
#[test]
fn hash_file_matches_hash_bytes() {
    let payload = b"deterministic content";
    let (_guard, file) = tmp_file("hashme.txt", payload);
    assert_eq!(hash_file(&file, 10000).unwrap(), hash_bytes(payload));
}
/// With a limit of 5 on a 10-byte file, the digest is expected to cover only
/// the first five bytes ("abcde") and therefore differ from the full digest.
#[test]
fn hash_file_respects_max_bytes() {
    let (_guard, file) = tmp_file("partial.txt", b"abcdefghij");

    let first_five = hash_file(&file, 5).unwrap();
    let whole = hash_file(&file, 1000).unwrap();

    assert_ne!(first_five, whole, "partial hash should differ from full");
    assert_eq!(first_five, hash_bytes(b"abcde"));
}
/// Hashing an empty file must equal hashing an empty byte string.
#[test]
fn hash_empty_file() {
    let (_guard, file) = tmp_file("empty.dat", &[]);
    assert_eq!(hash_file(&file, 100).unwrap(), hash_bytes(b""));
}
/// Text with two TODO markers and one FIXME marker is expected to report
/// counts of 2 and 1 respectively.
#[test]
fn count_tags_finds_todo_and_fixme() {
    let text = "// TODO: fix this\n// FIXME: broken\n// TODO: another one\n";
    let tags = count_tags(text, &["TODO", "FIXME"]);

    // Looks up the count recorded for a tag, or `None` if the tag is absent.
    let count_of = |wanted: &str| {
        tags.iter()
            .find(|(tag, _)| tag == wanted)
            .map(|(_, count)| *count)
    };

    assert_eq!(count_of("TODO"), Some(2));
    assert_eq!(count_of("FIXME"), Some(1));
}
/// "todo", "Todo" and "TODO" are all expected to count towards the `TODO` tag.
#[test]
fn count_tags_case_insensitive() {
    let counts = count_tags("todo Todo TODO", &["TODO"]);
    let (_, hits) = &counts[0];
    assert_eq!(*hits, 3, "should match case-insensitively");
}
/// Text without any of the requested tags must report zero for every entry.
#[test]
fn count_tags_no_matches() {
    let wanted = ["TODO", "FIXME", "HACK"];
    let counts = count_tags("clean code, no issues", &wanted);
    let any_hit = counts.iter().any(|(_, hits)| *hits != 0);
    assert!(!any_hit);
}
/// Empty text must still produce an entry for the tag, with a count of zero.
#[test]
fn count_tags_empty_text() {
    let counts = count_tags("", &["TODO"]);
    let (_, hits) = &counts[0];
    assert_eq!(*hits, 0);
}
/// Results are expected in the order the tags were requested, not the order
/// in which they occur in the text.
#[test]
fn count_tags_preserves_tag_order() {
    let wanted = ["TODO", "FIXME", "HACK"];
    let counts = count_tags("FIXME TODO HACK", &wanted);
    for (idx, expected) in wanted.iter().enumerate() {
        assert_eq!(counts[idx].0, *expected);
    }
}
/// `read_head` with a limit above the file size is expected to return the raw
/// bytes unchanged, including non-UTF-8 and NUL bytes.
#[test]
fn read_head_on_binary_file() {
    let raw: [u8; 4] = [0xFF, 0x00, 0xDE, 0xAD];
    let (_guard, file) = tmp_file("binary.bin", &raw);
    let head = read_head(&file, 100).unwrap();
    assert_eq!(head, &raw);
}
/// A file smaller than the byte budget is expected to come back whole.
#[test]
fn read_head_tail_on_small_file() {
    let (_guard, file) = tmp_file("tiny.txt", b"abc");
    assert_eq!(read_head_tail(&file, 100).unwrap(), b"abc");
}
/// For a 10-byte file and a budget of 4, the expected result is `0189`:
/// the first two bytes followed by the last two.
#[test]
fn read_head_tail_splits_large_file() {
    let (_guard, file) = tmp_file("big.txt", b"0123456789");
    assert_eq!(read_head_tail(&file, 4).unwrap(), b"0189");
}
/// Reading lines from an empty file is expected to yield no lines.
#[test]
fn read_lines_empty_file() {
    let (_guard, file) = tmp_file("empty_lines.txt", &[]);
    let collected = read_lines(&file, 100, 10000).unwrap();
    assert!(collected.is_empty());
}
/// With a line limit of 2 on a five-line file, only the first two lines are
/// expected, without their trailing newlines.
#[test]
fn read_lines_respects_max_lines() {
    let (_guard, file) = tmp_file("lines.txt", b"line1\nline2\nline3\nline4\nline5\n");
    let collected = read_lines(&file, 2, 10000).unwrap();

    assert_eq!(collected.len(), 2);
    assert_eq!(collected[0], "line1");
    assert_eq!(collected[1], "line2");
}
/// A line limit of zero is expected to yield no lines even when the file has
/// content.
#[test]
fn read_lines_zero_max_returns_empty() {
    let (_guard, file) = tmp_file("nope.txt", b"content\n");
    let collected = read_lines(&file, 0, 10000).unwrap();
    assert!(collected.is_empty());
}