use crate::content::complexity::{
analyze_functions, analyze_nesting_depth, estimate_cognitive_complexity,
estimate_cyclomatic_complexity,
};
use crate::content::io::{
count_tags, entropy_bits_per_byte, hash_bytes, hash_file, is_text_like, read_head,
read_head_tail, read_lines, read_text_capped,
};
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
/// Creates a new temporary directory for a test, panicking if creation fails.
fn tmp() -> tempfile::TempDir {
    let created = tempfile::tempdir();
    created.unwrap()
}
/// Writes `content` to `name` inside `dir` and returns the resulting path.
///
/// Missing parent directories of the target are created first. Any I/O
/// failure panics, which is the desired behavior for a test fixture helper.
fn write_file(dir: &tempfile::TempDir, name: &str, content: &[u8]) -> PathBuf {
    let target = dir.path().join(name);
    if let Some(parent_dir) = target.parent() {
        std::fs::create_dir_all(parent_dir).unwrap();
    }
    File::create(&target).unwrap().write_all(content).unwrap();
    target
}
/// An empty slice must report exactly zero entropy.
#[test]
fn entropy_empty_is_zero() {
    let e = entropy_bits_per_byte(&[]);
    assert_eq!(e, 0.0);
}
/// A buffer made of one repeated byte value must report zero entropy.
#[test]
fn entropy_uniform_single_byte_is_zero() {
    let constant_run = [0xAAu8; 1000];
    let e = entropy_bits_per_byte(&constant_run);
    assert_eq!(e, 0.0);
}
/// Two byte values alternating evenly should yield about one bit per byte.
#[test]
fn entropy_two_values_equally_distributed() {
    // 0, 1, 0, 1, ... for 1000 bytes.
    let data: Vec<u8> = [0u8, 1].iter().copied().cycle().take(1000).collect();
    let e = entropy_bits_per_byte(&data);
    let deviation = (e - 1.0).abs();
    assert!(deviation < 0.01, "expected ~1.0 bit, got {e}");
}
/// Four byte values in equal proportion should yield about two bits per byte.
#[test]
fn entropy_four_values_equally_distributed() {
    // 0, 1, 2, 3, 0, 1, ... for 1000 bytes.
    let data: Vec<u8> = (0u8..4).cycle().take(1000).collect();
    let e = entropy_bits_per_byte(&data);
    let deviation = (e - 2.0).abs();
    assert!(deviation < 0.05, "expected ~2.0 bits, got {e}");
}
/// All 256 byte values in equal proportion should yield about eight bits per byte.
#[test]
fn entropy_uniform_256_near_eight() {
    // One hundred passes over the full 0..=255 range.
    let data: Vec<u8> = (0..100).flat_map(|_| 0u8..=255).collect();
    let e = entropy_bits_per_byte(&data);
    let deviation = (e - 8.0).abs();
    assert!(deviation < 0.01, "expected ~8.0, got {e}");
}
/// A one-byte input must report zero entropy.
#[test]
fn entropy_single_byte_input() {
    let lone_byte = [42u8];
    let e = entropy_bits_per_byte(&lone_byte);
    assert_eq!(e, 0.0);
}
/// Two different bytes, one of each, should yield about one bit per byte.
#[test]
fn entropy_two_distinct_bytes() {
    let pair = [0u8, 1];
    let e = entropy_bits_per_byte(&pair);
    let deviation = (e - 1.0).abs();
    assert!(deviation < 0.01, "expected ~1.0, got {e}");
}
/// A 90/10 split between two byte values should land strictly between 0.4 and 0.5 bits.
#[test]
fn entropy_skewed_distribution() {
    // 900 zero bytes followed by 100 one bytes.
    let mut data = vec![0u8; 900];
    data.resize(1000, 1u8);
    let e = entropy_bits_per_byte(&data);
    assert!(0.4 < e && e < 0.5, "expected ~0.469, got {e}");
}
/// Short English pangrams should fall strictly between 3 and 6 bits per byte.
#[test]
fn entropy_ascii_text_typical_range() {
    let e = entropy_bits_per_byte(
        b"The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs.",
    );
    assert!(3.0 < e && e < 6.0, "text entropy should be 3-6, got {e}");
}
/// A buffer with two distinct values must not score lower than a buffer with one.
#[test]
fn entropy_monotonically_nondecreasing_with_more_values() {
    let single_value = [0u8; 100];
    // 50 zero bytes followed by 50 one bytes.
    let mut two_values = vec![0u8; 50];
    two_values.resize(100, 1u8);

    let e1 = entropy_bits_per_byte(&single_value);
    let e2 = entropy_bits_per_byte(&two_values);
    assert!(e2 >= e1, "more distinct values should yield >= entropy");
}
/// Each requested tag is reported with its count, in the order requested.
#[test]
fn count_tags_basic() {
    let source = "// TODO: fix this\n// FIXME: broken\n";
    let counts = count_tags(source, &["TODO", "FIXME"]);
    assert_eq!(counts[0], (String::from("TODO"), 1));
    assert_eq!(counts[1], (String::from("FIXME"), 1));
}
/// Lowercase, mixed-case and uppercase spellings all count toward the same tag.
#[test]
fn count_tags_case_insensitive() {
    let source = "// todo: lowercase\n// Todo: mixed\n// TODO: upper\n";
    let counts = count_tags(source, &["TODO"]);
    let (_, todo_hits) = &counts[0];
    assert_eq!(*todo_hits, 3);
}
/// Repeated occurrences on one line are each counted.
#[test]
fn count_tags_multiple_on_same_line() {
    let source = "// TODO TODO TODO\n";
    let counts = count_tags(source, &["TODO"]);
    let (_, todo_hits) = &counts[0];
    assert_eq!(*todo_hits, 3);
}
/// Text without any of the requested tags yields a zero count for every entry.
#[test]
fn count_tags_no_matches() {
    let source = "fn main() { println!(\"hello\"); }";
    let counts = count_tags(source, &["TODO", "FIXME", "HACK"]);
    counts.iter().for_each(|(_, hits)| assert_eq!(*hits, 0));
}
/// Empty text still produces an entry per tag, each with a zero count.
#[test]
fn count_tags_empty_text() {
    let counts = count_tags("", &["TODO", "FIXME"]);
    let (_, todo_hits) = &counts[0];
    let (_, fixme_hits) = &counts[1];
    assert_eq!(*todo_hits, 0);
    assert_eq!(*fixme_hits, 0);
}
/// Requesting no tags yields an empty result, whatever the text contains.
#[test]
fn count_tags_empty_tag_list() {
    let counts = count_tags("TODO FIXME", &[]);
    assert!(counts.is_empty());
}
/// A tag inside a string literal is counted like any other occurrence.
#[test]
fn count_tags_tag_in_string_literal() {
    let source = r#"let s = "TODO: this is in a string";"#;
    let counts = count_tags(source, &["TODO"]);
    let (_, todo_hits) = &counts[0];
    assert_eq!(*todo_hits, 1);
}
/// Markers other than TODO/FIXME are counted the same way.
#[test]
fn count_tags_custom_markers() {
    let source = "// HACK: workaround\n// XXX: dangerous\n// NOTE: important\n";
    let counts = count_tags(source, &["HACK", "XXX", "NOTE"]);
    // HACK, XXX and NOTE in that order, one occurrence each.
    for slot in 0..3 {
        assert_eq!(counts[slot].1, 1);
    }
}
/// Pins the current behavior: `TODOLIST` counts as one `TODO` occurrence.
#[test]
fn count_tags_partial_match_counted() {
    let source = "TODOLIST is not a real tag\n";
    let counts = count_tags(source, &["TODO"]);
    let (_, todo_hits) = &counts[0];
    assert_eq!(*todo_hits, 1);
}
/// Hashing the same bytes twice gives the same digest.
#[test]
fn hash_bytes_deterministic() {
    let payload = b"hello world";
    let first = hash_bytes(payload);
    let second = hash_bytes(payload);
    assert_eq!(first, second);
}
/// The digest is rendered as exactly 64 ASCII hex digits.
#[test]
fn hash_bytes_always_64_hex_chars() {
    let digest = hash_bytes(b"test");
    assert_eq!(digest.len(), 64);
    let all_hex = digest.chars().all(|c| c.is_ascii_hexdigit());
    assert!(all_hex);
}
/// Two different inputs produce two different digests.
#[test]
fn hash_bytes_different_inputs_different_hashes() {
    let alpha_digest = hash_bytes(b"alpha");
    let beta_digest = hash_bytes(b"beta");
    assert_ne!(alpha_digest, beta_digest);
}
/// Hashing an empty input still yields a well-formed 64-character hex digest.
#[test]
fn hash_bytes_empty_input() {
    let h = hash_bytes(b"");
    assert_eq!(h.len(), 64);
    // A non-empty check would be implied by the length assertion above, so
    // verify the digest's character set instead.
    assert!(h.chars().all(|c| c.is_ascii_hexdigit()));
}
/// Hashing a file under a generous limit equals hashing its contents directly.
#[test]
fn hash_file_matches_hash_bytes() {
    let dir = tmp();
    let path = write_file(&dir, "test.txt", b"consistent content");
    let expected = hash_bytes(b"consistent content");
    let actual = hash_file(&path, 1000).unwrap();
    assert_eq!(actual, expected);
}
/// Only the first `max_bytes` bytes of the file contribute to the digest.
#[test]
fn hash_file_respects_max_bytes() {
    let dir = tmp();
    let path = write_file(&dir, "long.txt", b"abcdefghij");
    let first_five = hash_file(&path, 5).unwrap();
    let all_ten = hash_file(&path, 10).unwrap();
    assert_ne!(
        first_five, all_ten,
        "different byte limits should produce different hashes"
    );
    let prefix_digest = hash_bytes(b"abcde");
    assert_eq!(first_five, prefix_digest);
}
/// Hashing a path that does not exist returns an error instead of panicking.
#[test]
fn hash_file_nonexistent_errors() {
    let dir = tmp();
    let missing = dir.path().join("nope.txt");
    let outcome = hash_file(&missing, 1000);
    assert!(outcome.is_err());
}
/// Reading the head of an empty file succeeds with no bytes.
#[test]
fn read_head_empty_file() {
    let dir = tmp();
    let empty = write_file(&dir, "empty.txt", b"");
    let head = read_head(&empty, 100).unwrap();
    assert!(head.is_empty());
}
/// Reading head and tail of an empty file succeeds with no bytes.
#[test]
fn read_head_tail_empty_file() {
    let dir = tmp();
    let empty = write_file(&dir, "empty.txt", b"");
    let sample = read_head_tail(&empty, 100).unwrap();
    assert!(sample.is_empty());
}
/// Reading lines from an empty file succeeds with no lines.
#[test]
fn read_lines_empty_file() {
    let dir = tmp();
    let empty = write_file(&dir, "empty.txt", b"");
    let collected = read_lines(&empty, 100, 10000).unwrap();
    assert!(collected.is_empty());
}
/// Reading capped text from an empty file succeeds with an empty result.
#[test]
fn read_text_capped_empty_file() {
    let dir = tmp();
    let empty = write_file(&dir, "empty.txt", b"");
    let contents = read_text_capped(&empty, 100).unwrap();
    assert!(contents.is_empty());
}
/// The digest of an empty file equals the digest of an empty byte slice.
#[test]
fn hash_file_empty_file() {
    let dir = tmp();
    let empty = write_file(&dir, "empty.txt", b"");
    let expected = hash_bytes(b"");
    let actual = hash_file(&empty, 100).unwrap();
    assert_eq!(actual, expected);
}
/// An empty byte-string literal must report zero entropy.
#[test]
fn entropy_empty_file_content() {
    let e = entropy_bits_per_byte(b"");
    assert_eq!(e, 0.0);
}
/// Plain ASCII is classified as text.
#[test]
fn is_text_like_ascii() {
    let looks_textual = is_text_like(b"hello world");
    assert!(looks_textual);
}
/// Valid multi-byte UTF-8 is classified as text.
#[test]
fn is_text_like_utf8() {
    let accented = "héllo wörld";
    let looks_textual = is_text_like(accented.as_bytes());
    assert!(looks_textual);
}
/// A NUL byte embedded in otherwise ASCII content makes it non-text.
#[test]
fn is_text_like_null_byte_makes_binary() {
    let looks_textual = is_text_like(b"hello\x00world");
    assert!(!looks_textual);
}
/// Empty input is classified as text.
#[test]
fn is_text_like_empty_is_text() {
    let looks_textual = is_text_like(b"");
    assert!(looks_textual);
}
/// A single NUL byte is classified as non-text.
#[test]
fn is_text_like_just_null() {
    let looks_textual = is_text_like(b"\x00");
    assert!(!looks_textual);
}
/// An ELF-style magic header containing a NUL byte is classified as non-text.
#[test]
fn is_text_like_binary_header() {
    let looks_textual = is_text_like(b"\x7fELF\x00\x01\x01");
    assert!(!looks_textual);
}
/// Bytes that are not valid UTF-8 are classified as non-text even without a NUL.
#[test]
fn is_text_like_invalid_utf8_without_null() {
    // 0xFF and 0xFE can never appear in well-formed UTF-8. A fixed-size array
    // is enough here; no heap allocation is needed for a borrowed fixture.
    let bytes = [0xFFu8, 0xFE, 0x41, 0x42];
    assert!(!is_text_like(&bytes));
}
/// Newline and tab control characters do not stop content from being text.
#[test]
fn is_text_like_newlines_and_tabs() {
    let looks_textual = is_text_like(b"line1\nline2\ttab");
    assert!(looks_textual);
}
/// Reading the head of a 100 000-byte file with a 1024-byte cap returns exactly 1024 bytes.
#[test]
fn read_head_large_file_capped() {
    let dir = tmp();
    let payload = vec![b'x'; 100_000];
    let big = write_file(&dir, "big.txt", &payload);
    let head_len = read_head(&big, 1024).unwrap().len();
    assert_eq!(head_len, 1024);
}
/// A 100-byte head/tail sample of a 10 000-byte file has 100 bytes and starts with the file's first byte.
#[test]
fn read_head_tail_large_file() {
    let dir = tmp();
    // Repeating 0..=255 pattern, so the first byte of the file is 0.
    let payload: Vec<u8> = (0..10_000u16).map(|i| (i % 256) as u8).collect();
    let big = write_file(&dir, "big.bin", &payload);
    let sample = read_head_tail(&big, 100).unwrap();
    assert_eq!(sample.len(), 100);
    assert_eq!(sample[0], 0);
}
/// Hashing a 1 MB file with a 4096-byte limit equals hashing its first 4096 bytes.
#[test]
fn hash_file_large_with_limit() {
    let dir = tmp();
    let payload = vec![b'A'; 1_000_000];
    let huge = write_file(&dir, "huge.txt", &payload);
    let digest = hash_file(&huge, 4096).unwrap();
    assert_eq!(digest.len(), 64);
    // The file is all b'A', so its 4096-byte prefix is 4096 copies of b'A'.
    let prefix_digest = hash_bytes(&payload[..4096]);
    assert_eq!(digest, prefix_digest);
}
/// A large buffer of one repeated byte must still report exactly zero entropy.
#[test]
fn entropy_large_uniform_data() {
    let constant_run = vec![0x42u8; 100_000];
    let e = entropy_bits_per_byte(&constant_run);
    assert_eq!(e, 0.0, "uniform data should have zero entropy");
}
/// A zero-byte budget yields an empty sample even for a non-empty file.
#[test]
fn read_head_tail_zero_max_returns_empty() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"hello");
    let sample = read_head_tail(&file, 0).unwrap();
    assert!(sample.is_empty());
}
/// A one-byte budget returns exactly the first byte of the file.
#[test]
fn read_head_tail_max_one() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"abcdefghij");
    let sample = read_head_tail(&file, 1).unwrap();
    assert_eq!(sample.len(), 1);
    assert_eq!(sample[0], b'a');
}
/// A budget equal to the file size returns the whole file unchanged.
#[test]
fn read_head_tail_exact_file_size() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"abcde");
    let sample = read_head_tail(&file, 5).unwrap();
    assert_eq!(sample, b"abcde");
}
/// A budget larger than the file returns the whole file unchanged.
#[test]
fn read_head_tail_larger_than_file() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"abc");
    let sample = read_head_tail(&file, 1000).unwrap();
    assert_eq!(sample, b"abc");
}
/// A line limit of zero yields no lines.
#[test]
fn read_lines_zero_max_lines() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"line1\nline2\n");
    let collected = read_lines(&file, 0, 10000).unwrap();
    assert!(collected.is_empty());
}
/// A byte limit of zero yields no lines.
#[test]
fn read_lines_zero_max_bytes() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"line1\nline2\n");
    let collected = read_lines(&file, 100, 0).unwrap();
    assert!(collected.is_empty());
}
/// A file without a trailing newline still yields its single line.
#[test]
fn read_lines_single_line_no_newline() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"no newline at end");
    let collected = read_lines(&file, 100, 10000).unwrap();
    assert_eq!(collected.len(), 1);
    assert_eq!(collected[0], "no newline at end");
}
/// Blank lines are kept as empty strings rather than being skipped.
#[test]
fn read_lines_preserves_blank_lines() {
    let dir = tmp();
    let file = write_file(&dir, "f.txt", b"a\n\nb\n\nc\n");
    let collected = read_lines(&file, 100, 10000).unwrap();
    assert_eq!(collected.len(), 5);
    // Positions 1 and 3 are the blank lines between "a", "b" and "c".
    for blank_at in [1, 3] {
        assert_eq!(collected[blank_at], "");
    }
}
/// Empty source contains no functions.
#[test]
fn complexity_empty_code() {
    let metrics = analyze_functions("", "rust");
    assert_eq!(metrics.function_count, 0);
}
/// Source consisting of a comment and a statement contains no functions.
#[test]
fn complexity_no_functions() {
    let source = "// just a comment\nlet x = 42;\n";
    let metrics = analyze_functions(source, "rust");
    assert_eq!(metrics.function_count, 0);
}
/// A single three-line Rust function is counted once with length 3.
#[test]
fn complexity_single_rust_fn() {
    let source = "fn main() {\n println!(\"hello\");\n}\n";
    let metrics = analyze_functions(source, "rust");
    assert_eq!(metrics.function_count, 1);
    assert_eq!(metrics.max_function_length, 3);
}
/// Three consecutive Rust functions are each counted.
#[test]
fn complexity_multiple_rust_fns() {
    let source = r#"
fn foo() {
// do stuff
}
fn bar() {
// do other stuff
}
fn baz() {
// do more stuff
}
"#;
    let metrics = analyze_functions(source, "rust");
    assert_eq!(metrics.function_count, 3);
}
/// Two Python `def` blocks are counted as two functions.
#[test]
fn complexity_python_def() {
    let source = "def hello():\n print('hello')\n\ndef world():\n print('world')\n";
    let metrics = analyze_functions(source, "python");
    assert_eq!(metrics.function_count, 2);
}
/// An unrecognized language name yields zero functions, even for Rust-looking source.
#[test]
fn complexity_unsupported_language() {
    let source = "fn main() { }";
    let metrics = analyze_functions(source, "brainfuck");
    assert_eq!(metrics.function_count, 0);
}
/// Empty source has no functions and a total cyclomatic complexity of zero.
#[test]
fn cyclomatic_empty() {
    let report = estimate_cyclomatic_complexity("", "rust");
    assert_eq!(report.function_count, 0);
    assert_eq!(report.total_cc, 0);
}
/// A branch-free function has a cyclomatic complexity of exactly one.
#[test]
fn cyclomatic_simple_function() {
    let source = "fn simple() {\n println!(\"hello\");\n}\n";
    let report = estimate_cyclomatic_complexity(source, "rust");
    assert_eq!(report.function_count, 1);
    assert_eq!(report.max_cc, 1);
}
/// A single `if` raises the maximum cyclomatic complexity to at least two.
#[test]
fn cyclomatic_if_branch() {
    let source = r#"
fn decide(x: i32) {
if x > 0 {
println!("positive");
}
}
"#;
    let report = estimate_cyclomatic_complexity(source, "rust");
    assert!(
        report.max_cc >= 2,
        "if adds at least 1: got {}",
        report.max_cc
    );
}
/// Empty source has no functions and a total cognitive complexity of zero.
#[test]
fn cognitive_empty() {
    let report = estimate_cognitive_complexity("", "rust");
    assert_eq!(report.function_count, 0);
    assert_eq!(report.total, 0);
}
/// Two nested `if`s must score at least as high as two sequential `if`s.
#[test]
fn cognitive_nested_ifs_higher_than_flat() {
    // Two sibling conditionals at the same depth.
    let flat = r#"
fn flat(x: i32) {
if x > 0 { }
if x > 1 { }
}
"#;
    // The same two conditionals, one inside the other.
    let nested = r#"
fn nested(x: i32) {
if x > 0 {
if x > 1 { }
}
}
"#;
    let flat_score = estimate_cognitive_complexity(flat, "rust").max;
    let nested_score = estimate_cognitive_complexity(nested, "rust").max;
    assert!(
        nested_score >= flat_score,
        "nested should be >= flat: {} vs {}",
        nested_score,
        flat_score
    );
}
/// Empty source has a maximum nesting depth of zero.
#[test]
fn nesting_depth_empty() {
    let report = analyze_nesting_depth("", "rust");
    assert_eq!(report.max_depth, 0);
}
/// A function body alone counts as at least one level of nesting.
#[test]
fn nesting_depth_single_level() {
    let source = "fn main() {\n println!(\"hi\");\n}\n";
    let report = analyze_nesting_depth(source, "rust");
    assert!(report.max_depth >= 1);
}
/// Three nested `if`s inside a function give a maximum depth of at least three.
#[test]
fn nesting_depth_deep_nesting() {
    let source = r#"
fn deep() {
if true {
if true {
if true {
println!("deep");
}
}
}
}
"#;
    let report = analyze_nesting_depth(source, "rust");
    assert!(
        report.max_depth >= 3,
        "expected depth >= 3, got {}",
        report.max_depth
    );
}
// Property-based tests: invariants that must hold for arbitrary inputs.
mod properties {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Entropy of arbitrary bytes always lies within [0, 8] bits per byte.
        #[test]
        fn entropy_in_range(data in proptest::collection::vec(any::<u8>(), 0..500)) {
            let e = entropy_bits_per_byte(&data);
            prop_assert!(e >= 0.0, "entropy must be >= 0: {e}");
            prop_assert!(e <= 8.0, "entropy must be <= 8: {e}");
        }

        // The digest string is 64 characters long for any input.
        #[test]
        fn hash_length_always_64(data in proptest::collection::vec(any::<u8>(), 0..500)) {
            let h = hash_bytes(&data);
            prop_assert_eq!(h.len(), 64, "hash must be 64 hex chars");
        }

        // The digest string contains only ASCII hex digits for any input.
        #[test]
        fn hash_is_hex_only(data in proptest::collection::vec(any::<u8>(), 0..200)) {
            let h = hash_bytes(&data);
            prop_assert!(h.chars().all(|c| c.is_ascii_hexdigit()), "hash must be hex: {h}");
        }

        // Hashing the same input twice yields the same digest.
        #[test]
        fn hash_deterministic(data in proptest::collection::vec(any::<u8>(), 0..200)) {
            let h1 = hash_bytes(&data);
            let h2 = hash_bytes(&data);
            prop_assert_eq!(h1, h2, "hash must be deterministic");
        }

        // A buffer of one repeated byte has (numerically) zero entropy,
        // whichever byte value is repeated.
        #[test]
        fn entropy_uniform_single_byte_zero(byte in any::<u8>()) {
            let data = [byte; 100];
            let e = entropy_bits_per_byte(&data);
            prop_assert!(e.abs() < f32::EPSILON, "single-byte uniform = 0: {e}");
        }

        // count_tags reports exactly one entry per requested tag, whatever
        // the text contains. The unit tests above rely on the same shape by
        // indexing both entries even for empty text.
        #[test]
        fn count_tags_non_negative(text in "[a-zA-Z ]{0,100}") {
            let tags = count_tags(&text, &["TODO", "FIXME"]);
            prop_assert_eq!(tags.len(), 2, "expected one entry per requested tag");
        }

        // Classifying the same bytes twice gives the same answer.
        #[test]
        fn is_text_like_consistent(data in proptest::collection::vec(any::<u8>(), 0..100)) {
            let r1 = is_text_like(&data);
            let r2 = is_text_like(&data);
            prop_assert_eq!(r1, r2, "is_text_like must be deterministic");
        }

        // read_head returns no more than the cap and no more than the file holds.
        #[test]
        fn read_head_never_exceeds_max(len in 1usize..500, max in 1usize..500) {
            let dir = tmp();
            let data = vec![b'x'; len];
            let path = write_file(&dir, "f.txt", &data);
            let head = read_head(&path, max).unwrap();
            prop_assert!(head.len() <= max, "head {} > max {}", head.len(), max);
            prop_assert!(head.len() <= len, "head {} > file len {}", head.len(), len);
        }

        // read_head_tail returns no more than the cap.
        #[test]
        fn read_head_tail_never_exceeds_max(len in 1usize..500, max in 1usize..500) {
            let dir = tmp();
            let data = vec![b'y'; len];
            let path = write_file(&dir, "f.txt", &data);
            let result = read_head_tail(&path, max).unwrap();
            prop_assert!(result.len() <= max, "head_tail {} > max {}", result.len(), max);
        }
    }
}