use crate::content::complexity::{
analyze_functions, analyze_nesting_depth, estimate_cognitive_complexity,
estimate_cyclomatic_complexity,
};
use crate::content::io::{
count_tags, entropy_bits_per_byte, hash_bytes, hash_file, is_text_like, read_head,
read_head_tail, read_lines, read_text_capped,
};
use std::fs::File;
use std::io::Write;
/// Scenarios exercising `entropy_bits_per_byte` on hand-built byte distributions.
mod entropy {
    use super::*;

    /// An empty slice must report exactly zero.
    #[test]
    fn scenario_empty_input_yields_zero_entropy() {
        let no_bytes: &[u8] = &[];
        assert_eq!(entropy_bits_per_byte(no_bytes), 0.0);
    }

    /// A buffer holding one repeated value carries no information.
    #[test]
    fn scenario_single_repeated_byte_yields_zero_entropy() {
        let constant_run = [0xAA_u8; 1000];
        let entropy = entropy_bits_per_byte(&constant_run);
        assert!(entropy.abs() < 1e-6, "got {entropy}");
    }

    /// Two symbols in equal proportion: about one bit per byte.
    #[test]
    fn scenario_two_equally_distributed_values_yield_one_bit() {
        let alternating: Vec<u8> = [0u8, 1].iter().copied().cycle().take(1000).collect();
        let entropy = entropy_bits_per_byte(&alternating);
        let deviation = (entropy - 1.0).abs();
        assert!(deviation < 0.01, "expected ~1.0 bit, got {entropy}");
    }

    /// Four symbols in equal proportion: about two bits per byte.
    #[test]
    fn scenario_four_equally_distributed_values_yield_two_bits() {
        let rotating: Vec<u8> = [0u8, 1, 2, 3].iter().copied().cycle().take(1000).collect();
        let entropy = entropy_bits_per_byte(&rotating);
        let deviation = (entropy - 2.0).abs();
        assert!(deviation < 0.01, "expected ~2.0 bits, got {entropy}");
    }

    /// Every byte value exactly once: about eight bits per byte.
    #[test]
    fn scenario_full_byte_range_yields_eight_bits() {
        let every_value: Vec<u8> = (u8::MIN..=u8::MAX).collect();
        let entropy = entropy_bits_per_byte(&every_value);
        let deviation = (entropy - 8.0).abs();
        assert!(deviation < 0.01, "expected ~8.0 bits, got {entropy}");
    }

    /// 990 zero bytes followed by 10 0xFF bytes should stay well below one bit.
    #[test]
    fn scenario_skewed_distribution_yields_low_entropy() {
        let mut skewed = vec![0x00_u8; 990];
        skewed.resize(1000, 0xFF);
        let entropy = entropy_bits_per_byte(&skewed);
        assert!(entropy < 0.15, "expected low entropy, got {entropy}");
    }

    /// A short English sample is expected to land strictly between 3 and 5.5 bits.
    #[test]
    fn scenario_natural_text_has_moderate_entropy() {
        let text = b"The quick brown fox jumps over the lazy dog. \
                     This is a sample of natural language text that \
                     should have moderate Shannon entropy.";
        let entropy = entropy_bits_per_byte(text);
        let within_band = entropy > 3.0 && entropy < 5.5;
        assert!(
            within_band,
            "expected 3-5.5 bits for English text, got {entropy}"
        );
    }

    /// A one-byte buffer has a single symbol, hence zero entropy.
    #[test]
    fn scenario_single_byte_input_yields_zero_entropy() {
        let lone_byte = [42_u8];
        let entropy = entropy_bits_per_byte(&lone_byte);
        assert!(entropy.abs() < 1e-6, "got {entropy}");
    }

    /// Whatever the input shape, the reported value must not be negative.
    #[test]
    fn scenario_entropy_is_non_negative_for_any_input() {
        let patterns: [Vec<u8>; 4] = [
            vec![0; 100],
            vec![255; 100],
            (0u8..=255).collect(),
            b"hello world".to_vec(),
        ];
        for pattern in &patterns {
            let entropy = entropy_bits_per_byte(pattern);
            assert!(entropy >= 0.0, "entropy should be non-negative: {entropy}");
        }
    }
}
/// Scenarios for `hash_bytes` and `hash_file`.
mod hashing {
    use super::*;

    /// The digest is rendered as 64 hexadecimal characters.
    #[test]
    fn scenario_hash_is_64_hex_characters() {
        let digest = hash_bytes(b"deterministic hashing test");
        assert_eq!(digest.len(), 64);
        assert!(digest.chars().all(|ch| ch.is_ascii_hexdigit()));
    }

    /// Hashing the same bytes twice gives the same digest.
    #[test]
    fn scenario_same_content_produces_same_hash() {
        let payload = b"reproducible";
        let first = hash_bytes(payload);
        let second = hash_bytes(payload);
        assert_eq!(first, second);
    }

    /// Distinct inputs give distinct digests.
    #[test]
    fn scenario_different_content_produces_different_hash() {
        let digest_alpha = hash_bytes(b"alpha");
        let digest_bravo = hash_bytes(b"bravo");
        assert_ne!(digest_alpha, digest_bravo);
    }

    /// Even an empty input yields a well-formed digest.
    #[test]
    fn scenario_empty_input_produces_valid_hash() {
        let digest = hash_bytes(&[]);
        assert_eq!(digest.len(), 64);
        assert!(digest.chars().all(|ch| ch.is_ascii_hexdigit()));
    }

    /// With a generous byte limit, hashing a file equals hashing its contents.
    #[test]
    fn scenario_hash_file_matches_hash_bytes() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("hashtest.bin");
        let content = b"file hash consistency check";
        {
            // Scoped so the handle is closed before the file is hashed.
            let mut file = File::create(&path).unwrap();
            file.write_all(content).unwrap();
        }
        let from_file = hash_file(&path, 10_000).unwrap();
        let from_memory = hash_bytes(content);
        assert_eq!(from_file, from_memory);
    }

    /// A limit of 5 hashes only the first five bytes of the file.
    #[test]
    fn scenario_hash_file_respects_max_bytes() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("truncated.bin");
        {
            let mut file = File::create(&path).unwrap();
            file.write_all(b"abcdefghij").unwrap();
        }
        let capped = hash_file(&path, 5).unwrap();
        let uncapped = hash_file(&path, 100).unwrap();
        assert_eq!(capped, hash_bytes(b"abcde"));
        assert_ne!(capped, uncapped);
    }

    /// Flipping the lowest bit of the first byte must change the digest.
    #[test]
    fn scenario_one_bit_flip_changes_hash() {
        let original = *b"aaaa";
        let mut flipped = original;
        flipped[0] ^= 0x01;
        let digest_original = hash_bytes(&original);
        let digest_flipped = hash_bytes(&flipped);
        assert_ne!(digest_original, digest_flipped);
    }
}
/// Scenarios for the `is_text_like` classifier.
mod text_detection {
    use super::*;

    /// An empty buffer is treated as text.
    #[test]
    fn scenario_empty_input_is_text_like() {
        let nothing: &[u8] = &[];
        assert!(is_text_like(nothing));
    }

    /// Plain ASCII is text.
    #[test]
    fn scenario_valid_ascii_is_text_like() {
        assert!(is_text_like(b"Hello, World! 123"));
    }

    /// Multi-byte UTF-8 (CJK plus an emoji) is text.
    #[test]
    fn scenario_valid_utf8_is_text_like() {
        let sample = "こんにちは世界 🌍";
        assert!(is_text_like(sample.as_bytes()));
    }

    /// A NUL byte inside otherwise printable data disqualifies it.
    #[test]
    fn scenario_null_byte_makes_it_not_text_like() {
        let with_nul = b"hello\x00world";
        let verdict = is_text_like(with_nul);
        assert!(!verdict);
    }

    /// NUL bytes interleaved with high bytes are not text.
    #[test]
    fn scenario_pure_binary_with_nulls_not_text_like() {
        let blob = [0x00_u8, 0xFF, 0x00, 0xFE, 0x00];
        assert!(!is_text_like(&blob));
    }

    /// Bytes that are not valid UTF-8 are rejected even without any NUL.
    #[test]
    fn scenario_invalid_utf8_without_nulls_not_text_like() {
        let garbage = [0xFF_u8, 0xFE, 0xFD];
        assert!(!is_text_like(&garbage));
    }

    /// LF, TAB and CRLF do not stop data from counting as text.
    #[test]
    fn scenario_newlines_and_tabs_are_text_like() {
        let with_controls = b"line1\nline2\ttabbed\r\nwindows";
        let verdict = is_text_like(with_controls);
        assert!(verdict);
    }
}
/// Scenarios for `count_tags`, which reports one `(tag, count)` pair per requested tag.
mod tag_counting {
    use super::*;

    /// Two TODO comments are counted as two.
    #[test]
    fn scenario_counts_todo_tags() {
        let source = "// TODO: fix this\n// TODO: also this\nlet x = 1;";
        let counts = count_tags(source, &["TODO"]);
        let expected = vec![(String::from("TODO"), 2)];
        assert_eq!(counts, expected);
    }

    /// Every casing of the tag contributes to the count.
    #[test]
    fn scenario_case_insensitive_matching() {
        let counts = count_tags("todo Todo TODO tOdO", &["TODO"]);
        assert_eq!(counts[0].1, 4);
    }

    /// Each requested tag gets its own independent tally.
    #[test]
    fn scenario_multiple_tags_counted_independently() {
        let source = "// TODO: implement\n// FIXME: broken\n// TODO: refactor";
        let counts = count_tags(source, &["TODO", "FIXME"]);
        assert_eq!(counts.len(), 2);
        assert_eq!(counts[0], (String::from("TODO"), 2));
        assert_eq!(counts[1], (String::from("FIXME"), 1));
    }

    /// Empty text still yields one zero-count entry per tag.
    #[test]
    fn scenario_empty_text_yields_zero_counts() {
        let counts = count_tags("", &["TODO", "FIXME", "HACK"]);
        for entry in &counts {
            assert_eq!(entry.1, 0);
        }
        assert_eq!(counts.len(), 3);
    }

    /// Asking for no tags yields no entries.
    #[test]
    fn scenario_no_tags_yields_empty_results() {
        let counts = count_tags("some text here", &[]);
        assert!(counts.is_empty());
    }

    /// A tag that never appears is reported with a count of zero.
    #[test]
    fn scenario_tag_not_present_yields_zero() {
        let counts = count_tags("no markers here", &["TODO"]);
        assert_eq!(counts[0].1, 0);
    }

    /// Entries come back in the order the tags were requested,
    /// not the order they occur in the text.
    #[test]
    fn scenario_preserves_tag_order_in_results() {
        let tags = &["FIXME", "TODO", "HACK", "NOTE"];
        let counts = count_tags("TODO FIXME", tags);
        for (position, expected) in tags.iter().enumerate() {
            assert_eq!(counts[position].0, *expected);
        }
    }
}
/// Scenarios for the bounded file readers, each run against a fresh temp directory.
mod file_reading {
    use super::*;

    /// `read_head` with a limit of 5 returns the first five bytes.
    #[test]
    fn scenario_read_head_returns_first_n_bytes() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("head.txt");
        {
            let mut file = File::create(&path).unwrap();
            file.write_all(b"abcdefghij").unwrap();
        }
        let head = read_head(&path, 5).unwrap();
        assert_eq!(head, b"abcde");
    }

    /// With a budget of 6 on a ten-byte file, three bytes come from each end.
    #[test]
    fn scenario_read_head_tail_captures_boundaries() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("ht.txt");
        {
            let mut file = File::create(&path).unwrap();
            file.write_all(b"0123456789").unwrap();
        }
        let sampled = read_head_tail(&path, 6).unwrap();
        assert_eq!(sampled, b"012789");
    }

    /// A zero budget reads nothing.
    #[test]
    fn scenario_read_head_tail_zero_bytes_returns_empty() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("zero.txt");
        {
            let mut file = File::create(&path).unwrap();
            file.write_all(b"content").unwrap();
        }
        let sampled = read_head_tail(&path, 0).unwrap();
        assert!(sampled.is_empty());
    }

    /// When the file is smaller than the budget, the whole file is returned.
    #[test]
    fn scenario_read_head_tail_file_smaller_than_limit() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("small.txt");
        {
            let mut file = File::create(&path).unwrap();
            file.write_all(b"hello").unwrap();
        }
        let sampled = read_head_tail(&path, 100).unwrap();
        assert_eq!(sampled, b"hello");
    }

    /// `read_lines` stops after the requested number of lines.
    #[test]
    fn scenario_read_lines_limits_by_count() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("lines.txt");
        let mut file = File::create(&path).unwrap();
        (0..20).for_each(|i| writeln!(file, "line {i}").unwrap());
        let first_five = read_lines(&path, 5, usize::MAX).unwrap();
        assert_eq!(first_five.len(), 5);
        assert_eq!(first_five[0], "line 0");
        assert_eq!(first_five[4], "line 4");
    }

    /// A 7-byte cap falls inside the four-byte emoji; the reader must still
    /// return the intact prefix before it.
    #[test]
    fn scenario_read_text_capped_handles_truncation_gracefully() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("utf8.txt");
        {
            let mut file = File::create(&path).unwrap();
            file.write_all("Hello 🌍 World".as_bytes()).unwrap();
        }
        let capped = read_text_capped(&path, 7).unwrap();
        assert!(!capped.is_empty());
        assert!(capped.starts_with("Hello "));
    }
}
/// Scenarios for the function, cyclomatic, cognitive and nesting analyzers.
///
/// The fixture snippets are kept byte-for-byte, including their lack of
/// indentation inside raw strings, because the analyzers read them verbatim.
mod complexity_scenarios {
    use super::*;

    /// No source means no functions and zeroed length statistics.
    #[test]
    fn scenario_empty_code_yields_zero_functions() {
        let report = analyze_functions("", "rust");
        assert_eq!(report.function_count, 0);
        assert_eq!(report.max_function_length, 0);
        assert_eq!(report.avg_function_length, 0.0);
    }

    /// One three-line Rust function is found and measured.
    #[test]
    fn scenario_single_rust_function_detected() {
        let source = "fn greet() {\n println!(\"hi\");\n}\n";
        let report = analyze_functions(source, "rust");
        assert_eq!(report.function_count, 1);
        assert_eq!(report.max_function_length, 3);
    }

    /// A Python `def` is recognised as a function.
    #[test]
    fn scenario_python_def_detected() {
        let source = "def greet():\n print('hi')\n print('bye')\n";
        let report = analyze_functions(source, "python");
        assert_eq!(report.function_count, 1);
    }

    /// A JavaScript `function` declaration is recognised.
    #[test]
    fn scenario_javascript_function_detected() {
        let source = "function greet() {\n console.log('hi');\n}\n";
        let report = analyze_functions(source, "javascript");
        assert_eq!(report.function_count, 1);
    }

    /// A Go `func` is recognised.
    #[test]
    fn scenario_go_func_detected() {
        let source = "func main() {\n fmt.Println(\"hello\")\n}\n";
        let report = analyze_functions(source, "go");
        assert_eq!(report.function_count, 0 + 1);
    }

    /// An unknown language name produces no detected functions.
    #[test]
    fn scenario_unsupported_language_yields_zero() {
        let report = analyze_functions("some code here", "brainfuck");
        assert_eq!(report.function_count, 0);
    }

    /// A branch-free function has cyclomatic complexity 1.
    #[test]
    fn scenario_cyclomatic_complexity_simple_function() {
        let source = "fn simple() {\n println!(\"hello\");\n}\n";
        let cyclomatic = estimate_cyclomatic_complexity(source, "rust");
        assert_eq!(cyclomatic.function_count, 1);
        assert_eq!(cyclomatic.max_cc, 1);
    }

    /// A single `if`/`else` raises the cyclomatic complexity to at least 2.
    #[test]
    fn scenario_cyclomatic_complexity_with_branch() {
        let source = r#"fn check(x: i32) {
if x > 0 {
println!("positive");
} else {
println!("non-positive");
}
}
"#;
        let cyclomatic = estimate_cyclomatic_complexity(source, "rust");
        assert_eq!(cyclomatic.function_count, 1);
        assert!(
            cyclomatic.max_cc >= 2,
            "expected CC >= 2, got {}",
            cyclomatic.max_cc
        );
    }

    /// Empty input yields all-zero cyclomatic figures.
    #[test]
    fn scenario_cyclomatic_complexity_empty_code() {
        let cyclomatic = estimate_cyclomatic_complexity("", "rust");
        assert_eq!(cyclomatic.function_count, 0);
        assert_eq!(cyclomatic.total_cc, 0);
        assert_eq!(cyclomatic.max_cc, 0);
    }

    /// Three nested `if`s score at least 3 on the cognitive scale.
    #[test]
    fn scenario_cognitive_complexity_nested_ifs() {
        let source = r#"fn nested(x: i32) {
if x > 0 {
if x > 10 {
if x > 100 {
println!("big");
}
}
}
}
"#;
        let cognitive = estimate_cognitive_complexity(source, "rust");
        assert_eq!(cognitive.function_count, 1);
        assert!(
            cognitive.max >= 3,
            "expected high cognitive complexity for nested ifs, got {}",
            cognitive.max
        );
    }

    /// Four levels of braces are reported as a depth of at least 4.
    #[test]
    fn scenario_nesting_depth_increases_with_braces() {
        let source = r#"fn deep() {
if true {
for i in 0..10 {
if i > 5 {
println!("{}", i);
}
}
}
}
"#;
        let nesting = analyze_nesting_depth(source, "rust");
        assert!(
            nesting.max_depth >= 4,
            "expected depth >= 4, got {}",
            nesting.max_depth
        );
    }

    /// Empty input has zero maximum and average depth.
    #[test]
    fn scenario_nesting_analysis_empty_code() {
        let nesting = analyze_nesting_depth("", "rust");
        assert_eq!(nesting.max_depth, 0);
        assert_eq!(nesting.avg_depth, 0.0);
    }

    /// For Python the fixture is expected to reach a depth of at least 2.
    #[test]
    fn scenario_python_nesting_by_indentation() {
        let source = "def f():\n if True:\n for x in range(10):\n print(x)\n";
        let nesting = analyze_nesting_depth(source, "python");
        assert!(
            nesting.max_depth >= 2,
            "expected depth >= 2, got {}",
            nesting.max_depth
        );
    }

    /// With one short and one longer function, the average length sits
    /// above 2 and strictly below the maximum.
    #[test]
    fn scenario_multiple_functions_avg_length() {
        let source = r#"fn short() {
println!("hi");
}
fn longer() {
let a = 1;
let b = 2;
let c = 3;
let d = 4;
println!("{}", a + b + c + d);
}
"#;
        let report = analyze_functions(source, "rust");
        assert_eq!(report.function_count, 2);
        let average = report.avg_function_length;
        assert!(average > 2.0);
        assert!(average < report.max_function_length as f64);
    }

    /// Twelve sequential `if`s push the function past CC 10 and onto the
    /// high-complexity list.
    #[test]
    fn scenario_high_complexity_function_flagged() {
        let mut source = String::from("fn branchy(x: i32) {\n");
        source.extend((0..12).map(|i| format!(" if x == {i} {{ println!(\"{i}\"); }}\n")));
        source.push_str("}\n");
        let cyclomatic = estimate_cyclomatic_complexity(&source, "rust");
        assert_eq!(cyclomatic.function_count, 1);
        assert!(
            cyclomatic.max_cc > 10,
            "expected CC > 10, got {}",
            cyclomatic.max_cc
        );
        let flagged_any = !cyclomatic.high_complexity_functions.is_empty();
        assert!(flagged_any, "expected high complexity flag");
    }
}
/// Further boundary checks for `entropy_bits_per_byte`.
mod entropy_edge_cases {
    use super::*;

    /// A run of identical 0xFF bytes has zero entropy.
    #[test]
    fn scenario_entropy_all_0xff_bytes_yields_zero() {
        let saturated = [0xFF_u8; 500];
        let entropy = entropy_bits_per_byte(&saturated);
        assert!(entropy.abs() < 1e-6, "got {entropy}");
    }

    /// Entropy grows strictly as 256-byte buffers go from one to two to
    /// four equally frequent values.
    #[test]
    fn scenario_entropy_increases_with_byte_diversity() {
        let one_symbol = [0u8; 256];
        let two_symbols = [0u8, 1].repeat(128);
        let four_symbols = [0u8, 1, 2, 3].repeat(64);

        let e1 = entropy_bits_per_byte(&one_symbol);
        let e2 = entropy_bits_per_byte(&two_symbols);
        let e4 = entropy_bits_per_byte(&four_symbols);

        assert!(
            e1 < e2,
            "1 value ({e1}) should have less entropy than 2 values ({e2})"
        );
        assert!(
            e2 < e4,
            "2 values ({e2}) should have less entropy than 4 values ({e4})"
        );
    }

    /// The result never exceeds eight bits, allowing a small tolerance.
    #[test]
    fn scenario_entropy_bounded_by_eight_bits() {
        // Same sequence as `i % 256` for i in 0..1000.
        let wrapped: Vec<u8> = (u8::MIN..=u8::MAX).cycle().take(1000).collect();
        let entropy = entropy_bits_per_byte(&wrapped);
        let ceiling = 8.0 + 1e-6;
        assert!(
            entropy <= ceiling,
            "entropy should be <= 8.0, got {entropy}"
        );
    }
}
/// Additional `count_tags` scenarios: other tags, multi-line text, back-to-back matches.
mod tag_counting_edge_cases {
    use super::*;

    /// HACK, NOTE and XXX are each tallied separately.
    #[test]
    fn scenario_count_tags_hack_note_xxx_detected() {
        let source = "// HACK: workaround\n// NOTE: important\n// XXX: review this\n// HACK again";
        let counts = count_tags(source, &["HACK", "NOTE", "XXX"]);
        assert_eq!(counts[0], (String::from("HACK"), 2));
        assert_eq!(counts[1], (String::from("NOTE"), 1));
        assert_eq!(counts[2], (String::from("XXX"), 1));
    }

    /// Tags are counted across lines, including two different tags on one line.
    #[test]
    fn scenario_count_tags_in_multiline_text() {
        let source = "line 1: TODO\nline 2: nothing\nline 3: TODO and FIXME\nline 4: FIXME";
        let counts = count_tags(source, &["TODO", "FIXME"]);
        assert_eq!(counts[0], (String::from("TODO"), 2));
        assert_eq!(counts[1], (String::from("FIXME"), 2));
    }

    /// Occurrences with no separator between them are each counted.
    #[test]
    fn scenario_count_tags_adjacent_occurrences() {
        let counts = count_tags("TODOTODOTODO", &["TODO"]);
        assert_eq!(counts[0].1, 3);
    }
}
/// Boundary scenarios for the file readers: empty file, missing file, one-byte budget.
mod file_reading_edge_cases {
    use super::*;

    /// An empty file produces no lines.
    #[test]
    fn scenario_read_lines_empty_file_returns_no_lines() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("empty.txt");
        // Create the file and close the handle straight away.
        drop(File::create(&path).unwrap());
        let lines = read_lines(&path, 100, usize::MAX).unwrap();
        assert!(lines.is_empty());
    }

    /// Reading a path that was never created yields an `Err`.
    #[test]
    fn scenario_read_head_missing_file_returns_error() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("nonexistent.txt");
        let outcome = read_head(&path, 100);
        assert!(outcome.is_err());
    }

    /// With a budget of one byte, only the first byte of the file is returned.
    #[test]
    fn scenario_read_head_tail_single_byte_max() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("data.txt");
        {
            let mut file = File::create(&path).unwrap();
            file.write_all(b"ABCDEFGH").unwrap();
        }
        let sampled = read_head_tail(&path, 1).unwrap();
        assert_eq!(sampled.len(), 1);
        assert_eq!(sampled[0], b'A');
    }
}
/// Boundary scenarios for `hash_file` and `hash_bytes`.
mod hashing_edge_cases {
    use super::*;

    /// Hashing an empty file equals hashing an empty byte slice.
    #[test]
    fn scenario_hash_file_empty_file_matches_empty_bytes() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("empty.bin");
        // Create the file and close the handle straight away.
        drop(File::create(&path).unwrap());
        let from_file = hash_file(&path, 1000).unwrap();
        let from_memory = hash_bytes(&[]);
        assert_eq!(from_file, from_memory);
    }

    /// A one-byte input still produces 64 hexadecimal characters.
    #[test]
    fn scenario_hash_bytes_single_byte_is_valid_hex() {
        let digest = hash_bytes(&[42]);
        assert_eq!(digest.len(), 64);
        assert!(digest.chars().all(|ch| ch.is_ascii_hexdigit()));
    }
}
/// NUL-free byte sequences that `is_text_like` is still expected to reject.
mod text_detection_edge_cases {
    use super::*;

    /// High bytes 0x80..0xFF with no NUL are not text.
    #[test]
    fn scenario_high_bytes_without_null_detected_as_binary() {
        let high_bytes = [0x80_u8, 0x81, 0x82, 0xFE, 0xFF];
        let verdict = is_text_like(&high_bytes);
        assert!(!verdict);
    }

    /// The bytes 0xC0, 0xC1, 0xF5, 0xF6 with no NUL are not text.
    #[test]
    fn scenario_latin1_superset_without_null_not_text_like() {
        let stray_bytes = [0xC0_u8, 0xC1, 0xF5, 0xF6];
        let verdict = is_text_like(&stray_bytes);
        assert!(!verdict);
    }
}