use std::path::PathBuf;
use crate::content::{
ContentLimits, ImportGranularity, build_duplicate_report, build_import_report,
build_todo_report,
};
use tempfile::TempDir;
use tokmd_types::{ChildIncludeMode, ExportData, FileKind, FileRow};
fn file_row(path: &str, module: &str, lang: &str, bytes: usize) -> FileRow {
FileRow {
path: path.to_string(),
module: module.to_string(),
lang: lang.to_string(),
kind: FileKind::Parent,
code: 10,
comments: 2,
blanks: 1,
lines: 13,
bytes,
tokens: 80,
}
}
fn export(rows: Vec<FileRow>) -> ExportData {
ExportData {
rows,
module_roots: vec![],
module_depth: 1,
children: ChildIncludeMode::Separate,
}
}
fn write_file(dir: &std::path::Path, rel: &str, content: &[u8]) -> PathBuf {
let full = dir.join(rel);
if let Some(parent) = full.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(&full, content).unwrap();
PathBuf::from(rel)
}
#[test]
fn todo_empty_files() {
let tmp = TempDir::new().unwrap();
let report = build_todo_report(tmp.path(), &[], &ContentLimits::default(), 1000).unwrap();
assert_eq!(report.total, 0);
assert!(report.tags.is_empty());
}
#[test]
fn todo_no_tags_in_file() {
let tmp = TempDir::new().unwrap();
let rel = write_file(
tmp.path(),
"clean.rs",
b"fn main() { println!(\"hello\"); }\n",
);
let report = build_todo_report(tmp.path(), &[rel], &ContentLimits::default(), 1000).unwrap();
assert_eq!(report.total, 0);
}
#[test]
fn todo_all_four_tags() {
let tmp = TempDir::new().unwrap();
let content = "// TODO: a\n// FIXME: b\n// HACK: c\n// XXX: d\n";
let rel = write_file(tmp.path(), "tags.rs", content.as_bytes());
let report = build_todo_report(tmp.path(), &[rel], &ContentLimits::default(), 1000).unwrap();
assert_eq!(report.total, 4);
let tags: std::collections::BTreeMap<String, usize> = report
.tags
.iter()
.map(|t| (t.tag.clone(), t.count))
.collect();
assert_eq!(tags["TODO"], 1);
assert_eq!(tags["FIXME"], 1);
assert_eq!(tags["HACK"], 1);
assert_eq!(tags["XXX"], 1);
}
#[test]
fn todo_multiple_same_tag() {
let tmp = TempDir::new().unwrap();
let content = "// TODO: first\n// TODO: second\n// TODO: third\nfn f() {}\n";
let rel = write_file(tmp.path(), "multi.rs", content.as_bytes());
let report = build_todo_report(tmp.path(), &[rel], &ContentLimits::default(), 1000).unwrap();
assert_eq!(report.total, 3);
}
#[test]
fn todo_density_per_kloc() {
let tmp = TempDir::new().unwrap();
let content = "// TODO: one\n// TODO: two\n";
let rel = write_file(tmp.path(), "d.rs", content.as_bytes());
let report = build_todo_report(tmp.path(), &[rel], &ContentLimits::default(), 2000).unwrap();
assert_eq!(report.density_per_kloc, 1.0);
}
#[test]
fn todo_density_zero_code() {
let tmp = TempDir::new().unwrap();
let rel = write_file(tmp.path(), "empty.rs", b"// TODO: x\n");
let report = build_todo_report(tmp.path(), &[rel], &ContentLimits::default(), 0).unwrap();
assert_eq!(report.density_per_kloc, 0.0);
}
#[test]
fn todo_multiple_files() {
let tmp = TempDir::new().unwrap();
let f1 = write_file(tmp.path(), "a.rs", b"// TODO: a\n");
let f2 = write_file(tmp.path(), "b.rs", b"// FIXME: b\n// TODO: c\n");
let report = build_todo_report(tmp.path(), &[f1, f2], &ContentLimits::default(), 1000).unwrap();
assert_eq!(report.total, 3);
}
#[test]
fn todo_serialization_roundtrip() {
let tmp = TempDir::new().unwrap();
let rel = write_file(tmp.path(), "t.rs", b"// TODO: ser\n// FIXME: rt\n");
let report = build_todo_report(tmp.path(), &[rel], &ContentLimits::default(), 5000).unwrap();
let json = serde_json::to_string(&report).unwrap();
let deser: tokmd_analysis_types::TodoReport = serde_json::from_str(&json).unwrap();
assert_eq!(deser.total, report.total);
assert_eq!(deser.tags.len(), report.tags.len());
assert_eq!(deser.density_per_kloc, report.density_per_kloc);
}
#[test]
fn dup_empty_files() {
let tmp = TempDir::new().unwrap();
let exp = export(vec![]);
let report = build_duplicate_report(tmp.path(), &[], &exp, &ContentLimits::default()).unwrap();
assert_eq!(report.wasted_bytes, 0);
assert!(report.groups.is_empty());
assert_eq!(report.strategy, "exact-blake3");
}
#[test]
fn dup_two_identical_files() {
let tmp = TempDir::new().unwrap();
let content = b"identical content here\n";
let f1 = write_file(tmp.path(), "a.rs", content);
let f2 = write_file(tmp.path(), "b.rs", content);
let exp = export(vec![
file_row("a.rs", "src", "Rust", content.len()),
file_row("b.rs", "src", "Rust", content.len()),
]);
let report =
build_duplicate_report(tmp.path(), &[f1, f2], &exp, &ContentLimits::default()).unwrap();
assert_eq!(report.groups.len(), 1);
assert_eq!(report.groups[0].files.len(), 2);
assert_eq!(report.wasted_bytes, content.len() as u64);
}
#[test]
fn dup_different_files() {
let tmp = TempDir::new().unwrap();
let f1 = write_file(tmp.path(), "a.rs", b"content a\n");
let f2 = write_file(tmp.path(), "b.rs", b"content b\n");
let exp = export(vec![
file_row("a.rs", "src", "Rust", 10),
file_row("b.rs", "src", "Rust", 10),
]);
let report =
build_duplicate_report(tmp.path(), &[f1, f2], &exp, &ContentLimits::default()).unwrap();
assert!(report.groups.is_empty());
}
#[test]
fn dup_three_identical_files() {
let tmp = TempDir::new().unwrap();
let content = b"same content\n";
let f1 = write_file(tmp.path(), "x.rs", content);
let f2 = write_file(tmp.path(), "y.rs", content);
let f3 = write_file(tmp.path(), "z.rs", content);
let exp = export(vec![
file_row("x.rs", "m", "Rust", content.len()),
file_row("y.rs", "m", "Rust", content.len()),
file_row("z.rs", "m", "Rust", content.len()),
]);
let report =
build_duplicate_report(tmp.path(), &[f1, f2, f3], &exp, &ContentLimits::default()).unwrap();
assert_eq!(report.groups.len(), 1);
assert_eq!(report.groups[0].files.len(), 3);
assert_eq!(report.wasted_bytes, 2 * content.len() as u64);
}
#[test]
fn dup_empty_files_skipped() {
let tmp = TempDir::new().unwrap();
let f1 = write_file(tmp.path(), "empty1.rs", b"");
let f2 = write_file(tmp.path(), "empty2.rs", b"");
let exp = export(vec![
file_row("empty1.rs", "m", "Rust", 0),
file_row("empty2.rs", "m", "Rust", 0),
]);
let report =
build_duplicate_report(tmp.path(), &[f1, f2], &exp, &ContentLimits::default()).unwrap();
assert!(report.groups.is_empty());
}
#[test]
fn dup_groups_sorted_by_bytes_desc() {
let tmp = TempDir::new().unwrap();
let small = b"sm\n";
let large = b"this is a much larger content block!\n";
let f1 = write_file(tmp.path(), "s1.rs", small);
let f2 = write_file(tmp.path(), "s2.rs", small);
let f3 = write_file(tmp.path(), "l1.rs", large);
let f4 = write_file(tmp.path(), "l2.rs", large);
let exp = export(vec![
file_row("s1.rs", "m", "Rust", small.len()),
file_row("s2.rs", "m", "Rust", small.len()),
file_row("l1.rs", "m", "Rust", large.len()),
file_row("l2.rs", "m", "Rust", large.len()),
]);
let report = build_duplicate_report(
tmp.path(),
&[f1, f2, f3, f4],
&exp,
&ContentLimits::default(),
)
.unwrap();
assert_eq!(report.groups.len(), 2);
assert!(report.groups[0].bytes >= report.groups[1].bytes);
}
#[test]
fn dup_serialization_roundtrip() {
let tmp = TempDir::new().unwrap();
let content = b"dup content\n";
let f1 = write_file(tmp.path(), "a.rs", content);
let f2 = write_file(tmp.path(), "b.rs", content);
let exp = export(vec![
file_row("a.rs", "m", "Rust", content.len()),
file_row("b.rs", "m", "Rust", content.len()),
]);
let report =
build_duplicate_report(tmp.path(), &[f1, f2], &exp, &ContentLimits::default()).unwrap();
let json = serde_json::to_string(&report).unwrap();
let deser: tokmd_analysis_types::DuplicateReport = serde_json::from_str(&json).unwrap();
assert_eq!(deser.groups.len(), report.groups.len());
assert_eq!(deser.wasted_bytes, report.wasted_bytes);
assert_eq!(deser.strategy, "exact-blake3");
}
#[test]
fn dup_density_report() {
let tmp = TempDir::new().unwrap();
let content = b"duplicate block\n";
let f1 = write_file(tmp.path(), "a.rs", content);
let f2 = write_file(tmp.path(), "b.rs", content);
let exp = export(vec![
file_row("a.rs", "mod", "Rust", content.len()),
file_row("b.rs", "mod", "Rust", content.len()),
]);
let report =
build_duplicate_report(tmp.path(), &[f1, f2], &exp, &ContentLimits::default()).unwrap();
let density = report.density.as_ref().expect("density present");
assert_eq!(density.duplicate_groups, 1);
assert_eq!(density.duplicate_files, 2);
assert!(density.wasted_pct_of_codebase > 0.0);
assert!(density.wasted_pct_of_codebase <= 1.0);
}
#[test]
fn import_empty_files() {
let tmp = TempDir::new().unwrap();
let exp = export(vec![]);
let report = build_import_report(
tmp.path(),
&[],
&exp,
ImportGranularity::Module,
&ContentLimits::default(),
)
.unwrap();
assert!(report.edges.is_empty());
assert_eq!(report.granularity, "module");
}
#[test]
fn import_rust_use_statement() {
let tmp = TempDir::new().unwrap();
let content = b"use std::collections::HashMap;\nfn main() {}\n";
let rel = write_file(tmp.path(), "main.rs", content);
let exp = export(vec![file_row("main.rs", "root", "Rust", content.len())]);
let report = build_import_report(
tmp.path(),
&[rel],
&exp,
ImportGranularity::Module,
&ContentLimits::default(),
)
.unwrap();
assert!(!report.edges.is_empty());
}
#[test]
fn import_file_granularity() {
let tmp = TempDir::new().unwrap();
let exp = export(vec![]);
let report = build_import_report(
tmp.path(),
&[],
&exp,
ImportGranularity::File,
&ContentLimits::default(),
)
.unwrap();
assert_eq!(report.granularity, "file");
}
#[test]
fn import_serialization_roundtrip() {
let tmp = TempDir::new().unwrap();
let content = b"use serde::Serialize;\nuse anyhow::Result;\n";
let rel = write_file(tmp.path(), "lib.rs", content);
let exp = export(vec![file_row("lib.rs", "core", "Rust", content.len())]);
let report = build_import_report(
tmp.path(),
&[rel],
&exp,
ImportGranularity::Module,
&ContentLimits::default(),
)
.unwrap();
let json = serde_json::to_string(&report).unwrap();
let deser: tokmd_analysis_types::ImportReport = serde_json::from_str(&json).unwrap();
assert_eq!(deser.granularity, report.granularity);
assert_eq!(deser.edges.len(), report.edges.len());
}
#[test]
fn import_edges_sorted_by_count() {
let tmp = TempDir::new().unwrap();
let content = b"use std::io;\nuse std::io;\nuse std::collections::HashMap;\n";
let rel = write_file(tmp.path(), "multi.rs", content);
let exp = export(vec![file_row("multi.rs", "root", "Rust", content.len())]);
let report = build_import_report(
tmp.path(),
&[rel],
&exp,
ImportGranularity::Module,
&ContentLimits::default(),
)
.unwrap();
if report.edges.len() >= 2 {
assert!(report.edges[0].count >= report.edges[1].count);
}
}
#[test]
fn import_unsupported_language() {
let tmp = TempDir::new().unwrap();
let content = b"some assembly code\n";
let rel = write_file(tmp.path(), "code.asm", content);
let exp = export(vec![file_row(
"code.asm",
"root",
"Assembly",
content.len(),
)]);
let report = build_import_report(
tmp.path(),
&[rel],
&exp,
ImportGranularity::Module,
&ContentLimits::default(),
)
.unwrap();
assert!(report.edges.is_empty());
}
#[test]
fn todo_respects_per_file_limit() {
let tmp = TempDir::new().unwrap();
let mut content = String::new();
for _ in 0..200 {
content.push_str("// normal line\n");
}
content.push_str("// TODO: should not be found\n");
let rel = write_file(tmp.path(), "big.rs", content.as_bytes());
let limits = ContentLimits {
max_bytes: None,
max_file_bytes: Some(100), };
let report = build_todo_report(tmp.path(), &[rel], &limits, 1000).unwrap();
assert_eq!(report.total, 0);
}
#[test]
fn todo_respects_total_byte_limit() {
let tmp = TempDir::new().unwrap();
let big_content = "// TODO: first file\n".repeat(10); let f1 = write_file(tmp.path(), "a.rs", big_content.as_bytes());
let f2 = write_file(tmp.path(), "b.rs", b"// TODO: second file\n");
let limits = ContentLimits {
max_bytes: Some(big_content.len() as u64), max_file_bytes: None,
};
let report = build_todo_report(tmp.path(), &[f1, f2], &limits, 1000).unwrap();
assert_eq!(report.total, 10);
}
#[test]
fn dup_density_by_module() {
let tmp = TempDir::new().unwrap();
let content = b"same content in both\n";
let f1 = write_file(tmp.path(), "mod_a/x.rs", content);
let f2 = write_file(tmp.path(), "mod_b/y.rs", content);
let exp = export(vec![
file_row("mod_a/x.rs", "mod_a", "Rust", content.len()),
file_row("mod_b/y.rs", "mod_b", "Rust", content.len()),
]);
let report =
build_duplicate_report(tmp.path(), &[f1, f2], &exp, &ContentLimits::default()).unwrap();
let density = report.density.as_ref().unwrap();
assert!(!density.by_module.is_empty());
}
#[test]
fn dup_excludes_child_rows() {
let tmp = TempDir::new().unwrap();
let content = b"child content\n";
let f1 = write_file(tmp.path(), "a.rs", content);
let f2 = write_file(tmp.path(), "b.rs", content);
let mut rows = vec![file_row("a.rs", "m", "Rust", content.len())];
rows.push(FileRow {
path: "b.rs".to_string(),
module: "m".to_string(),
lang: "Rust".to_string(),
kind: FileKind::Child,
code: 10,
comments: 0,
blanks: 0,
lines: 10,
bytes: content.len(),
tokens: 50,
});
let exp = export(rows);
let report =
build_duplicate_report(tmp.path(), &[f1, f2], &exp, &ContentLimits::default()).unwrap();
assert_eq!(report.groups.len(), 1);
}