use crate::topics::build_topic_clouds;
use tokmd_types::{ChildIncludeMode, ExportData, FileKind, FileRow};
/// Builds an `ExportData` fixture from `(path, module, lang, tokens)` tuples.
///
/// Every row is a `FileKind::Parent` entry with the same fixed code, comment,
/// blank, line, and byte counts; only the path, module, language, and token
/// count vary per input tuple. The export has no module roots, a module depth
/// of 2, and `ChildIncludeMode::Separate`.
fn make_export(files: &[(&str, &str, &str, usize)]) -> ExportData {
    let mut rows = Vec::with_capacity(files.len());
    for &(path, module, lang, tokens) in files {
        rows.push(FileRow {
            path: path.to_owned(),
            module: module.to_owned(),
            lang: lang.to_owned(),
            kind: FileKind::Parent,
            code: 50,
            comments: 5,
            blanks: 5,
            lines: 60,
            bytes: 500,
            tokens,
        });
    }

    ExportData {
        rows,
        module_roots: Vec::new(),
        module_depth: 2,
        children: ChildIncludeMode::Separate,
    }
}
/// An export without any rows must produce neither overall nor per-module topics.
#[test]
fn empty_export_yields_empty_topics() {
    let no_rows = make_export(&[]);
    let result = build_topic_clouds(&no_rows);

    assert!(result.overall.is_empty());
    assert!(result.per_module.is_empty());
}
/// File stems found in the paths of a module should show up as that module's terms.
#[test]
fn topics_from_path_tokens() {
    let fixture = [
        ("crates/auth/login.rs", "crates/auth", "Rust", 100),
        ("crates/auth/token.rs", "crates/auth", "Rust", 100),
    ];
    let export = make_export(&fixture);
    let clouds = build_topic_clouds(&export);

    let auth_topics = clouds.per_module.get("crates/auth").unwrap();
    let terms: Vec<&str> = auth_topics
        .iter()
        .map(|topic| topic.term.as_str())
        .collect();

    assert!(
        terms.iter().any(|term| *term == "login"),
        "expected 'login' in {terms:?}"
    );
    assert!(
        terms.iter().any(|term| *term == "token"),
        "expected 'token' in {terms:?}"
    );
}
/// Two files in two different modules must yield a non-empty overall topic list.
#[test]
fn overall_topics_are_populated() {
    let fixture = [
        ("crates/auth/login.rs", "crates/auth", "Rust", 100),
        ("crates/payments/stripe.rs", "crates/payments", "Rust", 100),
    ];
    let export = make_export(&fixture);
    let result = build_topic_clouds(&export);

    assert!(!result.overall.is_empty());
}
/// The `rs` file extension must never be reported as an overall topic term.
#[test]
fn file_extensions_are_stopwords() {
    let fixture = [("crates/auth/login.rs", "crates/auth", "Rust", 100)];
    let export = make_export(&fixture);
    let clouds = build_topic_clouds(&export);

    let all_terms: Vec<&str> = clouds
        .overall
        .iter()
        .map(|topic| topic.term.as_str())
        .collect();

    assert!(
        all_terms.iter().all(|term| *term != "rs"),
        "'rs' should be a stopword"
    );
}
/// The directory names `src` and `lib` must not be reported as overall topic terms.
#[test]
fn common_dirs_are_stopwords() {
    let fixture = [("src/lib/utils/helpers.rs", "src/lib", "Rust", 100)];
    let export = make_export(&fixture);
    let clouds = build_topic_clouds(&export);

    let all_terms: Vec<&str> = clouds
        .overall
        .iter()
        .map(|topic| topic.term.as_str())
        .collect();

    assert!(
        all_terms.iter().all(|term| *term != "src"),
        "'src' should be a stopword"
    );
    assert!(
        all_terms.iter().all(|term| *term != "lib"),
        "'lib' should be a stopword"
    );
}
/// Each module present in the export gets its own entry in the per-module map.
#[test]
fn topics_are_grouped_by_module() {
    let fixture = [
        ("crates/auth/login.rs", "crates/auth", "Rust", 100),
        ("crates/payments/invoice.rs", "crates/payments", "Rust", 100),
    ];
    let export = make_export(&fixture);
    let by_module = build_topic_clouds(&export).per_module;

    assert!(by_module.contains_key("crates/auth"));
    assert!(by_module.contains_key("crates/payments"));
}
/// The payments module's terms should include `stripe` (from `stripe_api.rs`)
/// and `refund` (from `refund.rs`).
#[test]
fn module_topics_contain_relevant_terms() {
    let fixture = [
        ("crates/payments/stripe_api.rs", "crates/payments", "Rust", 100),
        ("crates/payments/refund.rs", "crates/payments", "Rust", 100),
    ];
    let export = make_export(&fixture);
    let clouds = build_topic_clouds(&export);

    let payment_topics = clouds.per_module.get("crates/payments").unwrap();
    let terms: Vec<&str> = payment_topics
        .iter()
        .map(|topic| topic.term.as_str())
        .collect();

    assert!(
        terms.iter().any(|term| *term == "stripe"),
        "expected 'stripe' in {terms:?}"
    );
    assert!(
        terms.iter().any(|term| *term == "refund"),
        "expected 'refund' in {terms:?}"
    );
}
/// Building topic clouds twice from the same export must give the same overall
/// terms in the same order, with matching `tf` and `df` values.
#[test]
fn topic_clouds_are_deterministic() {
    let fixture = [
        ("crates/auth/login.rs", "crates/auth", "Rust", 100),
        ("crates/auth/token.rs", "crates/auth", "Rust", 100),
        ("crates/payments/stripe.rs", "crates/payments", "Rust", 100),
    ];
    let export = make_export(&fixture);

    let first = build_topic_clouds(&export);
    let second = build_topic_clouds(&export);

    // Compare lengths first: `zip` stops at the shorter side and would
    // otherwise hide a difference in the number of terms.
    assert_eq!(first.overall.len(), second.overall.len());
    first
        .overall
        .iter()
        .zip(second.overall.iter())
        .for_each(|(lhs, rhs)| {
            assert_eq!(lhs.term, rhs.term);
            assert_eq!(lhs.tf, rhs.tf);
            assert_eq!(lhs.df, rhs.df);
        });
}
/// Every overall term must carry a strictly positive score and term frequency.
#[test]
fn topic_terms_have_positive_scores() {
    let fixture = [("crates/auth/login.rs", "crates/auth", "Rust", 100)];
    let export = make_export(&fixture);
    let clouds = build_topic_clouds(&export);

    clouds.overall.iter().for_each(|term| {
        assert!(term.score > 0.0, "score should be positive: {:?}", term);
        assert!(term.tf > 0, "tf should be > 0");
    });
}
/// A module containing twenty distinctly named files must report at most
/// eight topic terms.
#[test]
fn top_k_limits_per_module_terms() {
    // Own the generated paths locally so the fixture tuples can borrow them.
    // This avoids leaking twenty heap strings just to obtain `'static` lifetimes.
    let paths: Vec<String> = (0..20).map(|i| format!("mod/file_{i}.rs")).collect();
    let files: Vec<(&str, &str, &str, usize)> = paths
        .iter()
        .map(|path| (path.as_str(), "mod", "Rust", 100usize))
        .collect();

    let export = make_export(&files);
    let clouds = build_topic_clouds(&export);

    // NOTE(review): the cap is only checked when a "mod" entry exists; if the
    // module is absent this test passes without asserting anything. Confirm
    // whether "mod" is expected to be present and tighten if so.
    if let Some(topics) = clouds.per_module.get("mod") {
        assert!(topics.len() <= 8, "should be capped at top-K=8");
    }
}