use std::collections::BTreeMap;
use std::io::Write;
use std::path::Path;
use tokmd_config::{ContextStrategy, ValueMetric};
use tokmd_context_git::GitScores;
use tokmd_context_policy::{
assign_policy as assign_context_policy, classify_file as classify_context_file,
compute_file_cap as compute_context_file_cap, is_spine_file as matches_spine_file,
smart_exclude_reason,
};
use tokmd_path::normalize_slashes as normalize_path;
use tokmd_types::{
ContextFileRow, FileClassification, FileKind, FileRow, InclusionPolicy, PolicyExcludedFile,
SmartExcludedFile,
};
/// Returns the smart-exclusion reason for `path`, or `None` when the path is
/// not smart-excluded.
///
/// Thin public wrapper that forwards to
/// `tokmd_context_policy::smart_exclude_reason`; the matching rules live there.
pub fn is_smart_excluded(path: &str) -> Option<&'static str> {
smart_exclude_reason(path)
}
// Share of the total budget that `select_files_with_options` reserves for
// "spine" files before ranked packing runs.
const SPINE_BUDGET_FRACTION: f64 = 0.05;
// Absolute upper bound (in tokens) on the spine reservation; the effective
// spine budget is the smaller of this cap and `budget * SPINE_BUDGET_FRACTION`.
const SPINE_BUDGET_CAP: usize = 5000;
/// Reports whether `path` is a "spine" file.
///
/// Forwards to `tokmd_context_policy::is_spine_file` (imported here as
/// `matches_spine_file`); the matching rules live in that crate.
fn is_spine_file(path: &str) -> bool {
matches_spine_file(path)
}
/// Tuning knobs for [`select_files_with_options`].
// NOTE(review): `dead_code` is allowed presumably because not every field is
// read in every build; `require_git_scores` is not read anywhere in this file.
#[allow(dead_code)]
pub struct SelectOptions {
/// When `true`, the smart-exclude filter is skipped and every row is kept as
/// a candidate.
pub no_smart_exclude: bool,
/// Forwarded to `compute_file_cap` together with the budget to derive the
/// per-file token cap. Presumably a fraction of the budget — confirm against
/// `tokmd_context_policy::compute_file_cap`.
pub max_file_pct: f64,
/// Optional limit forwarded to `compute_file_cap` alongside `max_file_pct`.
pub max_file_tokens: Option<usize>,
/// Not consulted by the selection code visible in this file.
pub require_git_scores: bool,
/// Threshold forwarded to `classify_file` when classifying each candidate.
pub dense_threshold: f64,
}
/// Default selection options: smart exclusion enabled, `max_file_pct` of 0.15,
/// no explicit per-file token limit, git scores not required, and a dense
/// threshold of 50.0.
impl Default for SelectOptions {
fn default() -> Self {
Self {
no_smart_exclude: false,
max_file_pct: 0.15,
max_file_tokens: None,
require_git_scores: false,
dense_threshold: 50.0,
}
}
}
/// Outcome of [`select_files_with_options`].
pub struct SelectResult {
/// Files chosen for the context: spine files first, then the files picked
/// by the packing strategy.
pub selected: Vec<ContextFileRow>,
/// Parent files dropped by the smart-exclude filter, with the reason.
pub smart_excluded: Vec<SmartExcludedFile>,
/// Parent files whose assigned policy was `Skip` or `Summary`.
pub excluded_by_policy: Vec<PolicyExcludedFile>,
/// Name of the metric actually used for ranking ("code", "tokens", "churn"
/// or "hotspot"), after any fallback.
pub rank_by_effective: String,
/// Explanation of why the requested metric was replaced, if it was.
pub fallback_reason: Option<String>,
}
/// Parses a human-friendly token budget into a token count.
///
/// The input is trimmed and matched case-insensitively. Accepted forms:
///
/// * `unlimited` or `max` — returns `usize::MAX`;
/// * a decimal number, optionally followed by `k` (×1 000), `m` (×1 000 000)
///   or `g` (×1 000 000 000), e.g. `50000`, `128k`, `1.5m`, `2G`.
///
/// Fractional results are truncated toward zero.
///
/// # Errors
///
/// Returns an error when the numeric part cannot be parsed, is NaN or
/// negative, or when the scaled value does not fit in `usize`.
pub fn parse_budget(budget: &str) -> anyhow::Result<usize> {
    let input = budget.trim().to_lowercase();
    if input == "unlimited" || input == "max" {
        return Ok(usize::MAX);
    }

    let (num_str, multiplier) = if let Some(num) = input.strip_suffix('k') {
        (num.trim(), 1_000.0)
    } else if let Some(num) = input.strip_suffix('m') {
        (num.trim(), 1_000_000.0)
    } else if let Some(num) = input.strip_suffix('g') {
        (num.trim(), 1_000_000_000.0)
    } else {
        (input.as_str(), 1.0)
    };

    let invalid = || {
        anyhow::anyhow!(
            "Invalid budget '{}': expected <number>[k|m|g] or 'unlimited' (examples: 128k, 1m, 1g, unlimited)",
            budget.trim()
        )
    };

    let n: f64 = num_str.parse().map_err(|_| invalid())?;

    // `f64::from_str` happily accepts "nan" and negative numbers, and a
    // float-to-integer `as` cast maps both to 0. Reject them explicitly so a
    // typo never turns into a silent zero-token budget.
    if n.is_nan() || n < 0.0 {
        return Err(invalid());
    }

    let result = n * multiplier;
    if result > usize::MAX as f64 {
        anyhow::bail!(
            "Invalid budget '{}': value overflows (max is {})",
            budget.trim(),
            usize::MAX
        );
    }
    Ok(result as usize)
}
/// Computes the ranking value of `row` under `metric`.
///
/// * `Code` — the row's code line count.
/// * `Tokens` — the row's token count.
/// * `Hotspot` — the hotspot score recorded for the row's normalized path,
///   falling back to the code line count when git scores are absent or have
///   no entry for the path.
/// * `Churn` — `commits * 1000 + code` for the row's normalized path, with the
///   same fallback to the code line count.
///
/// The path is only normalized when a git lookup is actually performed; this
/// function is called from sort comparators, so avoiding the allocation for
/// the `Code`/`Tokens` metrics (and when `git_scores` is `None`) matters.
fn get_value(row: &FileRow, metric: ValueMetric, git_scores: Option<&GitScores>) -> usize {
    match metric {
        ValueMetric::Code => row.code,
        ValueMetric::Tokens => row.tokens,
        ValueMetric::Hotspot => git_scores
            .and_then(|gs| gs.hotspots.get(&normalize_path(&row.path)).copied())
            .unwrap_or(row.code),
        ValueMetric::Churn => git_scores
            .and_then(|gs| gs.commit_counts.get(&normalize_path(&row.path)).copied())
            // Saturate rather than overflow on pathological commit counts.
            .map(|commits| commits.saturating_mul(1000).saturating_add(row.code))
            .unwrap_or(row.code),
    }
}
/// Classifies a file from its path, token count, line count and the dense
/// threshold.
///
/// Thin public wrapper that forwards all arguments unchanged to
/// `tokmd_context_policy::classify_file`; the classification rules live there.
pub fn classify_file(
path: &str,
tokens: usize,
lines: usize,
dense_threshold: f64,
) -> Vec<FileClassification> {
classify_context_file(path, tokens, lines, dense_threshold)
}
/// Result of [`resolve_metric`]: the metric that will actually be used and,
/// if it differs from the requested one, why.
pub struct ResolvedMetric {
/// Metric to rank by after any fallback has been applied.
pub effective: ValueMetric,
/// Human-readable explanation of the fallback; `None` when the requested
/// metric is used as-is.
pub fallback_reason: Option<String>,
}
/// Decides which metric can actually be used for ranking.
///
/// `Hotspot` and `Churn` need git scores. When either is requested and
/// `git_scores` is `None`, the result falls back to `ValueMetric::Code` and
/// carries a reason string; in every other case the requested metric is
/// returned unchanged with no reason.
pub fn resolve_metric(requested: ValueMetric, git_scores: Option<&GitScores>) -> ResolvedMetric {
    // Name of the metric if (and only if) it depends on git data.
    let git_dependent = match requested {
        ValueMetric::Hotspot => Some("hotspot"),
        ValueMetric::Churn => Some("churn"),
        _ => None,
    };

    if let (Some(name), None) = (git_dependent, git_scores) {
        return ResolvedMetric {
            effective: ValueMetric::Code,
            fallback_reason: Some(format!(
                "{name} requires git scores; falling back to code lines"
            )),
        };
    }

    ResolvedMetric {
        effective: requested,
        fallback_reason: None,
    }
}
/// Computes the per-file token cap for a given total `budget`.
///
/// Forwards `budget`, `options.max_file_pct` and `options.max_file_tokens` to
/// `tokmd_context_policy::compute_file_cap`, which owns the actual formula.
pub fn compute_file_cap(budget: usize, options: &SelectOptions) -> usize {
compute_context_file_cap(budget, options.max_file_pct, options.max_file_tokens)
}
/// Chooses the inclusion policy for a file, returning the policy and an
/// optional reason.
///
/// Thin public wrapper that forwards the token count, the per-file cap and the
/// file's classifications unchanged to `tokmd_context_policy::assign_policy`.
pub fn assign_policy(
tokens: usize,
file_cap: usize,
classifications: &[FileClassification],
) -> (InclusionPolicy, Option<String>) {
assign_context_policy(tokens, file_cap, classifications)
}
/// Writes a head/tail excerpt of the file at `path` to `w`.
///
/// The number of lines to emit is estimated from the file's average tokens per
/// line and its effective token allowance (`file.effective_tokens`, or
/// `file.tokens` when unset). If that estimate covers the whole file, every
/// line is written. Otherwise roughly 60% of the allowance is taken from the
/// start of the file and the rest from the end, separated by a marker line
/// reporting how many lines were omitted.
///
/// When `compress` is `true`, lines that are empty after trimming are dropped.
/// An empty file produces no output.
///
/// # Errors
///
/// Fails if the file cannot be read as UTF-8 text or if writing to `w` fails.
pub fn write_head_tail<W: Write>(
    w: &mut W,
    path: &Path,
    file: &ContextFileRow,
    compress: bool,
) -> anyhow::Result<()> {
    /// Writes `lines` one per line, dropping blank ones when `compress` is set.
    fn emit<Out: Write>(out: &mut Out, lines: &[&str], compress: bool) -> std::io::Result<()> {
        for text in lines {
            if !(compress && text.trim().is_empty()) {
                writeln!(out, "{text}")?;
            }
        }
        Ok(())
    }

    let content = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(e) => {
            return Err(anyhow::anyhow!(
                "Failed to read {}: {}",
                path.display(),
                e
            ))
        }
    };

    let all_lines: Vec<&str> = content.lines().collect();
    let total_lines = all_lines.len();
    if total_lines == 0 {
        return Ok(());
    }

    // Convert the token allowance into a line allowance via the average
    // tokens-per-line of the whole file.
    let allowance = file.effective_tokens.unwrap_or(file.tokens);
    let tokens_per_line = file.tokens as f64 / total_lines.max(1) as f64;
    let target_lines = if tokens_per_line > 0.0 {
        (allowance as f64 / tokens_per_line).ceil() as usize
    } else {
        total_lines
    };

    // The allowance covers everything: no truncation needed.
    if target_lines >= total_lines {
        emit(&mut *w, &all_lines, compress)?;
        return Ok(());
    }

    let head_count = (target_lines as f64 * 0.6).ceil() as usize;
    let tail_count = target_lines.saturating_sub(head_count);
    let omitted = total_lines.saturating_sub(head_count + tail_count);

    let head_end = head_count.min(total_lines);
    emit(&mut *w, &all_lines[..head_end], compress)?;

    if omitted > 0 {
        writeln!(w, "// ... [{omitted} lines omitted] ...")?;
    }

    let tail_start = total_lines.saturating_sub(tail_count);
    emit(&mut *w, &all_lines[tail_start..], compress)?;

    Ok(())
}
/// Packs files into `budget` tokens by taking the most valuable ones first.
///
/// Only `FileKind::Parent` rows are considered. Candidates are ordered by
/// descending value under `metric` (see `get_value`), with the path as an
/// ascending tie-breaker so the result is deterministic. Each candidate is
/// taken if it still fits in the remaining budget; one that does not fit is
/// skipped and later, smaller candidates are still tried.
pub fn pack_greedy(
    rows: &[FileRow],
    budget: usize,
    metric: ValueMetric,
    git_scores: Option<&GitScores>,
) -> Vec<ContextFileRow> {
    let mut ranked: Vec<&FileRow> = rows
        .iter()
        .filter(|row| row.kind == FileKind::Parent)
        .collect();

    ranked.sort_by(|lhs, rhs| {
        let lhs_value = get_value(lhs, metric, git_scores);
        let rhs_value = get_value(rhs, metric, git_scores);
        rhs_value
            .cmp(&lhs_value)
            .then_with(|| lhs.path.cmp(&rhs.path))
    });

    let mut spent = 0usize;
    ranked
        .into_iter()
        .filter_map(|row| {
            let total = spent + row.tokens;
            if total > budget {
                return None;
            }
            spent = total;
            Some(to_context_row(row, metric, git_scores))
        })
        .collect()
}
/// Packs files into `budget` tokens while spreading picks across modules.
///
/// Only `FileKind::Parent` rows are considered. Selection runs in two phases:
///
/// 1. **Round-robin** over modules (in sorted module-name order), using 70% of
///    the budget. In each round every module offers its next-best file
///    (descending value, path as tie-breaker). The file is taken if it fits in
///    the spread budget; either way the module moves on to its next file. The
///    phase ends when a full round takes nothing or the spread budget is used.
/// 2. **Greedy fill** of the full budget with every file not yet taken,
///    ordered by descending value with the path as tie-breaker.
///
/// Files already taken are tracked by path in a set, so the hand-over between
/// the phases is a set lookup per file rather than a scan of the selection.
pub fn pack_spread(
    rows: &[FileRow],
    budget: usize,
    metric: ValueMetric,
    git_scores: Option<&GitScores>,
) -> Vec<ContextFileRow> {
    /// Highest value first; ascending path breaks ties deterministically.
    fn rank_order(
        a: &FileRow,
        b: &FileRow,
        metric: ValueMetric,
        git_scores: Option<&GitScores>,
    ) -> std::cmp::Ordering {
        let va = get_value(a, metric, git_scores);
        let vb = get_value(b, metric, git_scores);
        vb.cmp(&va).then_with(|| a.path.cmp(&b.path))
    }

    let parents: Vec<&FileRow> = rows
        .iter()
        .filter(|r| r.kind == FileKind::Parent)
        .collect();

    // Group by module; borrowed keys sort exactly like the owned strings.
    let mut groups: BTreeMap<&str, Vec<&FileRow>> = BTreeMap::new();
    for &row in &parents {
        groups.entry(row.module.as_str()).or_default().push(row);
    }
    for group in groups.values_mut() {
        group.sort_by(|a, b| rank_order(a, b, metric, git_scores));
    }

    let mut selected: Vec<ContextFileRow> = Vec::new();
    let mut taken: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
    let mut used_tokens = 0usize;
    let spread_budget = (budget as f64 * 0.7) as usize;

    // Phase 1: one cursor per module, in the same order as `groups.values()`.
    let mut cursors = vec![0usize; groups.len()];
    let mut made_progress = true;
    while made_progress && used_tokens < spread_budget {
        made_progress = false;
        for (group, cursor) in groups.values().zip(cursors.iter_mut()) {
            if let Some(&row) = group.get(*cursor) {
                // The module advances whether or not its file fit.
                *cursor += 1;
                if used_tokens + row.tokens <= spread_budget {
                    used_tokens += row.tokens;
                    taken.insert(row.path.as_str());
                    selected.push(to_context_row(row, metric, git_scores));
                    made_progress = true;
                }
            }
        }
    }

    // Phase 2: greedy fill of the whole budget with everything not yet taken.
    let mut remaining: Vec<&FileRow> = parents
        .iter()
        .copied()
        .filter(|r| !taken.contains(r.path.as_str()))
        .collect();
    remaining.sort_by(|a, b| rank_order(a, b, metric, git_scores));

    for row in remaining {
        if used_tokens + row.tokens <= budget {
            used_tokens += row.tokens;
            selected.push(to_context_row(row, metric, git_scores));
        }
    }

    selected
}
#[allow(dead_code)]
pub fn select_files(
rows: &[FileRow],
budget: usize,
strategy: ContextStrategy,
metric: ValueMetric,
git_scores: Option<&GitScores>,
) -> Vec<ContextFileRow> {
select_files_with_options(
rows,
budget,
strategy,
metric,
git_scores,
&SelectOptions {
no_smart_exclude: true,
..Default::default()
},
)
.selected
}
/// Selects the files to include in a context of at most `budget` tokens.
///
/// The pipeline, in order:
///
/// 1. Resolve the ranking metric (falling back to code lines when git scores
///    are required but missing).
/// 2. Unless `options.no_smart_exclude` is set, drop parent rows that match a
///    smart-exclude rule and record them in `smart_excluded`.
/// 3. Classify every remaining parent row and assign it an inclusion policy;
///    rows with policy `Skip` or `Summary` are recorded in
///    `excluded_by_policy` and removed from packing.
/// 4. Rows with policy `HeadTail` are budgeted at `min(tokens, file_cap)`.
/// 5. Reserve a small spine budget and take spine files, smallest first.
/// 6. Pack the remaining rows with the chosen `strategy` into what is left of
///    the budget.
/// 7. Annotate every selected row with its classifications and policy.
///
/// Returns the selected rows (spine files first) together with both exclusion
/// lists, the name of the metric used and any fallback reason.
pub fn select_files_with_options(
rows: &[FileRow],
budget: usize,
strategy: ContextStrategy,
metric: ValueMetric,
git_scores: Option<&GitScores>,
options: &SelectOptions,
) -> SelectResult {
// Step 1: pick the metric that can actually be used.
let resolved = resolve_metric(metric, git_scores);
let effective_metric = resolved.effective;
// Label reported in `rank_by_effective` and used as the default rank reason.
let metric_name = match effective_metric {
ValueMetric::Code => "code",
ValueMetric::Tokens => "tokens",
ValueMetric::Churn => "churn",
ValueMetric::Hotspot => "hotspot",
};
// Step 2: smart exclusion. Only parent rows are tested; other rows pass
// through untouched.
let mut smart_excluded = Vec::new();
let candidates: Vec<&FileRow> = if options.no_smart_exclude {
rows.iter().collect()
} else {
rows.iter()
.filter(|row| {
if row.kind != FileKind::Parent {
return true; }
let path = normalize_path(&row.path);
if let Some(reason) = is_smart_excluded(&path) {
smart_excluded.push(SmartExcludedFile {
path,
reason: reason.to_string(),
tokens: row.tokens,
});
false
} else {
true
}
})
.collect()
};
let candidate_rows: Vec<FileRow> = candidates.into_iter().cloned().collect();
// Step 3: classify each parent candidate and assign its inclusion policy.
// All maps below are keyed by the normalized path.
let file_cap = compute_file_cap(budget, options);
let mut classification_map: BTreeMap<String, Vec<FileClassification>> = BTreeMap::new();
let mut policy_map: BTreeMap<String, (InclusionPolicy, Option<String>)> = BTreeMap::new();
let mut excluded_by_policy: Vec<PolicyExcludedFile> = Vec::new();
let mut original_tokens: BTreeMap<String, usize> = BTreeMap::new();
for row in candidate_rows.iter().filter(|r| r.kind == FileKind::Parent) {
let path = normalize_path(&row.path);
let classes = classify_file(&path, row.tokens, row.lines, options.dense_threshold);
let (policy, reason) = assign_policy(row.tokens, file_cap, &classes);
classification_map.insert(path.clone(), classes.clone());
policy_map.insert(path.clone(), (policy, reason.clone()));
original_tokens.insert(path.clone(), row.tokens);
// `Skip` and `Summary` files are reported but never packed.
if matches!(policy, InclusionPolicy::Skip | InclusionPolicy::Summary) {
excluded_by_policy.push(PolicyExcludedFile {
path,
original_tokens: row.tokens,
policy,
reason: reason.unwrap_or_default(),
classifications: classes,
});
}
}
let excluded_paths: std::collections::BTreeSet<&str> =
excluded_by_policy.iter().map(|e| e.path.as_str()).collect();
// Step 4: rows that take part in packing. Policy-excluded parents are
// removed; `HeadTail` rows are budgeted at their capped token count.
let pack_rows: Vec<FileRow> = candidate_rows
.iter()
.filter(|r| {
if r.kind != FileKind::Parent {
return true; }
let path = normalize_path(&r.path);
!excluded_paths.contains(path.as_str())
})
.map(|r| {
let path = normalize_path(&r.path);
if let Some((InclusionPolicy::HeadTail, _)) = policy_map.get(&path) {
let capped = r.tokens.min(file_cap);
FileRow {
tokens: capped,
..r.clone()
}
} else {
r.clone()
}
})
.collect();
// Step 5: spine reservation — the smaller of a fixed fraction of the budget
// and a fixed token cap.
let spine_budget = std::cmp::min(
(budget as f64 * SPINE_BUDGET_FRACTION) as usize,
SPINE_BUDGET_CAP,
);
let parents: Vec<&FileRow> = pack_rows
.iter()
.filter(|r| r.kind == FileKind::Parent)
.collect();
let mut spine_files: Vec<ContextFileRow> = Vec::new();
let mut spine_used = 0;
let mut spine_paths: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
// NOTE(review): spine matching and `spine_paths` use the raw `row.path`,
// whereas the policy maps use the normalized path — confirm the spine
// matcher handles non-normalized separators.
let mut spine_candidates: Vec<&FileRow> = parents
.iter()
.filter(|r| is_spine_file(&r.path))
.copied()
.collect();
// Smallest spine files first (path as tie-breaker), so as many as possible fit.
spine_candidates.sort_by(|a, b| a.tokens.cmp(&b.tokens).then_with(|| a.path.cmp(&b.path)));
for row in spine_candidates {
if spine_used + row.tokens <= spine_budget {
spine_used += row.tokens;
spine_paths.insert(row.path.as_str());
spine_files.push(to_context_row_with_reason(
row,
effective_metric,
git_scores,
"spine",
));
}
}
// Step 6: pack everything that was not taken as spine into the rest of the
// budget. Spine candidates that did not fit remain eligible here.
let remaining_budget = budget.saturating_sub(spine_used);
let non_spine_rows: Vec<FileRow> = pack_rows
.iter()
.filter(|r| !spine_paths.contains(&r.path.as_str()))
.cloned()
.collect();
let mut ranked: Vec<ContextFileRow> = match strategy {
ContextStrategy::Greedy => pack_greedy(
&non_spine_rows,
remaining_budget,
effective_metric,
git_scores,
),
ContextStrategy::Spread => pack_spread(
&non_spine_rows,
remaining_budget,
effective_metric,
git_scores,
),
};
// Packed rows come back with an empty rank reason; label them with the metric.
for file in &mut ranked {
if file.rank_reason.is_empty() {
file.rank_reason = metric_name.to_string();
}
}
// Step 7: spine files lead the result, followed by the ranked picks.
let mut selected = spine_files;
selected.extend(ranked);
for file in &mut selected {
let path = normalize_path(&file.path);
if let Some(classes) = classification_map.get(&path) {
file.classifications = classes.clone();
}
if let Some((policy, reason)) = policy_map.get(&path) {
file.policy = *policy;
file.policy_reason = reason.clone();
// For `HeadTail` rows `file.tokens` currently holds the capped count used
// for budgeting: expose it as `effective_tokens` and restore the
// original count in `tokens`.
if *policy == InclusionPolicy::HeadTail {
file.effective_tokens = Some(file.tokens);
if let Some(original) = original_tokens.get(&path) {
file.tokens = *original;
}
}
}
}
SelectResult {
selected,
smart_excluded,
excluded_by_policy,
rank_by_effective: metric_name.to_string(),
fallback_reason: resolved.fallback_reason,
}
}
/// Converts a `FileRow` into a `ContextFileRow` with an empty rank reason.
///
/// Shorthand for `to_context_row_with_reason(row, metric, git_scores, "")`.
fn to_context_row(
row: &FileRow,
metric: ValueMetric,
git_scores: Option<&GitScores>,
) -> ContextFileRow {
to_context_row_with_reason(row, metric, git_scores, "")
}
fn to_context_row_with_reason(
row: &FileRow,
metric: ValueMetric,
git_scores: Option<&GitScores>,
reason: &str,
) -> ContextFileRow {
ContextFileRow {
path: row.path.clone(),
module: row.module.clone(),
lang: row.lang.clone(),
tokens: row.tokens,
code: row.code,
lines: row.lines,
bytes: row.bytes,
value: get_value(row, metric, git_scores),
rank_reason: reason.to_string(),
policy: InclusionPolicy::Full,
effective_tokens: None,
policy_reason: None,
classifications: Vec::new(),
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_test_row(path: &str, module: &str, lang: &str, tokens: usize, code: usize) -> FileRow {
FileRow {
path: path.to_string(),
module: module.to_string(),
lang: lang.to_string(),
kind: FileKind::Parent,
code,
comments: 0,
blanks: 0,
lines: code,
bytes: code * 10,
tokens,
}
}
fn make_child_row(path: &str, module: &str, lang: &str, tokens: usize, code: usize) -> FileRow {
FileRow {
path: path.to_string(),
module: module.to_string(),
lang: lang.to_string(),
kind: FileKind::Child,
code,
comments: 0,
blanks: 0,
lines: code,
bytes: code * 10,
tokens,
}
}
#[test]
fn test_parse_budget() {
assert_eq!(parse_budget("128k").unwrap(), 128_000);
assert_eq!(parse_budget("1m").unwrap(), 1_000_000);
assert_eq!(parse_budget("50000").unwrap(), 50_000);
assert_eq!(parse_budget("1.5k").unwrap(), 1_500);
}
#[test]
fn test_parse_budget_g_suffix() {
assert_eq!(parse_budget("1g").unwrap(), 1_000_000_000);
assert_eq!(parse_budget("0.5g").unwrap(), 500_000_000);
assert_eq!(parse_budget("2G").unwrap(), 2_000_000_000);
}
#[test]
fn test_parse_budget_unlimited() {
assert_eq!(parse_budget("unlimited").unwrap(), usize::MAX);
assert_eq!(parse_budget("max").unwrap(), usize::MAX);
assert_eq!(parse_budget("UNLIMITED").unwrap(), usize::MAX);
assert_eq!(parse_budget("MAX").unwrap(), usize::MAX);
assert_eq!(parse_budget(" unlimited ").unwrap(), usize::MAX);
}
#[test]
fn test_parse_budget_with_whitespace() {
assert_eq!(parse_budget(" 10k ").unwrap(), 10_000);
assert_eq!(parse_budget(" 5m ").unwrap(), 5_000_000);
}
#[test]
fn test_parse_budget_case_insensitive() {
assert_eq!(parse_budget("10K").unwrap(), 10_000);
assert_eq!(parse_budget("2M").unwrap(), 2_000_000);
}
#[test]
fn test_parse_budget_multiplication_k() {
assert_eq!(parse_budget("2k").unwrap(), 2_000);
assert_eq!(parse_budget("0.5k").unwrap(), 500);
}
#[test]
fn test_parse_budget_multiplication_m() {
assert_eq!(parse_budget("2m").unwrap(), 2_000_000);
assert_eq!(parse_budget("0.5m").unwrap(), 500_000);
}
#[test]
fn test_normalize_path() {
assert_eq!(normalize_path("foo/bar"), "foo/bar");
assert_eq!(normalize_path("foo\\bar"), "foo/bar");
assert_eq!(normalize_path("foo\\bar\\baz"), "foo/bar/baz");
}
#[test]
fn test_get_value_code_metric() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 50);
assert_eq!(get_value(&row, ValueMetric::Code, None), 50);
}
#[test]
fn test_get_value_tokens_metric() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 50);
assert_eq!(get_value(&row, ValueMetric::Tokens, None), 100);
}
#[test]
fn test_get_value_hotspot_without_git() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 50);
assert_eq!(get_value(&row, ValueMetric::Hotspot, None), 50);
}
#[test]
fn test_get_value_hotspot_with_git() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 50);
let mut hotspots = BTreeMap::new();
hotspots.insert("test.rs".to_string(), 999);
let git_scores = GitScores {
hotspots,
commit_counts: BTreeMap::new(),
};
assert_eq!(
get_value(&row, ValueMetric::Hotspot, Some(&git_scores)),
999
);
}
#[test]
fn test_get_value_churn_without_git() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 50);
assert_eq!(get_value(&row, ValueMetric::Churn, None), 50);
}
#[test]
fn test_get_value_churn_with_git() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 50);
let mut commit_counts = BTreeMap::new();
commit_counts.insert("test.rs".to_string(), 5);
let git_scores = GitScores {
hotspots: BTreeMap::new(),
commit_counts,
};
assert_eq!(get_value(&row, ValueMetric::Churn, Some(&git_scores)), 5050);
}
#[test]
fn test_pack_greedy_empty_rows() {
let rows: Vec<FileRow> = vec![];
let result = pack_greedy(&rows, 1000, ValueMetric::Code, None);
assert!(result.is_empty());
}
#[test]
fn test_pack_greedy_budget_exceeded() {
let rows = vec![
make_test_row("a.rs", "mod", "Rust", 500, 100),
make_test_row("b.rs", "mod", "Rust", 600, 200),
];
let result = pack_greedy(&rows, 500, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
assert_eq!(result[0].path, "a.rs");
}
#[test]
fn test_pack_greedy_filters_child_rows() {
let rows = vec![
make_test_row("parent.rs", "mod", "Rust", 100, 50),
make_child_row("child.rs", "mod", "Rust", 50, 25),
];
let result = pack_greedy(&rows, 1000, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
assert_eq!(result[0].path, "parent.rs");
}
#[test]
fn test_pack_greedy_selects_by_value() {
let rows = vec![
make_test_row("low.rs", "mod", "Rust", 100, 10),
make_test_row("high.rs", "mod", "Rust", 100, 90),
make_test_row("mid.rs", "mod", "Rust", 100, 50),
];
let result = pack_greedy(&rows, 300, ValueMetric::Code, None);
assert_eq!(result.len(), 3);
assert_eq!(result[0].path, "high.rs");
assert_eq!(result[1].path, "mid.rs");
assert_eq!(result[2].path, "low.rs");
}
#[test]
fn test_pack_greedy_respects_token_budget() {
let rows = vec![
make_test_row("big.rs", "mod", "Rust", 500, 100),
make_test_row("small.rs", "mod", "Rust", 100, 50),
];
let result = pack_greedy(&rows, 150, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
assert_eq!(result[0].path, "small.rs");
}
#[test]
fn test_pack_greedy_accumulates_tokens() {
let rows = vec![
make_test_row("a.rs", "mod", "Rust", 100, 50),
make_test_row("b.rs", "mod", "Rust", 100, 40),
make_test_row("c.rs", "mod", "Rust", 100, 30),
];
let result = pack_greedy(&rows, 250, ValueMetric::Code, None);
assert_eq!(result.len(), 2);
}
#[test]
fn test_pack_spread_empty_rows() {
let rows: Vec<FileRow> = vec![];
let result = pack_spread(&rows, 1000, ValueMetric::Code, None);
assert!(result.is_empty());
}
#[test]
fn test_pack_spread_filters_child_rows() {
let rows = vec![
make_test_row("parent.rs", "mod", "Rust", 100, 50),
make_child_row("child.rs", "mod", "Rust", 50, 25),
];
let result = pack_spread(&rows, 1000, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
assert_eq!(result[0].path, "parent.rs");
}
#[test]
fn test_pack_spread_distributes_across_groups() {
let rows = vec![
make_test_row("rust1.rs", "mod1", "Rust", 100, 50),
make_test_row("rust2.rs", "mod1", "Rust", 100, 60),
make_test_row("python1.py", "mod2", "Python", 100, 70),
make_test_row("python2.py", "mod2", "Python", 100, 80),
];
let result = pack_spread(&rows, 1000, ValueMetric::Code, None);
assert_eq!(result.len(), 4);
}
#[test]
fn test_pack_spread_round_robin_fills_70_percent() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 100, 50),
make_test_row("b.py", "mod2", "Python", 100, 60),
];
let result = pack_spread(&rows, 200, ValueMetric::Code, None);
assert_eq!(result.len(), 2);
}
#[test]
fn test_pack_spread_greedy_fills_remaining_30_percent() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 50, 50),
make_test_row("b.py", "mod2", "Python", 50, 60),
make_test_row("c.rs", "mod1", "Rust", 50, 40),
];
let result = pack_spread(&rows, 200, ValueMetric::Code, None);
assert_eq!(result.len(), 3);
}
#[test]
fn test_select_files_greedy_strategy() {
let rows = vec![make_test_row("a.rs", "mod", "Rust", 100, 50)];
let result = select_files(
&rows,
1000,
ContextStrategy::Greedy,
ValueMetric::Code,
None,
);
assert_eq!(result.len(), 1);
}
#[test]
fn test_select_files_spread_strategy() {
let rows = vec![make_test_row("a.rs", "mod", "Rust", 100, 50)];
let result = select_files(
&rows,
1000,
ContextStrategy::Spread,
ValueMetric::Code,
None,
);
assert_eq!(result.len(), 1);
}
#[test]
fn test_to_context_row_fields() {
let row = make_test_row("test.rs", "mymod", "Rust", 100, 50);
let ctx_row = to_context_row(&row, ValueMetric::Code, None);
assert_eq!(ctx_row.path, "test.rs");
assert_eq!(ctx_row.module, "mymod");
assert_eq!(ctx_row.lang, "Rust");
assert_eq!(ctx_row.tokens, 100);
assert_eq!(ctx_row.code, 50);
assert_eq!(ctx_row.lines, 50);
assert_eq!(ctx_row.bytes, 500);
assert_eq!(ctx_row.value, 50); assert_eq!(ctx_row.rank_reason, ""); }
#[test]
fn test_to_context_row_value_from_tokens_metric() {
let row = make_test_row("test.rs", "mymod", "Rust", 200, 50);
let ctx_row = to_context_row(&row, ValueMetric::Tokens, None);
assert_eq!(ctx_row.value, 200);
}
#[test]
fn test_pack_greedy_budget_boundary() {
let rows = vec![make_test_row("exact.rs", "mod", "Rust", 100, 50)];
let result = pack_greedy(&rows, 100, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
let result = pack_greedy(&rows, 99, ValueMetric::Code, None);
assert_eq!(result.len(), 0);
}
#[test]
fn test_pack_spread_skips_large_files_in_spread_phase() {
let rows = vec![
make_test_row("small.rs", "mod1", "Rust", 50, 100),
make_test_row("large.rs", "mod2", "Rust", 200, 50), ];
let result = pack_spread(&rows, 200, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
assert_eq!(result[0].path, "small.rs");
}
#[test]
fn test_pack_spread_no_duplicates() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 50, 100),
make_test_row("b.rs", "mod1", "Rust", 50, 90),
];
let result = pack_spread(&rows, 500, ValueMetric::Code, None);
let paths: Vec<_> = result.iter().map(|r| &r.path).collect();
assert_eq!(paths.len(), 2);
assert!(paths.contains(&&"a.rs".to_string()));
assert!(paths.contains(&&"b.rs".to_string()));
}
#[test]
fn test_normalize_path_with_backslash() {
let row = FileRow {
path: "foo\\bar\\test.rs".to_string(),
module: "mod".to_string(),
lang: "Rust".to_string(),
kind: FileKind::Parent,
code: 50,
comments: 0,
blanks: 0,
lines: 50,
bytes: 500,
tokens: 100,
};
let mut hotspots = BTreeMap::new();
hotspots.insert("foo/bar/test.rs".to_string(), 999);
let git_scores = GitScores {
hotspots,
commit_counts: BTreeMap::new(),
};
assert_eq!(
get_value(&row, ValueMetric::Hotspot, Some(&git_scores)),
999
);
}
#[test]
fn test_select_files_non_empty_when_budget_allows() {
let rows = vec![make_test_row("small.rs", "src", "Rust", 50, 100)];
let result = select_files(
&rows,
1000,
ContextStrategy::Greedy,
ValueMetric::Code,
None,
);
assert!(
!result.is_empty(),
"select_files must return non-empty when files fit budget"
);
}
#[test]
fn test_budget_invariant_greedy() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 100, 50),
make_test_row("b.rs", "mod2", "Rust", 150, 40),
make_test_row("c.rs", "mod3", "Rust", 200, 30),
];
let budget = 250;
let result = pack_greedy(&rows, budget, ValueMetric::Code, None);
let total_tokens: usize = result.iter().map(|r| r.tokens).sum();
assert!(
total_tokens <= budget,
"Total tokens ({total_tokens}) must not exceed budget ({budget})"
);
}
#[test]
fn test_budget_invariant_spread() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 100, 50),
make_test_row("b.rs", "mod2", "Python", 150, 40),
make_test_row("c.rs", "mod3", "Go", 200, 30),
];
let budget = 250;
let result = pack_spread(&rows, budget, ValueMetric::Code, None);
let total_tokens: usize = result.iter().map(|r| r.tokens).sum();
assert!(
total_tokens <= budget,
"Total tokens ({total_tokens}) must not exceed budget ({budget})"
);
}
#[test]
fn test_parent_only_invariant_greedy() {
let rows = vec![
make_test_row("parent1.rs", "mod", "Rust", 100, 50),
make_child_row("child1.rs", "mod", "Rust", 50, 25),
make_test_row("parent2.rs", "mod", "Rust", 100, 40),
make_child_row("child2.rs", "mod", "Rust", 50, 20),
];
let result = pack_greedy(&rows, 1000, ValueMetric::Code, None);
for ctx_row in &result {
let original = rows.iter().find(|r| r.path == ctx_row.path).unwrap();
assert_eq!(
original.kind,
FileKind::Parent,
"Selected file {} must be a Parent, not a Child",
ctx_row.path
);
}
assert_eq!(result.len(), 2, "Should select both parent files");
}
#[test]
fn test_parent_only_invariant_spread() {
let rows = vec![
make_test_row("parent1.rs", "mod1", "Rust", 100, 50),
make_child_row("child1.rs", "mod1", "Rust", 50, 25),
make_test_row("parent2.py", "mod2", "Python", 100, 40),
make_child_row("child2.py", "mod2", "Python", 50, 20),
];
let result = pack_spread(&rows, 1000, ValueMetric::Code, None);
for ctx_row in &result {
let original = rows.iter().find(|r| r.path == ctx_row.path).unwrap();
assert_eq!(
original.kind,
FileKind::Parent,
"Selected file {} must be a Parent",
ctx_row.path
);
}
}
#[test]
fn test_determinism_greedy() {
let rows = vec![
make_test_row("a.rs", "mod", "Rust", 100, 50),
make_test_row("b.rs", "mod", "Rust", 100, 50), make_test_row("c.rs", "mod", "Rust", 100, 30),
];
let result1 = pack_greedy(&rows, 1000, ValueMetric::Code, None);
let result2 = pack_greedy(&rows, 1000, ValueMetric::Code, None);
let paths1: Vec<_> = result1.iter().map(|r| &r.path).collect();
let paths2: Vec<_> = result2.iter().map(|r| &r.path).collect();
assert_eq!(paths1, paths2, "pack_greedy must be deterministic");
}
#[test]
fn test_determinism_spread() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 100, 50),
make_test_row("b.py", "mod2", "Python", 100, 50),
make_test_row("c.rs", "mod1", "Rust", 100, 50),
];
let result1 = pack_spread(&rows, 1000, ValueMetric::Code, None);
let result2 = pack_spread(&rows, 1000, ValueMetric::Code, None);
let paths1: Vec<_> = result1.iter().map(|r| &r.path).collect();
let paths2: Vec<_> = result2.iter().map(|r| &r.path).collect();
assert_eq!(paths1, paths2, "pack_spread must be deterministic");
}
#[test]
fn test_tiebreaker_by_path() {
let rows = vec![
make_test_row("z.rs", "mod", "Rust", 100, 50),
make_test_row("a.rs", "mod", "Rust", 100, 50), ];
let result = pack_greedy(&rows, 1000, ValueMetric::Code, None);
assert_eq!(
result[0].path, "a.rs",
"Files with equal value should tie-break by path"
);
assert_eq!(result[1].path, "z.rs");
}
#[test]
fn test_spread_distributes_before_greedy_fill() {
let rows = vec![
make_test_row("mod1/best.rs", "mod1", "Rust", 50, 100),
make_test_row("mod1/okay.rs", "mod1", "Rust", 50, 20),
make_test_row("mod2/best.py", "mod2", "Python", 50, 100),
make_test_row("mod2/okay.py", "mod2", "Python", 50, 20),
];
let spread_result = pack_spread(&rows, 300, ValueMetric::Code, None);
let first_two: Vec<_> = spread_result.iter().take(2).map(|r| &r.path).collect();
assert!(
first_two.contains(&&"mod1/best.rs".to_string()),
"Spread should pick best from mod1 early"
);
assert!(
first_two.contains(&&"mod2/best.py".to_string()),
"Spread should pick best from mod2 early"
);
}
#[test]
fn test_pack_spread_module_first_avoids_language_bias() {
let rows = vec![
make_test_row("mod1/a.rs", "mod1", "Rust", 50, 100),
make_test_row("mod1/a.py", "mod1", "Python", 50, 90),
make_test_row("mod2/b.rs", "mod2", "Rust", 50, 80),
];
let result = pack_spread(&rows, 200, ValueMetric::Code, None);
let first_two: Vec<_> = result.iter().take(2).map(|r| r.path.as_str()).collect();
assert!(
first_two.iter().any(|p| p.starts_with("mod1/")),
"First two picks should include mod1"
);
assert!(
first_two.iter().any(|p| p.starts_with("mod2/")),
"First two picks should include mod2"
);
}
#[test]
fn test_greedy_picks_highest_value_first() {
let rows = vec![
make_test_row("low.rs", "mod", "Rust", 100, 10),
make_test_row("high.rs", "mod", "Rust", 100, 90),
];
let result = pack_greedy(&rows, 100, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
assert_eq!(
result[0].path, "high.rs",
"Greedy should pick highest value file"
);
assert_eq!(result[0].value, 90);
}
#[test]
fn test_pack_greedy_skips_files_that_dont_fit() {
let rows = vec![
make_test_row("big.rs", "mod", "Rust", 200, 100), make_test_row("small.rs", "mod", "Rust", 50, 10), ];
let result = pack_greedy(&rows, 100, ValueMetric::Code, None);
assert_eq!(result.len(), 1);
assert_eq!(result[0].path, "small.rs");
}
#[test]
fn test_select_files_routes_to_correct_strategy() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 50, 50),
make_test_row("b.py", "mod2", "Python", 50, 50),
];
let greedy = select_files(&rows, 100, ContextStrategy::Greedy, ValueMetric::Code, None);
let spread = select_files(&rows, 100, ContextStrategy::Spread, ValueMetric::Code, None);
assert!(!greedy.is_empty(), "Greedy strategy should return results");
assert!(!spread.is_empty(), "Spread strategy should return results");
}
#[test]
fn test_to_context_row_preserves_all_fields() {
let row = FileRow {
path: "test/path.rs".to_string(),
module: "test_mod".to_string(),
lang: "Rust".to_string(),
kind: FileKind::Parent,
code: 42,
comments: 10,
blanks: 5,
lines: 57,
bytes: 1234,
tokens: 99,
};
let ctx = to_context_row(&row, ValueMetric::Code, None);
assert_eq!(ctx.path, "test/path.rs");
assert_eq!(ctx.module, "test_mod");
assert_eq!(ctx.lang, "Rust");
assert_eq!(ctx.tokens, 99);
assert_eq!(ctx.code, 42);
assert_eq!(ctx.lines, 57);
assert_eq!(ctx.bytes, 1234);
assert_eq!(ctx.value, 42); }
#[test]
fn test_churn_metric_formula() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 7);
let mut commit_counts = BTreeMap::new();
commit_counts.insert("test.rs".to_string(), 3);
let git_scores = GitScores {
hotspots: BTreeMap::new(),
commit_counts,
};
assert_eq!(get_value(&row, ValueMetric::Churn, Some(&git_scores)), 3007);
}
#[test]
fn test_hotspot_metric_multiplication() {
let row = make_test_row("test.rs", "mod", "Rust", 100, 50);
let mut hotspots = BTreeMap::new();
hotspots.insert("test.rs".to_string(), 150); let git_scores = GitScores {
hotspots,
commit_counts: BTreeMap::new(),
};
assert_eq!(
get_value(&row, ValueMetric::Hotspot, Some(&git_scores)),
150
);
}
#[test]
fn test_spread_70_percent_allocation() {
let rows = vec![
make_test_row("a.rs", "mod1", "Rust", 350, 50),
make_test_row("b.rs", "mod1", "Rust", 350, 40),
make_test_row("c.py", "mod2", "Python", 350, 30),
];
let result = pack_spread(&rows, 1000, ValueMetric::Code, None);
assert!(result.len() >= 2, "Should select at least 2 files");
}
#[test]
fn test_normalize_path_preserves_forward_slashes() {
assert_eq!(normalize_path("foo/bar/baz.rs"), "foo/bar/baz.rs");
assert!(!normalize_path("test.rs").is_empty());
}
#[test]
fn test_normalize_path_not_xyzzy() {
assert_ne!(normalize_path("foo/bar"), "xyzzy");
assert_ne!(normalize_path("test.rs"), "xyzzy");
}
#[test]
fn test_parse_budget_invalid_alpha() {
let err = parse_budget("abc").unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("Invalid budget"),
"Expected guidance message, got: {msg}"
);
assert!(
msg.contains("128k"),
"Expected example in guidance, got: {msg}"
);
}
#[test]
fn test_parse_budget_invalid_suffix() {
let err = parse_budget("1x").unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("Invalid budget"),
"Expected guidance message, got: {msg}"
);
}
#[test]
fn test_parse_budget_invalid_empty() {
assert!(parse_budget("").is_err());
}
#[test]
fn test_parse_budget_invalid_suffix_only() {
assert!(parse_budget("k").is_err());
assert!(parse_budget("m").is_err());
assert!(parse_budget("g").is_err());
}
#[test]
fn test_parse_budget_overflow() {
let err = parse_budget("999999999999g").unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("overflows"),
"Expected overflow message, got: {msg}"
);
}
#[test]
fn test_pack_greedy_all_children_empty() {
let rows = vec![
make_child_row("a.rs", "mod", "Rust", 100, 50),
make_child_row("b.rs", "mod", "Rust", 100, 50),
];
let result = pack_greedy(&rows, 1000, ValueMetric::Code, None);
assert!(result.is_empty());
}
/// Spread packing selects nothing when every input row comes from `make_child_row`.
#[test]
fn test_pack_spread_all_children_empty() {
    let child_rows = vec![
        make_child_row("a.rs", "mod", "Rust", 100, 50),
        make_child_row("b.rs", "mod", "Rust", 100, 50),
    ];
    let packed = pack_spread(&child_rows, 1000, ValueMetric::Code, None);
    assert!(packed.is_empty());
}
/// With the hotspot metric and git scores supplied, the greedy strategy reports the
/// file's hotspot score as its value.
#[test]
fn test_select_files_greedy_with_git_scores() {
    let git_scores = GitScores {
        hotspots: BTreeMap::from([("a.rs".to_string(), 999)]),
        commit_counts: BTreeMap::new(),
    };
    let rows = vec![make_test_row("a.rs", "mod", "Rust", 100, 50)];

    let selected = select_files(
        &rows,
        1000,
        ContextStrategy::Greedy,
        ValueMetric::Hotspot,
        Some(&git_scores),
    );

    let first = &selected[0];
    assert_eq!(first.value, 999);
}
/// With the churn metric and git scores supplied, the spread strategy reports a
/// churn-derived value (5050 for a commit count of 5 on this row).
#[test]
fn test_select_files_spread_with_git_scores() {
    let git_scores = GitScores {
        hotspots: BTreeMap::new(),
        commit_counts: BTreeMap::from([("a.rs".to_string(), 5)]),
    };
    let rows = vec![make_test_row("a.rs", "mod", "Rust", 100, 50)];

    let selected = select_files(
        &rows,
        1000,
        ContextStrategy::Spread,
        ValueMetric::Churn,
        Some(&git_scores),
    );

    let first = &selected[0];
    assert_eq!(first.value, 5050);
}
/// `to_context_row` copies the hotspot score for the row's path into `value`.
#[test]
fn test_to_context_row_value_hotspot() {
    let git_scores = GitScores {
        hotspots: BTreeMap::from([("test.rs".to_string(), 777)]),
        commit_counts: BTreeMap::new(),
    };
    let file_row = make_test_row("test.rs", "mod", "Rust", 100, 50);

    let converted = to_context_row(&file_row, ValueMetric::Hotspot, Some(&git_scores));
    assert_eq!(converted.value, 777);
}
/// `to_context_row` with the churn metric yields 3050 for a commit count of 3 on this row.
#[test]
fn test_to_context_row_value_churn() {
    let git_scores = GitScores {
        hotspots: BTreeMap::new(),
        commit_counts: BTreeMap::from([("test.rs".to_string(), 3)]),
    };
    let file_row = make_test_row("test.rs", "mod", "Rust", 100, 50);

    let converted = to_context_row(&file_row, ValueMetric::Churn, Some(&git_scores));
    assert_eq!(converted.value, 3050);
}
/// Pins the exact churn value returned by `get_value`: 7042 for a commit count of 7.
/// NOTE(review): presumably commits * 1000 + code (7 * 1000 + 42); confirm against
/// `get_value` and `make_test_row`.
#[test]
fn test_get_value_churn_formula_exact() {
    let git_scores = GitScores {
        hotspots: BTreeMap::new(),
        commit_counts: BTreeMap::from([("test.rs".to_string(), 7)]),
    };
    let file_row = make_test_row("test.rs", "mod", "Rust", 100, 42);

    let churn_value = get_value(&file_row, ValueMetric::Churn, Some(&git_scores));
    assert_eq!(churn_value, 7042);
}
/// A single row sized 70 against a budget of 100 is selected by spread packing.
#[test]
fn test_pack_spread_70_percent_exact() {
    let only_row = vec![make_test_row("a.rs", "mod1", "Rust", 70, 50)];
    let packed = pack_spread(&only_row, 100, ValueMetric::Code, None);
    let packed_count = packed.len();
    assert_eq!(packed_count, 1);
}
/// A single row sized 71 against a budget of 100 must still end up selected.
/// NOTE(review): the test name suggests a greedy fill pass picks it up; confirm in `pack_spread`.
#[test]
fn test_pack_spread_71_percent_needs_greedy() {
    let only_row = vec![make_test_row("a.rs", "mod1", "Rust", 71, 50)];
    let packed = pack_spread(&only_row, 100, ValueMetric::Code, None);
    let packed_count = packed.len();
    assert_eq!(packed_count, 1);
}
/// Rows built with identical arguments except the path come back from greedy packing
/// ordered by path.
#[test]
fn test_pack_greedy_tiebreaker_by_path_explicit() {
    // Deliberately supplied out of alphabetical order.
    let tied_rows = vec![
        make_test_row("zzz.rs", "mod", "Rust", 100, 50),
        make_test_row("aaa.rs", "mod", "Rust", 100, 50),
        make_test_row("mmm.rs", "mod", "Rust", 100, 50),
    ];
    let packed = pack_greedy(&tied_rows, 300, ValueMetric::Code, None);

    let expected_order = ["aaa.rs", "mmm.rs", "zzz.rs"];
    assert_eq!(packed.len(), 3);
    for (row, expected_path) in packed.iter().zip(expected_order) {
        assert_eq!(row.path, expected_path);
    }
}
/// Well-known lockfiles are smart-excluded with reason `lockfile`, including when nested
/// in a directory.
#[test]
fn test_is_smart_excluded_lockfiles() {
    let lockfiles = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "go.sum",
        "poetry.lock",
        "Gemfile.lock",
        "some/dir/Cargo.lock",
    ];
    for path in lockfiles {
        assert_eq!(is_smart_excluded(path), Some("lockfile"), "path: {path}");
    }
}
/// `.min.js` / `.min.css` files are smart-excluded with reason `minified`.
#[test]
fn test_is_smart_excluded_minified() {
    for path in ["app.min.js", "style.min.css", "vendor/app.min.js"] {
        assert_eq!(is_smart_excluded(path), Some("minified"), "path: {path}");
    }
}
/// `.map` companions of JS/CSS files are smart-excluded with reason `sourcemap`.
#[test]
fn test_is_smart_excluded_sourcemaps() {
    for path in ["app.js.map", "style.css.map"] {
        assert_eq!(is_smart_excluded(path), Some("sourcemap"), "path: {path}");
    }
}
/// Ordinary source and manifest files are not smart-excluded.
#[test]
fn test_is_smart_excluded_normal_files() {
    for path in ["main.rs", "Cargo.toml", "app.js", "style.css"] {
        assert_eq!(is_smart_excluded(path), None, "path: {path}");
    }
}
/// Spine detection accepts the listed docs/manifests (also when nested) and rejects
/// regular sources and look-alike names.
#[test]
fn test_is_spine_file() {
    let spine_paths = [
        "README.md",
        "Cargo.toml",
        "ROADMAP.md",
        "CONTRIBUTING.md",
        "package.json",
        "docs/architecture.md",
        "some/path/README.md",
    ];
    for path in spine_paths {
        assert!(is_spine_file(path), "expected spine file: {path}");
    }

    let non_spine_paths = ["src/main.rs", "README_backup.md"];
    for path in non_spine_paths {
        assert!(!is_spine_file(path), "expected non-spine file: {path}");
    }
}
/// With smart exclusion enabled, `Cargo.lock` is reported in `smart_excluded` (reason
/// `lockfile`) and is absent from the selection, while the source files are selected.
#[test]
fn test_select_files_with_options_smart_exclude() {
    let rows = vec![
        make_test_row("src/main.rs", "src", "Rust", 100, 50),
        make_test_row("Cargo.lock", ".", "TOML", 500, 200),
        make_test_row("src/lib.rs", "src", "Rust", 100, 40),
    ];
    let options = SelectOptions {
        no_smart_exclude: false,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        1000,
        ContextStrategy::Greedy,
        ValueMetric::Code,
        None,
        &options,
    );

    assert_eq!(outcome.smart_excluded.len(), 1);
    let excluded = &outcome.smart_excluded[0];
    assert_eq!(excluded.path, "Cargo.lock");
    assert_eq!(excluded.reason, "lockfile");

    let lockfile_selected = outcome.selected.iter().any(|r| r.path == "Cargo.lock");
    let main_selected = outcome.selected.iter().any(|r| r.path == "src/main.rs");
    let lib_selected = outcome.selected.iter().any(|r| r.path == "src/lib.rs");
    assert!(!lockfile_selected);
    assert!(main_selected);
    assert!(lib_selected);
}
/// With `no_smart_exclude` set, the lockfile is selected like any other file and nothing
/// is reported as smart-excluded.
#[test]
fn test_select_files_with_options_no_smart_exclude() {
    let rows = vec![
        make_test_row("src/main.rs", "src", "Rust", 100, 50),
        make_test_row("Cargo.lock", ".", "TOML", 100, 200),
    ];
    let options = SelectOptions {
        no_smart_exclude: true,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        1000,
        ContextStrategy::Greedy,
        ValueMetric::Code,
        None,
        &options,
    );

    assert!(outcome.smart_excluded.is_empty());
    assert_eq!(outcome.selected.len(), 2);
}
/// `README.md` is selected alongside the source files and carries the rank reason `spine`.
#[test]
fn test_select_files_with_options_spine_reservation() {
    let rows = vec![
        make_test_row("README.md", ".", "Markdown", 50, 30),
        make_test_row("src/big.rs", "src", "Rust", 100, 100),
        make_test_row("src/small.rs", "src", "Rust", 50, 50),
    ];
    let options = SelectOptions {
        no_smart_exclude: true,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        2000,
        ContextStrategy::Greedy,
        ValueMetric::Code,
        None,
        &options,
    );

    assert!(!outcome.selected.is_empty());
    let readme = outcome
        .selected
        .iter()
        .find(|entry| entry.path == "README.md");
    assert!(readme.is_some(), "README.md should be in selected files");
    let readme = readme.unwrap();
    assert_eq!(readme.rank_reason, "spine");
}
/// With the code metric, a selected non-spine file is tagged with rank reason `code`.
#[test]
fn test_select_files_with_options_rank_reason() {
    let rows = vec![make_test_row("src/main.rs", "src", "Rust", 100, 50)];
    let options = SelectOptions {
        no_smart_exclude: true,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        1000,
        ContextStrategy::Greedy,
        ValueMetric::Code,
        None,
        &options,
    );

    assert_eq!(outcome.selected.len(), 1);
    let only = &outcome.selected[0];
    assert_eq!(only.rank_reason, "code");
}
/// Requesting the hotspot metric without git scores falls back to `code`, and the
/// fallback reason names the metric that was requested.
#[test]
fn test_select_files_with_options_rank_reason_hotspot_fallback() {
    let rows = vec![make_test_row("src/main.rs", "src", "Rust", 100, 50)];
    let options = SelectOptions {
        no_smart_exclude: true,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        1000,
        ContextStrategy::Greedy,
        ValueMetric::Hotspot,
        None,
        &options,
    );

    assert_eq!(outcome.selected[0].rank_reason, "code");
    assert_eq!(outcome.rank_by_effective, "code");

    let fallback = outcome.fallback_reason.as_deref();
    assert!(fallback.is_some());
    assert!(fallback.unwrap().contains("hotspot"));
}
/// With git scores available, the hotspot metric is honoured: both the per-file rank
/// reason and the effective metric are `hotspot`, and no fallback is reported.
#[test]
fn test_select_files_with_options_rank_reason_hotspot_with_git() {
    let git_scores = GitScores {
        hotspots: BTreeMap::from([("src/main.rs".to_string(), 999)]),
        commit_counts: BTreeMap::new(),
    };
    let rows = vec![make_test_row("src/main.rs", "src", "Rust", 100, 50)];
    let options = SelectOptions {
        no_smart_exclude: true,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        1000,
        ContextStrategy::Greedy,
        ValueMetric::Hotspot,
        Some(&git_scores),
        &options,
    );

    let first = &outcome.selected[0];
    assert_eq!(first.rank_reason, "hotspot");
    assert_eq!(outcome.rank_by_effective, "hotspot");
    assert!(outcome.fallback_reason.is_none());
}
/// `Cargo.lock` is classified as a lockfile.
#[test]
fn test_classify_lockfile() {
    let labels = classify_file("Cargo.lock", 1000, 100, 50.0);
    let is_lockfile = labels.contains(&FileClassification::Lockfile);
    assert!(is_lockfile);
}
/// A lockfile inside a subdirectory is still classified as a lockfile.
#[test]
fn test_classify_nested_lockfile() {
    let labels = classify_file("some/dir/package-lock.json", 1000, 100, 50.0);
    let is_lockfile = labels.contains(&FileClassification::Lockfile);
    assert!(is_lockfile);
}
/// `node-types.json` with these size arguments is classified as both generated and a data blob.
#[test]
fn test_classify_generated() {
    let labels = classify_file("src/parser/node-types.json", 5000, 10, 50.0);
    for expected in [FileClassification::Generated, FileClassification::DataBlob] {
        assert!(labels.contains(&expected));
    }
}
/// A `.pb.rs` file is classified as generated.
#[test]
fn test_classify_generated_pb_rs() {
    let labels = classify_file("proto/types.pb.rs", 1000, 200, 50.0);
    let is_generated = labels.contains(&FileClassification::Generated);
    assert!(is_generated);
}
/// A file under `vendor/` is classified as vendored.
#[test]
fn test_classify_vendored() {
    let labels = classify_file("vendor/github.com/lib/pq/conn.go", 500, 100, 50.0);
    let is_vendored = labels.contains(&FileClassification::Vendored);
    assert!(is_vendored);
}
/// A file under `tests/fixtures/` is classified as a fixture.
#[test]
fn test_classify_fixture() {
    let labels = classify_file("tests/fixtures/sample.json", 200, 50, 50.0);
    let is_fixture = labels.contains(&FileClassification::Fixture);
    assert!(is_fixture);
}
/// A `.min.js` file with these size arguments is classified as both minified and a data blob.
#[test]
fn test_classify_minified() {
    let labels = classify_file("dist/app.min.js", 50000, 1, 50.0);
    for expected in [FileClassification::Minified, FileClassification::DataBlob] {
        assert!(labels.contains(&expected));
    }
}
/// A `.js.map` file with these size arguments is classified as both a sourcemap and a data blob.
#[test]
fn test_classify_sourcemap() {
    let labels = classify_file("dist/app.js.map", 30000, 1, 50.0);
    for expected in [FileClassification::Sourcemap, FileClassification::DataBlob] {
        assert!(labels.contains(&expected));
    }
}
/// An otherwise ordinary source path is flagged as a data blob for the arguments (1000, 10)
/// with a threshold of 50.0.
/// NOTE(review): presumably tokens-per-line density (1000 / 10 = 100 > 50); confirm in `classify_file`.
#[test]
fn test_classify_dense_blob() {
    let labels = classify_file("src/data.rs", 1000, 10, 50.0);
    let is_blob = labels.contains(&FileClassification::DataBlob);
    assert!(is_blob);
}
/// A regular source file with these size arguments receives no classification at all.
#[test]
fn test_classify_normal_file() {
    let labels = classify_file("src/main.rs", 100, 50, 50.0);
    assert!(labels.is_empty());
}
/// With default options, a 128_000 budget yields a per-file cap of 16_000.
#[test]
fn test_compute_file_cap_default() {
    let per_file_cap = compute_file_cap(128_000, &SelectOptions::default());
    assert_eq!(per_file_cap, 16_000);
}
/// With default options, a 10_000 budget yields a per-file cap of 1_500
/// (the default `max_file_pct` of 0.15 applied to the budget).
#[test]
fn test_compute_file_cap_small_budget() {
    let per_file_cap = compute_file_cap(10_000, &SelectOptions::default());
    assert_eq!(per_file_cap, 1_500);
}
/// With `max_file_pct` 0.25 and `max_file_tokens` 5_000 on a 100_000 budget, the cap is 5_000.
#[test]
fn test_compute_file_cap_custom() {
    let custom = SelectOptions {
        max_file_pct: 0.25,
        max_file_tokens: Some(5_000),
        ..Default::default()
    };
    let per_file_cap = compute_file_cap(100_000, &custom);
    assert_eq!(per_file_cap, 5_000);
}
/// An unlimited budget (`usize::MAX`) yields an unlimited per-file cap.
#[test]
fn test_compute_file_cap_unlimited_budget() {
    let unlimited = usize::MAX;
    let per_file_cap = compute_file_cap(unlimited, &SelectOptions::default());
    assert_eq!(per_file_cap, unlimited);
}
/// A file below the cap is included in full, with no explanatory reason.
#[test]
fn test_assign_policy_under_cap_is_full() {
    let (decision, note) = assign_policy(100, 16_000, &[]);
    assert!(note.is_none());
    assert_eq!(decision, InclusionPolicy::Full);
}
/// An unclassified file above the cap is truncated to head+tail, and the reason says so.
#[test]
fn test_assign_policy_over_cap_normal_is_head_tail() {
    let (decision, note) = assign_policy(20_000, 16_000, &[]);
    assert_eq!(decision, InclusionPolicy::HeadTail);
    assert!(note.is_some());
    let note = note.unwrap();
    assert!(note.contains("head+tail"));
}
/// A generated file above the cap is skipped, and the reason mentions `generated`.
#[test]
fn test_assign_policy_over_cap_generated_is_skip() {
    let classes = [FileClassification::Generated];
    let (decision, note) = assign_policy(20_000, 16_000, &classes);
    assert_eq!(decision, InclusionPolicy::Skip);
    assert!(note.is_some());
    let note = note.unwrap();
    assert!(note.contains("generated"));
}
/// A data-blob file above the cap is skipped, with a reason attached.
#[test]
fn test_assign_policy_over_cap_data_blob_is_skip() {
    let classes = [FileClassification::DataBlob];
    let (decision, note) = assign_policy(20_000, 16_000, &classes);
    assert_eq!(decision, InclusionPolicy::Skip);
    assert!(note.is_some());
}
/// A vendored file above the cap is skipped, with a reason attached.
#[test]
fn test_assign_policy_over_cap_vendored_is_skip() {
    let classes = [FileClassification::Vendored];
    let (decision, note) = assign_policy(20_000, 16_000, &classes);
    assert_eq!(decision, InclusionPolicy::Skip);
    assert!(note.is_some());
}
/// A fixture above the cap is truncated to head+tail rather than skipped.
#[test]
fn test_assign_policy_over_cap_fixture_is_head_tail() {
    let classes = [FileClassification::Fixture];
    let decision = assign_policy(20_000, 16_000, &classes).0;
    assert_eq!(decision, InclusionPolicy::HeadTail);
}
/// The code metric resolves to itself without git scores and reports no fallback.
#[test]
fn test_resolve_metric_code_no_fallback() {
    let outcome = resolve_metric(ValueMetric::Code, None);
    assert!(outcome.fallback_reason.is_none());
    assert_eq!(outcome.effective, ValueMetric::Code);
}
/// Without git scores, the hotspot metric resolves to code and the reason names `hotspot`.
#[test]
fn test_resolve_metric_hotspot_falls_back() {
    let outcome = resolve_metric(ValueMetric::Hotspot, None);
    assert_eq!(outcome.effective, ValueMetric::Code);

    let why = outcome.fallback_reason;
    assert!(why.is_some());
    assert!(why.unwrap().contains("hotspot"));
}
/// Without git scores, the churn metric resolves to code and the reason names `churn`.
#[test]
fn test_resolve_metric_churn_falls_back() {
    let outcome = resolve_metric(ValueMetric::Churn, None);
    assert_eq!(outcome.effective, ValueMetric::Code);

    let why = outcome.fallback_reason;
    assert!(why.is_some());
    assert!(why.unwrap().contains("churn"));
}
/// Supplying git scores, even empty ones, is enough for the hotspot metric to be kept
/// with no fallback.
#[test]
fn test_resolve_metric_hotspot_with_git_no_fallback() {
    let empty_scores = GitScores {
        hotspots: BTreeMap::new(),
        commit_counts: BTreeMap::new(),
    };
    let outcome = resolve_metric(ValueMetric::Hotspot, Some(&empty_scores));
    assert!(outcome.fallback_reason.is_none());
    assert_eq!(outcome.effective, ValueMetric::Hotspot);
}
/// The 20_000-sized file is included as head+tail with `effective_tokens` of at most 16_000;
/// the small file is included in full with no effective-token override.
#[test]
fn test_budget_uses_effective_tokens() {
    let rows = vec![
        make_test_row("big.rs", "src", "Rust", 20_000, 1000),
        make_test_row("small.rs", "src", "Rust", 100, 50),
    ];
    let options = SelectOptions {
        no_smart_exclude: true,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        128_000,
        ContextStrategy::Greedy,
        ValueMetric::Code,
        None,
        &options,
    );
    assert_eq!(outcome.selected.len(), 2);

    // The large file is kept, but truncated.
    let big = outcome
        .selected
        .iter()
        .find(|entry| entry.path == "big.rs")
        .unwrap();
    assert_eq!(big.policy, InclusionPolicy::HeadTail);
    let capped_tokens = big.effective_tokens;
    assert!(capped_tokens.is_some());
    assert!(capped_tokens.unwrap() <= 16_000);

    // The small file is untouched.
    let small = outcome
        .selected
        .iter()
        .find(|entry| entry.path == "small.rs")
        .unwrap();
    assert_eq!(small.policy, InclusionPolicy::Full);
    assert!(small.effective_tokens.is_none());
}
/// A 117_000-token `node-types.json` is reported in `excluded_by_policy` with the `Skip`
/// policy and does not appear in the selection, while the ordinary source file is selected.
#[test]
fn test_generated_over_cap_excluded_by_policy() {
    let generated_blob = FileRow {
        path: "src/parser/node-types.json".to_string(),
        module: "src".to_string(),
        lang: "JSON".to_string(),
        kind: FileKind::Parent,
        code: 10,
        comments: 0,
        blanks: 0,
        lines: 10,
        bytes: 500_000,
        tokens: 117_000,
    };
    let rows = vec![
        make_test_row("src/main.rs", "src", "Rust", 100, 50),
        generated_blob,
    ];
    let options = SelectOptions {
        no_smart_exclude: true,
        ..Default::default()
    };

    let outcome = select_files_with_options(
        &rows,
        128_000,
        ContextStrategy::Greedy,
        ValueMetric::Code,
        None,
        &options,
    );

    assert_eq!(outcome.excluded_by_policy.len(), 1);
    let skipped = &outcome.excluded_by_policy[0];
    assert!(skipped.path.contains("node-types.json"));
    assert_eq!(skipped.policy, InclusionPolicy::Skip);

    let main_selected = outcome
        .selected
        .iter()
        .any(|entry| entry.path == "src/main.rs");
    assert!(main_selected);

    let blob_absent = outcome
        .selected
        .iter()
        .all(|entry| !entry.path.contains("node-types.json"));
    assert!(blob_absent);
}
}