entrenar/train/curriculum/efficiency.rs
//! Efficiency metrics for curriculum learning.
2
/// Efficiency score of a tier, as defined by the CITL spec.
///
/// ```text
/// E(T) = Accuracy(T) / ln(CorpusSize(T))
/// ```
///
/// Larger values are better: the score rewards accuracy and penalises
/// corpus bloat logarithmically.
///
/// A corpus of 0 or 1 bytes returns `accuracy` unchanged, because the
/// natural log is not positive there (`ln(1) == 0`, `ln(0) == -inf`) and
/// dividing by it would not give a usable score.
pub fn efficiency_score(accuracy: f32, corpus_size_bytes: usize) -> f32 {
    match corpus_size_bytes {
        0 | 1 => accuracy,
        // From 2 bytes upward the logarithm is strictly positive.
        bytes => accuracy / (bytes as f32).ln(),
    }
}
14
15/// Compare tiers and select optimal based on efficiency
16///
17/// Returns (best_tier, efficiency_score)
18pub fn select_optimal_tier(tier_results: &[(usize, f32, usize)]) -> Option<(usize, f32)> {
19 tier_results
20 .iter()
21 .map(|&(tier, accuracy, corpus_size)| {
22 let eff = efficiency_score(accuracy, corpus_size);
23 (tier, eff)
24 })
25 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
26}