entrenar/hf_pipeline/leaderboard/
parser.rs1use super::types::{HfLeaderboard, LeaderboardKind};
7use crate::eval::evaluator::{EvalResult, Leaderboard, Metric};
8use crate::eval::RougeVariant;
9
10pub fn to_leaderboard(hf: &HfLeaderboard) -> Leaderboard {
14 let primary = hf.kind.primary_metric();
15 let mut leaderboard = Leaderboard::new(primary);
16
17 for entry in &hf.entries {
18 let mut result = EvalResult::new(&entry.model_id);
19
20 for (column, &value) in &entry.scores {
21 if let Some(metric) = column_to_metric(&hf.kind, column) {
22 result.add_score(metric, value);
23 }
24 }
25
26 leaderboard.add(result);
27 }
28
29 leaderboard
30}
31
32#[must_use]
36pub fn column_to_metric(kind: &LeaderboardKind, column: &str) -> Option<Metric> {
37 let col_lower = column.to_lowercase();
38
39 match kind {
40 LeaderboardKind::OpenASR => match col_lower.as_str() {
41 "wer" | "average_wer" | "word_error_rate" => Some(Metric::WER),
42 "rtfx" | "rtf" | "real_time_factor" => Some(Metric::RTFx),
43 _ => None,
44 },
45 LeaderboardKind::OpenLLMv2 => match col_lower.as_str() {
46 "mmlu" | "mmlu_pro" | "mmlu_accuracy" => Some(Metric::MMLUAccuracy),
47 "accuracy" | "average" | "avg" => Some(Metric::Accuracy),
48 _ => None,
49 },
50 LeaderboardKind::MTEB => match col_lower.as_str() {
51 "ndcg@10" | "ndcg_at_10" => Some(Metric::NDCGAtK(10)),
52 "accuracy" => Some(Metric::Accuracy),
53 _ => None,
54 },
55 LeaderboardKind::BigCodeBench => match col_lower.as_str() {
56 "pass@1" | "pass_at_1" => Some(Metric::PassAtK(1)),
57 "pass@10" | "pass_at_10" => Some(Metric::PassAtK(10)),
58 _ => None,
59 },
60 LeaderboardKind::Custom(_) => generic_column_to_metric(&col_lower),
61 }
62}
63
64fn generic_column_to_metric(column: &str) -> Option<Metric> {
66 match column {
67 "accuracy" | "acc" => Some(Metric::Accuracy),
68 "wer" | "word_error_rate" => Some(Metric::WER),
69 "bleu" => Some(Metric::BLEU),
70 "rouge1" | "rouge_1" => Some(Metric::ROUGE(RougeVariant::Rouge1)),
71 "rouge2" | "rouge_2" => Some(Metric::ROUGE(RougeVariant::Rouge2)),
72 "rougel" | "rouge_l" => Some(Metric::ROUGE(RougeVariant::RougeL)),
73 "perplexity" | "ppl" => Some(Metric::Perplexity),
74 "mmlu" => Some(Metric::MMLUAccuracy),
75 "pass@1" | "pass_at_1" => Some(Metric::PassAtK(1)),
76 "ndcg@10" | "ndcg_at_10" => Some(Metric::NDCGAtK(10)),
77 _ => None,
78 }
79}
80
81pub fn compare_with_leaderboard(my_result: &EvalResult, hf: &HfLeaderboard) -> Leaderboard {
86 let mut leaderboard = to_leaderboard(hf);
87 leaderboard.add(my_result.clone());
88 leaderboard
89}