1pub mod accuracy;
35pub mod arc;
36pub mod bleu;
37pub mod boolq;
38pub mod bootstrap;
39pub mod calibration;
40pub mod chrf;
41pub mod dataset;
42pub mod error;
43pub mod gsm8k;
44pub mod hellaswag;
45pub mod meteor;
46pub mod mmlu;
47pub mod perplexity;
48pub mod qa;
49pub mod report;
50pub mod rouge;
51pub mod streaming;
52pub mod throughput;
53pub mod truthfulqa;
54pub mod winogrande;
55
56#[cfg(test)]
57mod tests;
58
59pub use accuracy::{
64 AccuracyResult, ExactMatchEvaluator, LogitMcResult, McEvaluator, McLogitEvaluator,
65};
66pub use arc::{ArcEvaluator, ArcResult, ArcSplit};
67pub use bleu::{corpus_bleu, sentence_bleu, BleuConfig, BleuScore, SmoothingMethod};
68pub use boolq::{BoolQDataset, BoolQEvaluator, BoolQItem, BoolQResult};
69pub use bootstrap::{bootstrap_ci, ConfidenceInterval};
70pub use calibration::{
71 brier_score, calibration_all, expected_calibration_error, nll_from_logits, BinStat,
72 CalibrationResult,
73};
74pub use chrf::{chrf, chrf_plus_plus, chrf_with, ChrfScore};
75pub use dataset::{EvalDataset, EvalExample, McDataset, MultipleChoiceQuestion};
76pub use error::EvalError;
77pub use gsm8k::{Gsm8kEvaluator, Gsm8kResult};
78pub use hellaswag::{HellaSwagDataset, HellaSwagEvaluator, HellaSwagItem, HellaSwagResult};
79pub use meteor::{align_tokens, meteor, meteor_multi, MeteorConfig, MeteorScore};
80pub use mmlu::{MmluEvaluator, MmluResult};
81pub use perplexity::{PerplexityEvaluator, PerplexityResult};
82pub use qa::{
83 corpus_em_f1, exact_match as qa_exact_match, f1_score as qa_f1_score, normalize_answer,
84 normalize_tokens, score_multi as qa_score_multi, QaScore,
85};
86pub use report::{EvalReport, EvalResultEntry};
87pub use rouge::{
88 ngram_counts, tokenize, CorpusRouge, RougeLScore, RougeNScore, RougeSScore, TokenSeq,
89};
90pub use streaming::{OnlineAccuracy, OnlinePerplexity};
91pub use throughput::{percentile, time_fn, ThroughputBenchmark, ThroughputResult};
92pub use truthfulqa::{
93 TruthfulQaDataset, TruthfulQaEvaluator, TruthfulQaItem, TruthfulQaMode, TruthfulQaResult,
94};
95pub use winogrande::{WinoGrandeDataset, WinoGrandeEvaluator, WinoGrandeItem, WinoGrandeResult};