// Submodule declarations. Every module listed here is public except `tests`,
// which is private and compiled only for test builds.
// NOTE(review): the module bodies are not visible from this file; judging by
// the names and the items re-exported from them, each one appears to hold a
// single evaluation metric or supporting utility — confirm against the modules.
pub mod accuracy;
pub mod bleu;
pub mod bootstrap;
pub mod calibration;
pub mod chrf;
pub mod dataset;
pub mod error;
pub mod meteor;
pub mod perplexity;
pub mod qa;
pub mod report;
pub mod rouge;
pub mod streaming;
pub mod throughput;
// Private test module; the `#[cfg(test)]` attribute keeps it out of non-test builds.
#[cfg(test)]
mod tests;
// Public re-exports. Each `pub use` below makes selected items from a submodule
// nameable directly from this module, in addition to their full submodule path.
// Only the items listed here are re-exported; anything else a submodule exposes
// remains reachable through the submodule itself.
pub use accuracy::{
AccuracyResult, ExactMatchEvaluator, LogitMcResult, McEvaluator, McLogitEvaluator,
};
pub use bleu::{corpus_bleu, sentence_bleu, BleuConfig, BleuScore, SmoothingMethod};
pub use bootstrap::{bootstrap_ci, ConfidenceInterval};
pub use calibration::{
brier_score, calibration_all, expected_calibration_error, nll_from_logits, BinStat,
CalibrationResult,
};
pub use chrf::{chrf, chrf_plus_plus, chrf_with, ChrfScore};
pub use dataset::{EvalDataset, EvalExample, McDataset, MultipleChoiceQuestion};
pub use error::EvalError;
pub use meteor::{align_tokens, meteor, meteor_multi, MeteorConfig, MeteorScore};
pub use perplexity::{PerplexityEvaluator, PerplexityResult};
// `exact_match`, `f1_score` and `score_multi` are re-exported under `qa_`-prefixed
// aliases; the other `qa` items keep their original names.
// NOTE(review): presumably the prefix keeps these generic names unambiguous at
// this level — confirm before renaming, since the aliases are part of the public API.
pub use qa::{
corpus_em_f1, exact_match as qa_exact_match, f1_score as qa_f1_score, normalize_answer,
normalize_tokens, score_multi as qa_score_multi, QaScore,
};
pub use report::{EvalReport, EvalResultEntry};
pub use rouge::{
ngram_counts, tokenize, CorpusRouge, RougeLScore, RougeNScore, RougeSScore, TokenSeq,
};
pub use streaming::{OnlineAccuracy, OnlinePerplexity};
pub use throughput::{percentile, time_fn, ThroughputBenchmark, ThroughputResult};