Expand description
VIL Evaluation Framework (H10).
Provides metrics, datasets, batch evaluation, and reporting for LLM output quality.
use vil_eval::{EvalDataset, EvalCase, EvalRunner, AnswerRelevance};
let mut dataset = EvalDataset::new();
dataset.add_case(EvalCase {
question: "What is Rust?".to_string(),
context: "Rust is a systems programming language.".to_string(),
answer: "Rust is a systems programming language focused on safety.".to_string(),
reference: None,
});
let runner = EvalRunner::new(dataset).add_metric(Box::new(AnswerRelevance));
let report = runner.run();
assert_eq!(report.case_count(), 1);

Re-exports
pub use dataset::EvalCase;
pub use dataset::EvalDataset;
pub use evaluator::EvalMetric;
pub use evaluator::MetricScore;
pub use metrics::AnswerLength;
pub use metrics::AnswerRelevance;
pub use metrics::ContextRecall;
pub use metrics::Faithfulness;
pub use report::CaseResult;
pub use report::EvalReport;
pub use runner::EvalRunner;
pub use plugin::EvalPlugin;
pub use semantic::EvalFault;
pub use semantic::EvalRunEvent;
pub use semantic::EvalState;