aster_bench/eval_suites/
evaluation.rs1use crate::bench_session::BenchAgent;
2use crate::bench_work_dir::BenchmarkWorkDir;
3use anyhow::Result;
4use async_trait::async_trait;
5use serde::{Deserialize, Serialize};
6use std::fmt;
7
/// Identifies a model as a pair of strings.
///
/// NOTE(review): the meaning of each element is not visible in this file
/// (presumably provider and model name) — confirm against the callers.
pub type Model = (String, String);

/// Name of an extension, stored as a plain string.
pub type Extension = String;
10
11#[derive(Debug, Deserialize, Serialize, Clone)]
12pub enum EvalMetricValue {
13 Integer(i64),
14 Float(f64),
15 String(String),
16 Boolean(bool),
17}
18
19impl fmt::Display for EvalMetricValue {
20 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21 match self {
22 EvalMetricValue::Integer(i) => write!(f, "{}", i),
23 EvalMetricValue::Float(fl) => write!(f, "{:.2}", fl),
24 EvalMetricValue::String(s) => write!(f, "{}", s),
25 EvalMetricValue::Boolean(b) => write!(f, "{}", b),
26 }
27 }
28}
29#[derive(Debug, Serialize)]
30pub struct EvalMetric {
31 pub name: String,
32 pub value: EvalMetricValue,
33}
34
/// Extensions an evaluation declares that it needs, grouped into three lists.
///
/// The derived `Default` yields three empty lists.
///
/// NOTE(review): how each group is resolved/loaded is not shown in this
/// file — the per-field descriptions below only restate the field names.
#[derive(Debug, Default)]
pub struct ExtensionRequirements {
    /// Names listed under the "builtin" group.
    pub builtin: Vec<String>,
    /// Names listed under the "external" group.
    pub external: Vec<String>,
    /// Names listed under the "streamable_http" group.
    pub streamable_http: Vec<String>,
}
41
42#[async_trait]
43pub trait Evaluation: Send + Sync {
44 async fn run(
45 &self,
46 agent: &mut BenchAgent,
47 run_loc: &mut BenchmarkWorkDir,
48 ) -> Result<Vec<(String, EvalMetricValue)>>;
49
50 fn name(&self) -> &str;
51
52 fn required_extensions(&self) -> ExtensionRequirements {
53 ExtensionRequirements {
54 builtin: Vec::new(),
55 external: Vec::new(),
56 streamable_http: Vec::new(),
57 }
58 }
59}