Skip to main content

aster_bench/eval_suites/
evaluation.rs

1use crate::bench_session::BenchAgent;
2use crate::bench_work_dir::BenchmarkWorkDir;
3use anyhow::Result;
4use async_trait::async_trait;
5use serde::{Deserialize, Serialize};
6use std::fmt;
7
/// A pair of strings identifying a model.
///
/// NOTE(review): the meaning of each element (presumably provider name and
/// model name) is not visible in this file — confirm against the call sites.
pub type Model = (String, String);

/// A string identifying an extension.
///
/// NOTE(review): presumably the extension's name; confirm against the call
/// sites, since this file never constructs or inspects one.
pub type Extension = String;
10
11#[derive(Debug, Deserialize, Serialize, Clone)]
12pub enum EvalMetricValue {
13    Integer(i64),
14    Float(f64),
15    String(String),
16    Boolean(bool),
17}
18
19impl fmt::Display for EvalMetricValue {
20    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21        match self {
22            EvalMetricValue::Integer(i) => write!(f, "{}", i),
23            EvalMetricValue::Float(fl) => write!(f, "{:.2}", fl),
24            EvalMetricValue::String(s) => write!(f, "{}", s),
25            EvalMetricValue::Boolean(b) => write!(f, "{}", b),
26        }
27    }
28}
29#[derive(Debug, Serialize)]
30pub struct EvalMetric {
31    pub name: String,
32    pub value: EvalMetricValue,
33}
34
/// The extensions an evaluation asks for, grouped into three lists.
///
/// `Default` yields three empty lists, i.e. "no extensions required".
///
/// NOTE(review): what each entry holds (a name, a command, a URL, ...) is not
/// visible in this file — confirm against the code that consumes these lists.
#[derive(Debug, Default)]
pub struct ExtensionRequirements {
    /// Entries for the `builtin` group.
    pub builtin: Vec<String>,
    /// Entries for the `external` group.
    pub external: Vec<String>,
    /// Entries for the `streamable_http` group.
    pub streamable_http: Vec<String>,
}
41
42#[async_trait]
43pub trait Evaluation: Send + Sync {
44    async fn run(
45        &self,
46        agent: &mut BenchAgent,
47        run_loc: &mut BenchmarkWorkDir,
48    ) -> Result<Vec<(String, EvalMetricValue)>>;
49
50    fn name(&self) -> &str;
51
52    fn required_extensions(&self) -> ExtensionRequirements {
53        ExtensionRequirements {
54            builtin: Vec::new(),
55            external: Vec::new(),
56            streamable_http: Vec::new(),
57        }
58    }
59}