Skip to main content

aster_bench/
bench_config.rs

1use crate::bench_work_dir::BenchmarkWorkDir;
2use serde::{Deserialize, Serialize};
3use std::fs;
4use std::fs::read_to_string;
5use std::path::PathBuf;
6
7#[derive(Clone, Serialize, Deserialize, Debug)]
8pub struct BenchToolShimOpt {
9    pub use_tool_shim: bool,
10    pub tool_shim_model: Option<String>,
11}
12
13#[derive(Clone, Serialize, Deserialize, Debug)]
14pub struct BenchModel {
15    pub provider: String,
16    pub name: String,
17    pub parallel_safe: bool,
18    pub tool_shim: Option<BenchToolShimOpt>,
19}
20#[derive(Clone, Serialize, Deserialize, Debug)]
21pub struct BenchEval {
22    pub selector: String,
23    pub post_process_cmd: Option<PathBuf>,
24    pub parallel_safe: bool,
25}
26#[derive(Clone, Serialize, Deserialize, Debug)]
27pub struct BenchRunConfig {
28    pub models: Vec<BenchModel>,
29    pub evals: Vec<BenchEval>,
30    pub include_dirs: Vec<PathBuf>,
31    pub repeat: Option<usize>,
32    pub run_id: Option<String>,
33    pub output_dir: Option<PathBuf>,
34    pub eval_result_filename: String,
35    pub run_summary_filename: String,
36    pub env_file: Option<PathBuf>,
37}
38
39impl Default for BenchRunConfig {
40    fn default() -> Self {
41        BenchRunConfig {
42            models: vec![
43                BenchModel {
44                    provider: "databricks".to_string(),
45                    name: "aster".to_string(),
46                    parallel_safe: true,
47                    tool_shim: Some(BenchToolShimOpt {
48                        use_tool_shim: false,
49                        tool_shim_model: None,
50                    }),
51                },
52                BenchModel {
53                    provider: "databricks".to_string(),
54                    name: "aster-claude-4-sonnet".to_string(),
55                    parallel_safe: true,
56                    tool_shim: None,
57                },
58            ],
59            evals: vec![BenchEval {
60                selector: "core".into(),
61                post_process_cmd: None,
62                parallel_safe: true, // Default to true
63            }],
64            include_dirs: vec![],
65            repeat: Some(2),
66            run_id: None,
67            output_dir: None,
68            eval_result_filename: "eval-results.json".to_string(),
69            run_summary_filename: "run-results-summary.json".to_string(),
70            env_file: None,
71        }
72    }
73}
74impl BenchRunConfig {
75    pub fn from_string(cfg: String) -> anyhow::Result<Self> {
76        let mut config: Self = serde_json::from_str(cfg.as_str())?;
77        // update include_dirs to contain full-paths only
78        config.include_dirs = BenchmarkWorkDir::canonical_dirs(config.include_dirs);
79        Self::canonicalize_eval_post_proc_cmd(&mut config);
80        Ok(config)
81    }
82
83    fn canonicalize_eval_post_proc_cmd(config: &mut BenchRunConfig) {
84        // update eval post-process script paths to all be full-paths
85        config.evals.iter_mut().for_each(|eval| {
86            if let Some(post_process_cmd) = &eval.post_process_cmd {
87                let canon = BenchmarkWorkDir::canonical_dirs(vec![post_process_cmd.clone()]);
88                let full_path_cmd = canon[0].clone();
89                if !full_path_cmd.exists() {
90                    panic!("BenchConfigError: Eval post-process command not found. File {:?} does not exist", full_path_cmd);
91                }
92                eval.post_process_cmd = Some(full_path_cmd);
93            }
94        });
95    }
96    pub fn from(cfg: PathBuf) -> anyhow::Result<Self> {
97        let config = Self::from_string(read_to_string(cfg)?)?;
98        Ok(config)
99    }
100
101    pub fn to_string(&self) -> anyhow::Result<String> {
102        Ok(serde_json::to_string_pretty(self)?)
103    }
104
105    pub fn save(&self, name: String) {
106        let config = self.to_string().unwrap();
107        fs::write(name, config).expect("Unable to write bench config file");
108    }
109}