Skip to main content

aster_cli/commands/
bench.rs

1use crate::session::build_session;
2use crate::session::SessionBuilderConfig;
3use crate::{logging, CliSession};
4use aster::conversation::Conversation;
5use aster_bench::bench_session::{BenchAgent, BenchBaseSession};
6use aster_bench::eval_suites::ExtensionRequirements;
7use async_trait::async_trait;
8use std::sync::Arc;
9use tokio::sync::Mutex;
10
11// allow session obj to be used in benchmarking
12#[async_trait]
13impl BenchBaseSession for CliSession {
14    async fn headless(&mut self, message: String) -> anyhow::Result<()> {
15        self.headless(message).await
16    }
17    fn message_history(&self) -> Conversation {
18        self.message_history()
19    }
20    fn get_total_token_usage(&self) -> anyhow::Result<Option<i32>> {
21        // Since the trait requires sync but the session method is async,
22        // we need to block on the async call
23        tokio::task::block_in_place(|| {
24            tokio::runtime::Handle::current().block_on(self.get_total_token_usage())
25        })
26    }
27
28    fn get_session_id(&self) -> anyhow::Result<String> {
29        Ok(self.session_id().to_string())
30    }
31}
32pub async fn agent_generator(
33    requirements: ExtensionRequirements,
34    session_id: String,
35) -> BenchAgent {
36    let base_session = build_session(SessionBuilderConfig {
37        session_id: Some(session_id),
38        resume: false,
39        no_session: false,
40        extensions: requirements.external,
41        streamable_http_extensions: requirements.streamable_http,
42        builtins: requirements.builtin,
43        extensions_override: None,
44        additional_system_prompt: None,
45        settings: None,
46        provider: None,
47        model: None,
48        debug: false,
49        max_tool_repetitions: None,
50        interactive: false, // Benchmarking is non-interactive
51        scheduled_job_id: None,
52        max_turns: None,
53        quiet: false,
54        sub_recipes: None,
55        final_output_response: None,
56        retry_config: None,
57        output_format: "text".to_string(),
58    })
59    .await;
60
61    let bench_agent = BenchAgent::new(Box::new(base_session));
62
63    let errors = Some(Arc::new(Mutex::new(bench_agent.get_errors().await)));
64    logging::setup_logging(Some("bench"), errors).expect("Failed to initialize logging");
65
66    bench_agent
67}