use std::time::Instant;
use crate::agent::Agent;
use crate::eval::scenario::EvalScenario;
/// Outcome of running a single evaluation scenario against an agent.
///
/// One value is produced per scenario, whether the agent call succeeded or
/// failed; on failure `error` is set and the remaining fields hold neutral
/// placeholders (empty output, zero iterations, zero cost, no scorer results).
#[derive(Debug)]
pub struct EvalResult {
    /// Copy of the scenario's input prompt, kept so a result can be matched
    /// back to its scenario.
    pub scenario_input: String,
    /// `true` only when the agent call succeeded and every scorer accepted the
    /// output. Always `false` when the agent call failed.
    pub passed: bool,
    /// Final text returned by the agent; empty when the agent call failed.
    pub output: String,
    /// Iteration count reported by the agent response; `0` on failure.
    pub iterations: usize,
    /// Wall-clock time spent in the agent call (scoring is not included).
    pub latency: std::time::Duration,
    /// Cost reported by the agent response; `0.0` on failure.
    // NOTE(review): unit/currency is defined by the agent response — confirm there.
    pub cost: f64,
    /// Verdict of each scorer, in the order the scenario lists its scorers.
    /// Empty when the agent call failed.
    pub scorer_results: Vec<bool>,
    /// Stringified agent error, or `None` when the agent call succeeded.
    pub error: Option<String>,
}
/// Runs evaluation scenarios against a borrowed [`Agent`] and collects one
/// [`EvalResult`] per scenario.
pub struct EvalRunner<'a> {
    /// Agent under evaluation; borrowed for the lifetime of the runner.
    agent: &'a Agent,
    /// Number of scenarios grouped into one batch by `run`.
    /// Starts at 1 and is clamped to at least 1 by the `concurrency` builder.
    concurrency: usize,
}
impl<'a> EvalRunner<'a> {
    /// Creates a runner for `agent` that processes one scenario at a time.
    pub fn new(agent: &'a Agent) -> Self {
        Self {
            agent,
            concurrency: 1,
        }
    }

    /// Sets how many scenarios are evaluated concurrently per batch.
    ///
    /// Values below 1 are clamped to 1.
    pub fn concurrency(mut self, n: usize) -> Self {
        self.concurrency = n.max(1);
        self
    }

    /// Runs every scenario and returns the results in the same order as
    /// `scenarios`.
    ///
    /// Scenarios are processed in batches of at most `concurrency` items. All
    /// scenarios of a batch are driven concurrently on the calling task; the
    /// next batch starts once every scenario of the current batch has finished.
    /// A failing scenario never aborts the run — it is reported through
    /// [`EvalResult::error`].
    pub async fn run(&self, scenarios: &[EvalScenario]) -> Vec<EvalResult> {
        let mut results = Vec::with_capacity(scenarios.len());

        // `chunks` panics on a zero size; the builder already clamps, this
        // keeps the method safe even if the field is set some other way.
        let batch_size = self.concurrency.max(1);

        for chunk in scenarios.chunks(batch_size) {
            // Futures are lazy: awaiting them one after another would run the
            // batch sequentially. Pin them and poll them together instead.
            let mut pending: Vec<_> = chunk
                .iter()
                .map(|scenario| Box::pin(self.run_one(scenario)))
                .collect();

            // One slot per future so results keep the scenario order no matter
            // which future completes first.
            let mut finished: Vec<Option<EvalResult>> = Vec::new();
            finished.resize_with(pending.len(), || None);

            std::future::poll_fn(|cx| {
                let mut all_done = true;
                for (future, slot) in pending.iter_mut().zip(finished.iter_mut()) {
                    // A completed future must not be polled again.
                    if slot.is_some() {
                        continue;
                    }
                    match std::future::Future::poll(future.as_mut(), cx) {
                        std::task::Poll::Ready(result) => *slot = Some(result),
                        std::task::Poll::Pending => all_done = false,
                    }
                }
                if all_done {
                    std::task::Poll::Ready(())
                } else {
                    std::task::Poll::Pending
                }
            })
            .await;

            // Every slot is `Some` once the join above resolves.
            results.extend(finished.into_iter().flatten());
        }

        results
    }

    /// Runs a single scenario: prompts the agent, then applies each scorer to
    /// the agent's final text.
    ///
    /// The recorded latency covers only the agent call, not scoring. Scorers
    /// are evaluated one after another in scenario order. A scenario with no
    /// scorers passes as long as the agent call succeeds.
    async fn run_one(&self, scenario: &EvalScenario) -> EvalResult {
        let start = Instant::now();
        let result = self.agent.prompt(&scenario.input).await;
        let latency = start.elapsed();

        match result {
            Ok(response) => {
                let mut scorer_results = Vec::with_capacity(scenario.scorers.len());
                for scorer in &scenario.scorers {
                    scorer_results.push(scorer.evaluate(&response.final_text).await);
                }
                let passed = scorer_results.iter().all(|r| *r);

                EvalResult {
                    scenario_input: scenario.input.clone(),
                    passed,
                    output: response.final_text,
                    iterations: response.iterations,
                    latency,
                    cost: response.cost,
                    scorer_results,
                    error: None,
                }
            }
            // Agent failure: report it as a failed result with neutral values
            // instead of propagating, so the remaining scenarios still run.
            Err(e) => EvalResult {
                scenario_input: scenario.input.clone(),
                passed: false,
                output: String::new(),
                iterations: 0,
                latency,
                cost: 0.0,
                scorer_results: Vec::new(),
                error: Some(e.to_string()),
            },
        }
    }
}