use anyhow::Result;
use async_trait::async_trait;
use super::types::{Feedback, Observation, Task, Trajectory};
/// An agent that can attempt a [`Task`] and has filesystem export/reload hooks.
///
/// Implementors must be `Send + Sync`, so a boxed trait object can be shared
/// across threads.
#[async_trait]
pub trait Evolvable: Send + Sync {
/// Attempts `task` and returns the resulting [`Trajectory`], or an error if
/// the attempt fails.
async fn solve(&self, task: &Task) -> Result<Trajectory>;
/// Export hook.
///
/// NOTE(review): presumably writes the agent's current state to the
/// filesystem; the trait only fixes the signature — confirm against implementors.
async fn export_to_fs(&self) -> Result<()>;
/// Reload hook, the counterpart of `export_to_fs`.
///
/// NOTE(review): presumably re-reads agent state from the filesystem. It takes
/// `&self`, so an implementor that updates state here would need interior
/// mutability — confirm against implementors.
async fn reload_from_fs(&self) -> Result<()>;
}
/// A source of [`Task`]s together with a way to score a [`Trajectory`]
/// produced for one of them.
///
/// Implementors must be `Send + Sync`, so a boxed trait object can be shared
/// across threads.
#[async_trait]
pub trait BenchmarkAdapter: Send + Sync {
/// Returns tasks for the given `split`.
///
/// NOTE(review): `split` looks like a dataset partition name and `limit` an
/// upper bound on the number of tasks returned — neither is established by
/// this signature; confirm against implementors.
async fn get_tasks(&self, split: &str, limit: usize) -> Result<Vec<Task>>;
/// Scores `trajectory` as an attempt at `task`, returning the [`Feedback`]
/// or an error if evaluation fails.
async fn evaluate(&self, task: &Task, trajectory: &Trajectory) -> Result<Feedback>;
}
/// Pairs an [`Evolvable`] agent with a [`BenchmarkAdapter`]: tasks are solved
/// by the agent and the resulting trajectories are evaluated by the benchmark.
pub struct TrialRunner {
// Agent under test; produces a trajectory for each task.
agent: Box<dyn Evolvable>,
// Supplies tasks and evaluates the agent's trajectories.
benchmark: Box<dyn BenchmarkAdapter>,
}
impl TrialRunner {
pub fn new(agent: Box<dyn Evolvable>, benchmark: Box<dyn BenchmarkAdapter>) -> Self {
Self { agent, benchmark }
}
pub async fn run_tasks(&self, tasks: &[Task]) -> Vec<Observation> {
let mut results = Vec::new();
for task in tasks {
match self.run_single(task).await {
Ok(obs) => results.push(obs),
Err(e) => tracing::error!(task_id = %task.id, "TrialRunner error: {e}"),
}
}
results
}
pub async fn run_single(&self, task: &Task) -> Result<Observation> {
let trajectory = self.agent.solve(task).await?;
let feedback = self.benchmark.evaluate(task, &trajectory).await?;
Ok(Observation {
task: task.clone(),
trajectory,
feedback,
})
}
pub async fn get_tasks(&self, split: &str, limit: usize) -> Result<Vec<Task>> {
self.benchmark.get_tasks(split, limit).await
}
pub fn agent(&self) -> &dyn Evolvable {
self.agent.as_ref()
}
}