// collet 0.1.1 - Docs.rs

//! TrialRunner & Evolvable — bridge between Collet's agent loop and evolution.
//!
//! [`Evolvable`] is the trait that Collet agents must implement to participate
//! in evolution.  [`TrialRunner`] wraps an evolvable agent + benchmark to
//! provide the `solve + evaluate` capability that engines may use for live
//! validation.

use anyhow::Result;
use async_trait::async_trait;

use super::types::{Feedback, Observation, Task, Trajectory};

// ---------------------------------------------------------------------------
// Evolvable trait — the bridge from Collet's agent to evolution
// ---------------------------------------------------------------------------

/// Trait that a Collet agent must implement to be evolvable.
///
/// This bridges Collet's event-driven agent loop to the evolution system's
/// request-response pattern.  The adapter wraps Collet's `agent::loop::run()`,
/// spawns it with a task as user message, collects events until `AgentEvent::Done`,
/// and packages the result as a [`Trajectory`].
///
/// Every method takes `&self`, including [`Evolvable::reload_from_fs`].
/// NOTE(review): an implementor that replaces in-memory state on reload
/// presumably needs interior mutability — confirm against the concrete adapter.
#[async_trait]
pub trait Evolvable: Send + Sync {
    /// Solve a single task, returning the execution trace.
    ///
    /// # Errors
    ///
    /// Failure conditions are defined by the implementor.
    /// [`TrialRunner::run_single`] propagates any error from this call with `?`.
    async fn solve(&self, task: &Task) -> Result<Trajectory>;

    /// Export in-memory state to the workspace filesystem.
    ///
    /// Called after the solve phase so the observer can capture any state
    /// not already persisted (e.g., session data, accumulated skills).
    ///
    /// # Errors
    ///
    /// Failure conditions are defined by the implementor.
    async fn export_to_fs(&self) -> Result<()>;

    /// Reload agent state from the (possibly mutated) workspace filesystem.
    ///
    /// Called after the evolution engine mutates the workspace so the agent
    /// picks up improved prompts, skills, and memory.
    ///
    /// # Errors
    ///
    /// Failure conditions are defined by the implementor.
    async fn reload_from_fs(&self) -> Result<()>;
}

// ---------------------------------------------------------------------------
// BenchmarkAdapter trait
// ---------------------------------------------------------------------------

/// Trait for benchmark datasets that provide tasks and evaluation.
///
/// Maps to A-Evolve's `BenchmarkAdapter`.  Implement `get_tasks()` to supply
/// tasks and `evaluate()` to score agent trajectories.
#[async_trait]
pub trait BenchmarkAdapter: Send + Sync {
    /// Fetch a batch of tasks from the benchmark.
    ///
    /// NOTE(review): `split` presumably names a dataset partition and `limit`
    /// presumably caps the number of returned tasks; the trait itself enforces
    /// neither — confirm against the concrete adapter.
    ///
    /// # Errors
    ///
    /// Failure conditions are defined by the implementor.
    async fn get_tasks(&self, split: &str, limit: usize) -> Result<Vec<Task>>;

    /// Evaluate an agent's trajectory against the ground truth.
    ///
    /// `task` is the task that `trajectory` was produced for; the resulting
    /// [`Feedback`] is stored alongside both in an [`Observation`] by
    /// [`TrialRunner::run_single`].
    ///
    /// # Errors
    ///
    /// Failure conditions are defined by the implementor.
    async fn evaluate(&self, task: &Task, trajectory: &Trajectory) -> Result<Feedback>;
}

// ---------------------------------------------------------------------------
// TrialRunner
// ---------------------------------------------------------------------------

/// Runs an [`Evolvable`] agent on benchmark tasks and returns observations.
///
/// This is a *capability*, not a requirement.  Engines that do not need
/// live validation can simply ignore the `trial` argument in `step()`.
pub struct TrialRunner {
    // Agent under evaluation; produces a `Trajectory` for each task.
    agent: Box<dyn Evolvable>,
    // Task source and scorer; turns a task + trajectory into `Feedback`.
    benchmark: Box<dyn BenchmarkAdapter>,
}

impl TrialRunner {
    pub fn new(agent: Box<dyn Evolvable>, benchmark: Box<dyn BenchmarkAdapter>) -> Self {
        Self { agent, benchmark }
    }

    /// Run the agent on `tasks` and return observations.
    pub async fn run_tasks(&self, tasks: &[Task]) -> Vec<Observation> {
        let mut results = Vec::new();
        for task in tasks {
            match self.run_single(task).await {
                Ok(obs) => results.push(obs),
                Err(e) => tracing::error!(task_id = %task.id, "TrialRunner error: {e}"),
            }
        }
        results
    }

    /// Convenience: run one task and return its observation.
    pub async fn run_single(&self, task: &Task) -> Result<Observation> {
        let trajectory = self.agent.solve(task).await?;
        let feedback = self.benchmark.evaluate(task, &trajectory).await?;
        Ok(Observation {
            task: task.clone(),
            trajectory,
            feedback,
        })
    }

    /// Fetch tasks from the benchmark dataset.
    pub async fn get_tasks(&self, split: &str, limit: usize) -> Result<Vec<Task>> {
        self.benchmark.get_tasks(split, limit).await
    }

    /// Get a reference to the underlying evolvable agent.
    pub fn agent(&self) -> &dyn Evolvable {
        self.agent.as_ref()
    }
}