// dci-tool 0.1.0 - Docs.rs

//! Agentic evaluation.
//!
//! This module implements the `rig_retrieval_evals::agents::AgentEvalRunner`
//! trait for `DciAgent`. This enables end-to-end evaluation of the DCI
//! agent's full multi-turn interaction loop, tracking the final answers,
//! tool calls made, and number of turns taken to reach the answer.

use std::future::Future;
use std::pin::Pin;

use rig_core::completion::{CompletionModel, Prompt};
use rig_retrieval_evals::agents::{
    AgentEvalRunner, AgentEvalTask, AgentObservation, AgentToolCall,
};

use crate::agent::DciAgent;

/// Lets a [`DciAgent`] be driven by the `rig_retrieval_evals` agent harness.
///
/// A single evaluation task is executed as one prompt against the wrapped
/// agent; the resulting transcript is then condensed into an
/// [`AgentObservation`].
impl<M: CompletionModel + 'static, P: rig_core::agent::PromptHook<M> + 'static> AgentEvalRunner
    for DciAgent<M, P>
{
    /// Runs `task` against the agent and reports what happened.
    ///
    /// The turn budget is `task.max_turns` when the task sets one, otherwise
    /// the value returned by `self.max_turns()`.
    ///
    /// The returned observation contains:
    /// - `final_output`: the `output` of the prompt response;
    /// - `tool_calls`: every tool call found in the assistant messages of the
    ///   response, in transcript order, with `ok` left as `None`;
    /// - `turns`: the number of entries in the response's `completion_calls`;
    /// - `metadata`: always empty.
    ///
    /// # Errors
    ///
    /// Any failure of the prompt request is reported as
    /// `rig_retrieval_evals::Error::Config` carrying the message
    /// `"Agent eval failed: <cause>"`.
    fn run<'a>(
        &'a self,
        task: &'a AgentEvalTask,
    ) -> Pin<
        Box<
            dyn Future<Output = std::result::Result<AgentObservation, rig_retrieval_evals::Error>>
                + Send
                + 'a,
        >,
    > {
        Box::pin(async move {
            // A per-task limit takes precedence over the agent's own setting.
            let max_turns = task.max_turns.unwrap_or_else(|| self.max_turns());

            let response = self
                .agent()
                .prompt(&task.prompt)
                .max_turns(max_turns)
                .extended_details()
                .await
                .map_err(|e| {
                    rig_retrieval_evals::Error::Config(format!("Agent eval failed: {}", e))
                })?;

            let mut tool_calls = Vec::new();
            // `messages` is optional on the response; a missing transcript
            // simply yields no recorded tool calls.
            for msg in response.messages.iter().flatten() {
                let rig_core::message::Message::Assistant { content, .. } = msg else {
                    continue;
                };
                // Borrow the content items instead of cloning the whole
                // collection: only the tool name and arguments are copied out.
                for item in content.iter() {
                    let rig_core::completion::AssistantContent::ToolCall(tc) = item else {
                        continue;
                    };
                    tool_calls.push(AgentToolCall {
                        name: tc.function.name.clone(),
                        arguments: tc.function.arguments.clone(),
                        // The assistant message itself carries no success
                        // flag for the call, so the outcome stays unknown.
                        ok: None,
                    });
                }
            }

            Ok(AgentObservation {
                final_output: response.output,
                tool_calls,
                turns: Some(response.completion_calls.len()),
                metadata: std::collections::BTreeMap::new(),
            })
        })
    }
}