//! vtcode 0.99.1
//!
//! A Rust-based terminal coding agent with modular architecture supporting multiple LLM providers.
use anyhow::{Context, Result, anyhow, bail};
use std::str::FromStr;
use vtcode_core::config::VTCodeConfig;
use vtcode_core::config::models::ModelId;
use vtcode_core::config::types::AgentConfig as CoreAgentConfig;
use vtcode_core::core::agent::runner::{AgentRunner, RunnerSettings};
use vtcode_core::core::agent::types::AgentType;
use vtcode_core::utils::file_utils::write_file_with_context_sync;

use crate::startup::ensure_full_auto_workspace_trust;

use super::BenchmarkCommandOptions;
use super::report::{BenchmarkReport, BenchmarkTaskReport};
use super::spec::load_prepared_tasks;

/// User-facing error emitted when a benchmark run is attempted without
/// full-auto mode being both requested on the CLI and enabled in config.
const ERROR_FULL_AUTO_REQUIRED: &str =
    "Benchmark runs require --full-auto/--auto with [automation.full_auto] enabled.";
/// User-facing error emitted when the loaded benchmark spec yields no tasks.
const ERROR_SPEC_EMPTY: &str = "Benchmark specification is empty.";

/// Runs the benchmark suite in full-auto mode and emits a JSON report.
///
/// Validates that full-auto execution was both requested on the CLI and
/// enabled under `[automation.full_auto]`, confirms workspace trust, loads
/// the prepared benchmark tasks (applying the optional `--max-tasks` cap),
/// executes each task through an `AgentRunner` with the configured retry
/// budget, and finally serializes a `BenchmarkReport` to stdout — and to
/// `--output`, when provided.
///
/// # Errors
///
/// Returns an error when full-auto is not requested/enabled, the task
/// specification is empty, `--max-tasks` is zero, the configured model is
/// unrecognized, runner construction fails, any task fails after retries,
/// or report serialization/writing fails.
pub async fn handle_benchmark_command(
    config: &CoreAgentConfig,
    vt_cfg: &VTCodeConfig,
    options: BenchmarkCommandOptions,
    full_auto_requested: bool,
) -> Result<()> {
    let automation_cfg = &vt_cfg.automation.full_auto;
    // Both the CLI flag and the config switch must agree before proceeding.
    if !full_auto_requested || !automation_cfg.enabled {
        bail!(ERROR_FULL_AUTO_REQUIRED);
    }

    // Exit quietly (Ok) when workspace trust is not granted.
    if !ensure_full_auto_workspace_trust(&config.workspace).await? {
        return Ok(());
    }

    let mut tasks = load_prepared_tasks(&options, &config.workspace)?;
    if tasks.is_empty() {
        bail!(ERROR_SPEC_EMPTY);
    }

    // Apply the optional task cap; an explicit zero is a user error.
    match options.max_tasks {
        Some(0) => bail!("--max-tasks must be greater than zero when provided."),
        Some(limit) => tasks.truncate(limit), // no-op when fewer tasks exist
        None => {}
    }

    // Resolve the configured model string into a typed identifier.
    let model_id = ModelId::from_str(&config.model).with_context(|| {
        format!(
            "Model '{}' is not recognized for benchmark execution. Update vtcode.toml to a supported identifier.",
            config.model
        )
    })?;

    // Derive a session identifier from the current Unix timestamp.
    let timestamp_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_err(|err| anyhow!("Failed to derive session identifier timestamp: {}", err))?
        .as_secs();
    let session_id = format!("benchmark-{}", timestamp_secs);

    let settings = RunnerSettings {
        reasoning_effort: Some(config.reasoning_effort),
        verbosity: None,
    };
    let mut runner = AgentRunner::new_with_openai_auth(
        AgentType::Single,
        model_id,
        config.api_key.clone(),
        config.workspace.clone(),
        session_id,
        settings,
        None,
        config.openai_chatgpt_auth.clone(),
    )
    .await?;
    // Grant the runner the tool allowlist configured for full-auto runs.
    runner.enable_full_auto(&automation_cfg.allowed_tools).await;

    // Execute every task sequentially, retrying up to the configured budget.
    let retry_budget = vt_cfg.agent.max_task_retries;
    let mut task_reports = Vec::with_capacity(tasks.len());
    for entry in &tasks {
        let outcome = runner
            .execute_task_with_retry(&entry.task, &entry.contexts, retry_budget)
            .await
            .with_context(|| {
                format!("Failed to execute task '{}' after retries", entry.task.id)
            })?;
        task_reports.push(BenchmarkTaskReport::from_task_result(&entry.task, outcome));
    }

    let report = BenchmarkReport {
        model: config.model.clone(),
        provider: config.provider.clone(),
        workspace: config.workspace.display().to_string(),
        task_count: task_reports.len(),
        tasks: task_reports,
    };

    let report_json = serde_json::to_string_pretty(&report)
        .context("Failed to serialize benchmark report to JSON")?;

    // Persist to the requested output path, if any, then echo to stdout.
    if let Some(path) = &options.output {
        write_file_with_context_sync(path, &report_json, "benchmark report")?;
    }

    println!("{}", report_json);
    Ok(())
}