vibe-tests 0.0.1

Integration test framework for MCP servers with LLM-powered tool calling.
Documentation
//! Test engine — main entry point for MCP integration testing.
//! Combines environment, callbacks, and runtime state.

use std::fs;
use std::time::Duration;

use serde_json::{Value, json};
use tracing::Level;

use crate::base::error::{TestError, TestsResult};
use crate::base::result::{TestModelResult, TestResult};
use crate::docker::compose::Compose;
use crate::engine::engine_builder::EngineBuilder;
use crate::engine::engine_dialog::Dialog;
use crate::engine::engine_env::EngineEnv;
use crate::engine::engine_events::EngineEvents;
use crate::engine::engine_report::EngineReport;
use crate::engine::engine_state::EngineState;
use crate::env::env_start::EnvStart;
use crate::env::env_stop::EnvStop;
use crate::mcp::client::McpClient;
use crate::mcp::runner::Runner;
use crate::ollama::client::OllamaClient;

/// Test engine ready to execute queries.
/// Created via EngineTests::builder().
///
/// The three fields are public; the notes below list only what this file
/// itself reads or writes on each of them.
pub struct EngineTests {
    /// Static configuration. This file reads the MCP host, the Ollama host,
    /// the list of Ollama models, the exclusive-model flag, the timeout and
    /// the optional compose file from it.
    pub env: EngineEnv,
    /// Lifecycle callbacks. This file consumes `on_start` (during init) and
    /// `on_stop` (during shutdown); each is taken out, so it runs at most once.
    pub events: EngineEvents,
    /// Runtime state. This file uses the home directory, the tee log writer,
    /// the `initialized` flag, the compose handle, the MCP runner, the value
    /// returned by `on_start`, and the start time.
    pub state: EngineState,
}

impl EngineTests {
    /// Creates a new builder.
    pub fn builder() -> EngineBuilder {
        EngineBuilder::new()
    }

    /// Initialize infrastructure: compose up, on_start, health check.
    ///
    /// Steps, in order:
    /// 1. install a tracing subscriber that writes through the tee writer,
    /// 2. bring up docker compose when a compose file is configured,
    /// 3. run the `on_start` callback (if any) and keep its return value,
    /// 4. wait for the MCP server to report healthy and store the runner.
    ///
    /// Calling this again after the first call is a no-op that returns `Ok(())`.
    ///
    /// # Errors
    /// Propagates the error from compose `up`, from the `on_start` callback,
    /// or from the health wait. The `initialized` flag is set before those
    /// steps run, so a failed init is not retried by a later call.
    pub async fn init(&mut self) -> TestsResult<()> {
        // Skip if already initialized
        if self.state.initialized {
            return Ok(());
        }
        self.state.initialized = true;

        // Initialize tracing subscriber (writes to log file via tee).
        // The result is discarded: failing to install the subscriber is not
        // treated as an init error.
        let tee = self.state.tee.clone();
        tracing_subscriber::fmt()
            .with_writer(move || tee.clone())
            .with_max_level(Level::TRACE)
            .try_init()
            .ok();

        // Start docker compose if configured
        if let Some(compose_file) = &self.env.compose_file {
            self.state.compose = Some(
                Compose::new(compose_file, self.state.tee.clone(), self.env.timeout)
                    .up()
                    .await?,
            );
        }

        // Call on_start — user does setup, then we wait for MCP health.
        // `take()` consumes the callback so it can never run twice.
        if let Some(on_start) = self.events.on_start.take() {
            self.state.start_data = Some(
                on_start(EnvStart {
                    home: self.state.home.path().to_path_buf(),
                    tee: self.state.tee.clone(),
                })
                .await?,
            );
            // Fixed pause after on_start before the health wait begins
            tokio::time::sleep(Duration::from_secs(2)).await;
        }

        // Wait for MCP server to become ready
        let runner = Runner::new(&self.env.mcp_host, self.env.timeout);
        runner.wait_healthy().await?;
        self.state.runner = Some(runner);

        Ok(())
    }

    /// Runs a single test query against the MCP server via LLM across all models.
    ///
    /// For every configured Ollama model this connects to the MCP server,
    /// lists its tools, runs one dialog for `query`, and records a
    /// `TestModelResult`. Models are processed sequentially.
    ///
    /// A failure for one model never aborts the others:
    /// - a failure to unload other models is only logged as a warning,
    /// - a failure to connect to the MCP server or to list its tools is
    ///   traced and recorded with code `-1` and no tool,
    /// - a dialog error is traced and recorded with the tool-call code when
    ///   the error is `TestError::ToolCall`, otherwise with `-1`.
    ///
    /// The reported duration covers the whole per-model run, including the
    /// dialog itself.
    ///
    /// Returns a `TestResult` whose `success` is true only when every model
    /// called a tool and recorded no error code.
    pub async fn test(&self, query: &str) -> TestResult {
        let mut models = Vec::new();

        for model in &self.env.ollama_models {
            let start = std::time::Instant::now();
            EngineReport::trace_start(query, model);

            // Milliseconds elapsed since `start`. The u128 -> u64 cast cannot
            // truncate for any realistic test duration.
            let elapsed_ms = || start.elapsed().as_millis() as u64;

            // Traces and builds the result for a failure that happened before
            // the dialog could run (no tool, generic error code).
            let setup_failed = |message: String| {
                EngineReport::trace_fail(query, model, None, None, &message, -1, elapsed_ms());
                TestModelResult {
                    model: model.clone(),
                    tool: None,
                    model_response: None,
                    tool_response: None,
                    code: Some(-1),
                }
            };

            // Setup Ollama client and optionally unload other models (best effort)
            let ollama = OllamaClient::new(&self.env.ollama_host);
            if self.env.ollama_exclusive {
                if let Err(e) = ollama.unload_except(model).await {
                    tracing::warn!("Failed to unload models: {}", e);
                }
            }

            // Connect to MCP server; record the failure instead of panicking
            let mcp = match McpClient::new(&self.env.mcp_host).await {
                Ok(client) => client,
                Err(e) => {
                    models.push(setup_failed(format!(
                        "Failed to connect to MCP server: {:?}",
                        e
                    )));
                    continue;
                }
            };

            // List available tools; record the failure instead of panicking
            let tools = match mcp.list_tools().await {
                Ok(listed) => listed,
                Err(e) => {
                    models.push(setup_failed(format!("Failed to list MCP tools: {:?}", e)));
                    continue;
                }
            };

            // Describe each MCP tool as a function-type tool entry for the model
            let tool_values: Vec<Value> = tools
                .iter()
                .map(|t| {
                    json!({
                        "type": "function",
                        "function": {
                            "name": t.name,
                            "description": t.description,
                            "parameters": t.input_schema
                        }
                    })
                })
                .collect();

            let dialog = Dialog::new(ollama, mcp, model.clone(), tool_values, self.env.timeout);

            // Run dialog first, then take the timing so it includes the run
            let outcome = dialog.run(query).await;
            let duration_ms = elapsed_ms();

            let model_result = match outcome {
                Ok(r) => {
                    EngineReport::trace_ok(
                        query,
                        model,
                        &r.tool,
                        &r.args,
                        &r.model_response,
                        &r.tool_response,
                        duration_ms,
                    );
                    TestModelResult {
                        model: model.clone(),
                        tool: Some(r.tool),
                        model_response: Some(r.model_response),
                        tool_response: Some(r.tool_response),
                        code: None,
                    }
                }
                Err(e) => {
                    // Only tool-call errors carry tool/args/code details
                    let (tool, args, code) = match &e {
                        TestError::ToolCall(r) => (r.tool.as_deref(), r.args.as_deref(), r.code),
                        _ => (None, None, -1),
                    };
                    EngineReport::trace_fail(
                        query,
                        model,
                        tool,
                        args,
                        &e.to_string(),
                        code,
                        duration_ms,
                    );
                    TestModelResult {
                        model: model.clone(),
                        tool: tool.map(String::from),
                        model_response: None,
                        tool_response: None,
                        code: Some(code),
                    }
                }
            };
            models.push(model_result);
        }

        // Success if all models called a tool without error
        let success = models.iter().all(|m| m.tool.is_some() && m.code.is_none());
        TestResult { success, models }
    }

    /// Manual cleanup: runs on_stop, waits for the MCP server to stop,
    /// brings docker compose down and removes the home directory.
    ///
    /// Called automatically on drop. (The original note says it is also
    /// invoked by a `#[dtor]` hook on process exit; that hook is not part of
    /// this file.)
    ///
    /// The `on_stop` callback and the compose handle are taken out of the
    /// engine, so each is used at most once even if this is called again.
    /// This function does not kill the MCP server itself; it only waits for
    /// it to be gone.
    pub fn shutdown(&mut self) {
        // Build report from log file
        let report =
            EngineReport::from_log(&self.state.tee.path().to_string_lossy(), &self.env.mcp_host);
        // User callback with data from on_start
        if let Some(on_stop) = self.events.on_stop.take() {
            on_stop(EnvStop {
                home: self.state.home.path().to_path_buf(),
                log_file: self.state.tee.path().to_path_buf(),
                duration: self.state.start_time.elapsed(),
                // `start_data` is an Option of the callback's optional value;
                // collapse "never started" and "started without data" to None.
                data: self.state.start_data.take().flatten(),
                report,
            });
        }
        // Wait for MCP server to stop (killed by user in on_stop)
        if let Some(runner) = &self.state.runner {
            tracing::debug!("Checking if MCP server stopped...");
            match runner.wait_dead() {
                Ok(()) => tracing::debug!("MCP server stopped"),
                Err(_) => tracing::warn!("MCP server may still be running"),
            }
        }
        // Stop docker compose
        if let Some(mut compose) = self.state.compose.take() {
            compose.down();
        }
        // Remove temp directory (static won't drop TempDir automatically).
        // Best effort: the result is ignored.
        let _ = fs::remove_dir_all(self.state.home.path());
    }
}

/// Runs the engine's `shutdown` routine when the value goes out of scope.
/// `shutdown` already removes the isolated home directory explicitly, so no
/// further cleanup is performed here.
impl Drop for EngineTests {
    fn drop(&mut self) {
        // Delegate everything to the shared cleanup path
        Self::shutdown(self);
    }
}