Skip to main content

vibe_tests/engine/
engine_tests.rs

//! Test engine — main entry point for MCP integration testing.
//! Combines environment, callbacks, and runtime state.
3
4use std::fs;
5use std::time::Duration;
6
7use serde_json::{Value, json};
8use tracing::Level;
9
10use crate::base::error::{TestError, TestsResult};
11use crate::base::result::{TestModelResult, TestResult};
12use crate::docker::compose::Compose;
13use crate::engine::engine_builder::EngineBuilder;
14use crate::engine::engine_dialog::Dialog;
15use crate::engine::engine_env::EngineEnv;
16use crate::engine::engine_events::EngineEvents;
17use crate::engine::engine_report::EngineReport;
18use crate::engine::engine_state::EngineState;
19use crate::env::env_start::EnvStart;
20use crate::env::env_stop::EnvStop;
21use crate::mcp::client::McpClient;
22use crate::mcp::runner::Runner;
23use crate::ollama::client::OllamaClient;
24
25/// Test engine ready to execute queries.
26/// Created via EngineTests::builder().
27pub struct EngineTests {
28    /// Static configuration: hosts, models, log level.
29    pub env: EngineEnv,
30    /// Lifecycle callbacks: on_start, on_run, on_stop, on_log.
31    pub events: EngineEvents,
32    /// Runtime state: home dir, child processes, timing.
33    pub state: EngineState,
34}
35
36impl EngineTests {
37    /// Creates a new builder.
38    pub fn builder() -> EngineBuilder {
39        EngineBuilder::new()
40    }
41
42    /// Initialize infrastructure: compose up, on_start, health check.
43    pub async fn init(&mut self) -> TestsResult<()> {
44        // Skip if already initialized
45        if self.state.initialized {
46            return Ok(());
47        }
48        self.state.initialized = true;
49
50        // Initialize tracing subscriber (writes to log file via tee)
51        let tee = self.state.tee.clone();
52        tracing_subscriber::fmt()
53            .with_writer(move || tee.clone())
54            .with_max_level(Level::TRACE)
55            .try_init()
56            .ok();
57
58        // Start docker compose if configured
59        if let Some(compose_file) = &self.env.compose_file {
60            self.state.compose = Some(
61                Compose::new(compose_file, self.state.tee.clone(), self.env.timeout)
62                    .up()
63                    .await?,
64            );
65        }
66
67        // Call on_start — user does setup, then we wait for MCP health
68        if let Some(on_start) = self.events.on_start.take() {
69            self.state.start_data = Some(
70                on_start(EnvStart {
71                    home: self.state.home.path().to_path_buf(),
72                    tee: self.state.tee.clone(),
73                })
74                .await?,
75            );
76            tokio::time::sleep(Duration::from_secs(2)).await;
77        }
78
79        // Wait for MCP server to become ready
80        let runner = Runner::new(&self.env.mcp_host, self.env.timeout);
81        runner.wait_healthy().await?;
82        self.state.runner = Some(runner);
83
84        Ok(())
85    }
86
87    /// Runs a single test query against the MCP server via LLM across all models.
88    pub async fn test(&self, query: &str) -> TestResult {
89        let mut models = Vec::new();
90
91        for model in &self.env.ollama_models {
92            let start = std::time::Instant::now();
93            EngineReport::trace_start(query, model);
94
95            // Setup Ollama client and optionally unload other models
96            let ollama = OllamaClient::new(&self.env.ollama_host);
97            if self.env.ollama_exclusive {
98                if let Err(e) = ollama.unload_except(model).await {
99                    tracing::warn!("Failed to unload models: {}", e);
100                }
101            }
102
103            // Connect to MCP server and list available tools
104            let mcp = McpClient::new(&self.env.mcp_host).await.unwrap();
105            let tools = mcp.list_tools().await.unwrap();
106            let tool_values: Vec<Value> = tools
107                .iter()
108                .map(|t| {
109                    json!({
110                        "type": "function",
111                        "function": {
112                            "name": t.name,
113                            "description": t.description,
114                            "parameters": t.input_schema
115                        }
116                    })
117                })
118                .collect();
119
120            let dialog = Dialog::new(ollama, mcp, model.clone(), tool_values, self.env.timeout);
121
122            // Run dialog and collect trace + result
123            let duration_ms = start.elapsed().as_millis() as u64;
124            let model_result = match dialog.run(query).await {
125                Ok(r) => {
126                    EngineReport::trace_ok(
127                        query,
128                        model,
129                        &r.tool,
130                        &r.args,
131                        &r.model_response,
132                        &r.tool_response,
133                        duration_ms,
134                    );
135                    TestModelResult {
136                        model: model.clone(),
137                        tool: Some(r.tool),
138                        model_response: Some(r.model_response),
139                        tool_response: Some(r.tool_response),
140                        code: None,
141                    }
142                }
143                Err(e) => {
144                    let (tool, args, code) = match &e {
145                        TestError::ToolCall(r) => (r.tool.as_deref(), r.args.as_deref(), r.code),
146                        _ => (None, None, -1),
147                    };
148                    EngineReport::trace_fail(
149                        query,
150                        model,
151                        tool,
152                        args,
153                        &e.to_string(),
154                        code,
155                        duration_ms,
156                    );
157                    TestModelResult {
158                        model: model.clone(),
159                        tool: tool.map(String::from),
160                        model_response: None,
161                        tool_response: None,
162                        code: Some(code),
163                    }
164                }
165            };
166            models.push(model_result);
167        }
168
169        // Success if all models called a tool without error
170        let success = models.iter().all(|m| m.tool.is_some() && m.code.is_none());
171        TestResult { success, models }
172    }
173
174    /// Manual cleanup: stops compose, stops MCP runner, runs on_stop.
175    /// Called automatically on drop and by #[dtor] on process exit.
176    pub fn shutdown(&mut self) {
177        // Build report from log file
178        let report =
179            EngineReport::from_log(&self.state.tee.path().to_string_lossy(), &self.env.mcp_host);
180        // User callback with data from on_start
181        if let Some(on_stop) = self.events.on_stop.take() {
182            on_stop(EnvStop {
183                home: self.state.home.path().to_path_buf(),
184                log_file: self.state.tee.path().to_path_buf(),
185                duration: self.state.start_time.elapsed(),
186                data: self.state.start_data.take().unwrap_or(None),
187                report,
188            });
189        }
190        // Wait for MCP server to stop (killed by user in on_stop)
191        if let Some(runner) = &self.state.runner {
192            tracing::debug!("Checking if MCP server stopped...");
193            match runner.wait_dead() {
194                Ok(()) => tracing::debug!("MCP server stopped"),
195                Err(_) => tracing::warn!("MCP server may still be running"),
196            }
197        }
198        // Stop docker compose
199        if let Some(mut compose) = self.state.compose.take() {
200            compose.down();
201        }
202        // Remove temp directory (static won't drop TempDir automatically)
203        let _ = fs::remove_dir_all(self.state.home.path());
204    }
205}
206
207/// Cleans up on drop: runs on_stop callback, kills child processes.
208/// TempDir self-destructs after this, deleting the isolated home directory.
209impl Drop for EngineTests {
210    fn drop(&mut self) {
211        self.shutdown();
212    }
213}