vibe_tests/engine/
engine_tests.rs1use std::fs;
5use std::time::Duration;
6
7use serde_json::{Value, json};
8use tracing::Level;
9
10use crate::base::error::{TestError, TestsResult};
11use crate::base::result::{TestModelResult, TestResult};
12use crate::docker::compose::Compose;
13use crate::engine::engine_builder::EngineBuilder;
14use crate::engine::engine_dialog::Dialog;
15use crate::engine::engine_env::EngineEnv;
16use crate::engine::engine_events::EngineEvents;
17use crate::engine::engine_report::EngineReport;
18use crate::engine::engine_state::EngineState;
19use crate::env::env_start::EnvStart;
20use crate::env::env_stop::EnvStop;
21use crate::mcp::client::McpClient;
22use crate::mcp::runner::Runner;
23use crate::ollama::client::OllamaClient;
24
25pub struct EngineTests {
28 pub env: EngineEnv,
30 pub events: EngineEvents,
32 pub state: EngineState,
34}
35
36impl EngineTests {
37 pub fn builder() -> EngineBuilder {
39 EngineBuilder::new()
40 }
41
42 pub async fn init(&mut self) -> TestsResult<()> {
44 if self.state.initialized {
46 return Ok(());
47 }
48 self.state.initialized = true;
49
50 let tee = self.state.tee.clone();
52 tracing_subscriber::fmt()
53 .with_writer(move || tee.clone())
54 .with_max_level(Level::TRACE)
55 .try_init()
56 .ok();
57
58 if let Some(compose_file) = &self.env.compose_file {
60 self.state.compose = Some(
61 Compose::new(compose_file, self.state.tee.clone(), self.env.timeout)
62 .up()
63 .await?,
64 );
65 }
66
67 if let Some(on_start) = self.events.on_start.take() {
69 self.state.start_data = Some(
70 on_start(EnvStart {
71 home: self.state.home.path().to_path_buf(),
72 tee: self.state.tee.clone(),
73 })
74 .await?,
75 );
76 tokio::time::sleep(Duration::from_secs(2)).await;
77 }
78
79 let runner = Runner::new(&self.env.mcp_host, self.env.timeout);
81 runner.wait_healthy().await?;
82 self.state.runner = Some(runner);
83
84 Ok(())
85 }
86
87 pub async fn test(&self, query: &str) -> TestResult {
89 let mut models = Vec::new();
90
91 for model in &self.env.ollama_models {
92 let start = std::time::Instant::now();
93 EngineReport::trace_start(query, model);
94
95 let ollama = OllamaClient::new(&self.env.ollama_host);
97 if self.env.ollama_exclusive {
98 if let Err(e) = ollama.unload_except(model).await {
99 tracing::warn!("Failed to unload models: {}", e);
100 }
101 }
102
103 let mcp = McpClient::new(&self.env.mcp_host).await.unwrap();
105 let tools = mcp.list_tools().await.unwrap();
106 let tool_values: Vec<Value> = tools
107 .iter()
108 .map(|t| {
109 json!({
110 "type": "function",
111 "function": {
112 "name": t.name,
113 "description": t.description,
114 "parameters": t.input_schema
115 }
116 })
117 })
118 .collect();
119
120 let dialog = Dialog::new(ollama, mcp, model.clone(), tool_values, self.env.timeout);
121
122 let duration_ms = start.elapsed().as_millis() as u64;
124 let model_result = match dialog.run(query).await {
125 Ok(r) => {
126 EngineReport::trace_ok(
127 query,
128 model,
129 &r.tool,
130 &r.args,
131 &r.model_response,
132 &r.tool_response,
133 duration_ms,
134 );
135 TestModelResult {
136 model: model.clone(),
137 tool: Some(r.tool),
138 model_response: Some(r.model_response),
139 tool_response: Some(r.tool_response),
140 code: None,
141 }
142 }
143 Err(e) => {
144 let (tool, args, code) = match &e {
145 TestError::ToolCall(r) => (r.tool.as_deref(), r.args.as_deref(), r.code),
146 _ => (None, None, -1),
147 };
148 EngineReport::trace_fail(
149 query,
150 model,
151 tool,
152 args,
153 &e.to_string(),
154 code,
155 duration_ms,
156 );
157 TestModelResult {
158 model: model.clone(),
159 tool: tool.map(String::from),
160 model_response: None,
161 tool_response: None,
162 code: Some(code),
163 }
164 }
165 };
166 models.push(model_result);
167 }
168
169 let success = models.iter().all(|m| m.tool.is_some() && m.code.is_none());
171 TestResult { success, models }
172 }
173
174 pub fn shutdown(&mut self) {
177 let report =
179 EngineReport::from_log(&self.state.tee.path().to_string_lossy(), &self.env.mcp_host);
180 if let Some(on_stop) = self.events.on_stop.take() {
182 on_stop(EnvStop {
183 home: self.state.home.path().to_path_buf(),
184 log_file: self.state.tee.path().to_path_buf(),
185 duration: self.state.start_time.elapsed(),
186 data: self.state.start_data.take().unwrap_or(None),
187 report,
188 });
189 }
190 if let Some(runner) = &self.state.runner {
192 tracing::debug!("Checking if MCP server stopped...");
193 match runner.wait_dead() {
194 Ok(()) => tracing::debug!("MCP server stopped"),
195 Err(_) => tracing::warn!("MCP server may still be running"),
196 }
197 }
198 if let Some(mut compose) = self.state.compose.take() {
200 compose.down();
201 }
202 let _ = fs::remove_dir_all(self.state.home.path());
204 }
205}
206
207impl Drop for EngineTests {
210 fn drop(&mut self) {
211 self.shutdown();
212 }
213}