tooltest_core/validation/
mod.rs1use std::collections::HashMap;
4use std::env;
5use std::fmt;
6use std::sync::Arc;
7
8use crate::generator::invocation_strategy;
9use crate::{RunConfig, RunFailure, SessionDriver, SessionError, TraceEntry};
10use proptest::strategy::{Strategy, ValueTree};
11use proptest::test_runner::TestRunner;
12use rmcp::model::Tool;
13
14mod listing;
15mod validators;
16
17pub use listing::{list_tools_http, list_tools_stdio, list_tools_with_session, ListToolsError};
18
19#[cfg(test)]
20#[path = "../../tests/internal/validation_unit_tests.rs"]
21mod tests;
22
23#[cfg(test)]
24use crate::SchemaError;
25#[cfg(test)]
26use crate::{HttpConfig, SchemaConfig, StdioConfig};
27#[cfg(test)]
28use listing::list_tools_with_connector;
29#[cfg(test)]
30use validators::{apply_validators, default_validator, output_schema_validator};
31
/// Per-tool case count used when the `TOOLTEST_CASES_PER_TOOL` environment
/// variable does not supply a usable value.
const DEFAULT_CASES_PER_TOOL: usize = 50;
/// Name of the environment variable read to obtain the default per-tool case count.
const CASES_PER_TOOL_ENV: &str = "TOOLTEST_CASES_PER_TOOL";
34
35#[derive(Clone, Debug)]
37pub enum ToolValidationDecision {
38 Accept,
40 Reject(RunFailure),
42 Defer,
44}
45
46pub type ToolValidationFn = Arc<dyn Fn(&Tool, &TraceEntry) -> ToolValidationDecision + Send + Sync>;
48
49#[derive(Clone)]
51pub struct ToolValidationConfig {
52 pub run: RunConfig,
54 pub cases_per_tool: usize,
56 pub validators: Vec<ToolValidationFn>,
58}
59
60impl ToolValidationConfig {
61 pub fn new() -> Self {
63 Self {
64 run: RunConfig::new(),
65 cases_per_tool: default_cases_per_tool(),
66 validators: validators::default_validators(),
67 }
68 }
69
70 pub fn with_cases_per_tool(mut self, cases_per_tool: usize) -> Self {
72 self.cases_per_tool = cases_per_tool.max(1);
73 self
74 }
75
76 pub fn with_run_config(mut self, run: RunConfig) -> Self {
78 self.run = run;
79 self
80 }
81
82 pub fn with_validator(mut self, validator: ToolValidationFn) -> Self {
84 self.validators.insert(0, validator);
85 self
86 }
87}
88
89impl Default for ToolValidationConfig {
90 fn default() -> Self {
91 Self::new()
92 }
93}
94
/// Result of a successful bulk validation run.
#[derive(Debug, Clone)]
pub struct BulkToolValidationSummary {
    /// Names of the tools that were validated.
    pub tools: Vec<String>,
    /// Number of cases that were run for each tool.
    pub cases_per_tool: usize,
}
103
104#[derive(Clone, Debug)]
106pub struct ToolValidationFailure {
107 pub tool: String,
109 pub failure: RunFailure,
111 pub trace: Vec<TraceEntry>,
113}
114
115#[derive(Debug)]
117pub enum ToolValidationError {
118 Session(SessionError),
120 NoToolsAvailable,
122 MissingTools { tools: Vec<String> },
124 Generation { tool: String, reason: String },
126 ValidationFailed(ToolValidationFailure),
128}
129
130impl fmt::Display for ToolValidationError {
131 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132 match self {
133 ToolValidationError::Session(error) => write!(f, "session error: {error:?}"),
134 ToolValidationError::NoToolsAvailable => write!(f, "no tools available for validation"),
135 ToolValidationError::MissingTools { tools } => {
136 write!(f, "missing tools: {}", tools.join(", "))
137 }
138 ToolValidationError::Generation { tool, reason } => {
139 write!(f, "failed to generate invocation for '{tool}': {reason}")
140 }
141 ToolValidationError::ValidationFailed(failure) => write!(
142 f,
143 "tool '{}' failed validation: {}",
144 failure.tool, failure.failure.reason
145 ),
146 }
147 }
148}
149
150impl std::error::Error for ToolValidationError {}
151
152impl From<SessionError> for ToolValidationError {
153 fn from(error: SessionError) -> Self {
154 ToolValidationError::Session(error)
155 }
156}
157
158pub async fn validate_tools(
160 session: &SessionDriver,
161 config: &ToolValidationConfig,
162 tool_names: Option<&[String]>,
163) -> Result<BulkToolValidationSummary, ToolValidationError> {
164 let tools = session.list_tools().await?;
165 if tools.is_empty() {
166 return Err(ToolValidationError::NoToolsAvailable);
167 }
168
169 let tools = select_tools(tools, tool_names)?;
170 for tool in &tools {
171 run_tool_cases(session, config, tool).await?;
172 }
173
174 Ok(BulkToolValidationSummary {
175 tools: tools.iter().map(|tool| tool.name.to_string()).collect(),
176 cases_per_tool: config.cases_per_tool.max(1),
177 })
178}
179
180pub async fn validate_tool(
182 session: &SessionDriver,
183 config: &ToolValidationConfig,
184 tool: &Tool,
185) -> Result<(), ToolValidationError> {
186 run_tool_cases(session, config, tool).await
187}
188
189#[allow(clippy::result_large_err)]
190fn select_tools(
191 tools: Vec<Tool>,
192 tool_names: Option<&[String]>,
193) -> Result<Vec<Tool>, ToolValidationError> {
194 let Some(tool_names) = tool_names else {
195 return Ok(tools);
196 };
197
198 let tool_map: HashMap<String, Tool> = tools
199 .into_iter()
200 .map(|tool| (tool.name.to_string(), tool))
201 .collect();
202
203 let mut missing = Vec::new();
204 let mut selected = Vec::new();
205 for name in tool_names {
206 if let Some(tool) = tool_map.get(name) {
207 selected.push(tool.clone());
208 } else {
209 missing.push(name.clone());
210 }
211 }
212
213 if !missing.is_empty() {
214 return Err(ToolValidationError::MissingTools { tools: missing });
215 }
216
217 Ok(selected)
218}
219
220async fn run_tool_cases(
221 session: &SessionDriver,
222 config: &ToolValidationConfig,
223 tool: &Tool,
224) -> Result<(), ToolValidationError> {
225 let strategy = invocation_strategy(std::slice::from_ref(tool), config.run.predicate.as_ref())
226 .map_err(|error| ToolValidationError::Generation {
227 tool: tool.name.to_string(),
228 reason: error.to_string(),
229 })?;
230
231 let cases = config.cases_per_tool.max(1);
232 let mut runner = TestRunner::default();
233
234 for _ in 0..cases {
235 let tree =
236 strategy
237 .new_tree(&mut runner)
238 .map_err(|reason| ToolValidationError::Generation {
239 tool: tool.name.to_string(),
240 reason: reason.to_string(),
241 })?;
242
243 if run_invocation(session, config, tool, tree.current())
244 .await?
245 .is_some()
246 {
247 let minimized = shrink_failure(session, config, tool, tree).await?;
248 return Err(ToolValidationError::ValidationFailed(minimized));
249 }
250 }
251
252 Ok(())
253}
254
255async fn run_invocation(
256 session: &SessionDriver,
257 config: &ToolValidationConfig,
258 tool: &Tool,
259 invocation: crate::ToolInvocation,
260) -> Result<Option<ToolValidationFailure>, ToolValidationError> {
261 let trace = session.send_tool_call(invocation).await?;
262 if let Err(failure) = validators::apply_validators(config, tool, &trace) {
263 return Ok(Some(ToolValidationFailure {
264 tool: tool.name.to_string(),
265 failure,
266 trace: vec![trace],
267 }));
268 }
269 Ok(None)
270}
271
272async fn shrink_failure<T>(
273 session: &SessionDriver,
274 config: &ToolValidationConfig,
275 tool: &Tool,
276 mut tree: T,
277) -> Result<ToolValidationFailure, ToolValidationError>
278where
279 T: ValueTree<Value = crate::ToolInvocation>,
280{
281 let Some(mut best) = run_invocation(session, config, tool, tree.current()).await? else {
282 return Err(ToolValidationError::Generation {
283 tool: tool.name.to_string(),
284 reason: "expected failing case to shrink".to_string(),
285 });
286 };
287
288 loop {
289 if !tree.simplify() {
290 break;
291 }
292
293 match run_invocation(session, config, tool, tree.current()).await? {
294 Some(failure) => {
295 best = failure;
296 continue;
297 }
298 None => {
299 let mut restored = false;
300 while tree.complicate() {
301 if let Some(failure) =
302 run_invocation(session, config, tool, tree.current()).await?
303 {
304 best = failure;
305 restored = true;
306 break;
307 }
308 }
309 if !restored {
310 break;
311 }
312 }
313 }
314 }
315
316 Ok(best)
317}
318
319fn default_cases_per_tool() -> usize {
320 match env::var(CASES_PER_TOOL_ENV) {
321 Ok(value) => value
322 .parse::<usize>()
323 .ok()
324 .filter(|v| *v > 0)
325 .unwrap_or(DEFAULT_CASES_PER_TOOL),
326 Err(_) => DEFAULT_CASES_PER_TOOL,
327 }
328}