tooltest_core/validation/
mod.rs

1//! Helpers for enumerating tools and validating tool behavior.
2
3use std::collections::HashMap;
4use std::env;
5use std::fmt;
6use std::sync::Arc;
7
8use crate::generator::invocation_strategy;
9use crate::{RunConfig, RunFailure, SessionDriver, SessionError, TraceEntry};
10use proptest::strategy::{Strategy, ValueTree};
11use proptest::test_runner::TestRunner;
12use rmcp::model::Tool;
13
14mod listing;
15mod validators;
16
17pub use listing::{list_tools_http, list_tools_stdio, list_tools_with_session, ListToolsError};
18
19#[cfg(test)]
20#[path = "../../tests/internal/validation_unit_tests.rs"]
21mod tests;
22
23#[cfg(test)]
24use crate::SchemaError;
25#[cfg(test)]
26use crate::{HttpConfig, SchemaConfig, StdioConfig};
27#[cfg(test)]
28use listing::list_tools_with_connector;
29#[cfg(test)]
30use validators::{apply_validators, default_validator, output_schema_validator};
31
/// Fallback number of cases per tool, used when the environment variable is
/// unset or does not hold a positive integer.
const DEFAULT_CASES_PER_TOOL: usize = 50;
/// Name of the environment variable consulted for the default per-tool case count.
const CASES_PER_TOOL_ENV: &str = "TOOLTEST_CASES_PER_TOOL";
34
/// Middleware decision returned by a tool validator.
///
/// A validator inspects one tool response and answers with exactly one of
/// these variants.
#[derive(Clone, Debug)]
pub enum ToolValidationDecision {
    /// Accept the tool response and stop the validation chain.
    Accept,
    /// Reject the tool response, carrying the failure that describes why.
    Reject(RunFailure),
    /// Defer to the next validator in the chain.
    Defer,
}
45
/// Callable used to validate a tool response.
///
/// Takes the tool definition and the trace entry under validation and returns
/// a [`ToolValidationDecision`]. The callable is shared through an [`Arc`] and
/// must be `Send + Sync`.
pub type ToolValidationFn = Arc<dyn Fn(&Tool, &TraceEntry) -> ToolValidationDecision + Send + Sync>;
48
/// Configuration for bulk tool validation.
///
/// Create one with [`ToolValidationConfig::new`] (or `Default`) and adjust it
/// through the `with_*` builder methods.
#[derive(Clone)]
pub struct ToolValidationConfig {
    /// Run-level configuration and predicates.
    pub run: RunConfig,
    /// Number of cases to exercise per tool.
    ///
    /// The validation loop in this module treats values below 1 as 1.
    pub cases_per_tool: usize,
    /// Validators invoked after each tool call.
    ///
    /// Validators added through `with_validator` are placed at the front of
    /// this list.
    pub validators: Vec<ToolValidationFn>,
}
59
60impl ToolValidationConfig {
61    /// Creates a validation configuration with defaults.
62    pub fn new() -> Self {
63        Self {
64            run: RunConfig::new(),
65            cases_per_tool: default_cases_per_tool(),
66            validators: validators::default_validators(),
67        }
68    }
69
70    /// Sets the per-tool case count.
71    pub fn with_cases_per_tool(mut self, cases_per_tool: usize) -> Self {
72        self.cases_per_tool = cases_per_tool.max(1);
73        self
74    }
75
76    /// Sets the run configuration used for validation.
77    pub fn with_run_config(mut self, run: RunConfig) -> Self {
78        self.run = run;
79        self
80    }
81
82    /// Adds a response validator ahead of the defaults.
83    pub fn with_validator(mut self, validator: ToolValidationFn) -> Self {
84        self.validators.insert(0, validator);
85        self
86    }
87}
88
89impl Default for ToolValidationConfig {
90    fn default() -> Self {
91        Self::new()
92    }
93}
94
/// Summary of a bulk validation run.
///
/// Produced by `validate_tools` once every selected tool has passed.
#[derive(Clone, Debug)]
pub struct BulkToolValidationSummary {
    /// Tool names that were validated.
    pub tools: Vec<String>,
    /// Number of cases exercised per tool (reported as at least 1).
    pub cases_per_tool: usize,
}
103
/// Failure details for a tool validation run.
#[derive(Clone, Debug)]
pub struct ToolValidationFailure {
    /// The tool that failed validation.
    pub tool: String,
    /// Failure reason.
    pub failure: RunFailure,
    /// Trace entries for the minimized failing case.
    ///
    /// As built in this module, this holds the single trace entry of the
    /// failing tool call.
    pub trace: Vec<TraceEntry>,
}
114
/// Errors emitted while validating tools.
#[derive(Debug)]
pub enum ToolValidationError {
    /// Failed to communicate with the MCP endpoint.
    Session(SessionError),
    /// No tools are available for validation.
    NoToolsAvailable,
    /// Requested tool names were not found.
    MissingTools {
        /// The requested names that had no matching tool, in request order.
        tools: Vec<String>,
    },
    /// Tool invocation generation failed.
    Generation {
        /// Name of the tool for which generation failed.
        tool: String,
        /// Textual description of the generation failure.
        reason: String,
    },
    /// A tool validation failed.
    ValidationFailed(ToolValidationFailure),
}
129
130impl fmt::Display for ToolValidationError {
131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132        match self {
133            ToolValidationError::Session(error) => write!(f, "session error: {error:?}"),
134            ToolValidationError::NoToolsAvailable => write!(f, "no tools available for validation"),
135            ToolValidationError::MissingTools { tools } => {
136                write!(f, "missing tools: {}", tools.join(", "))
137            }
138            ToolValidationError::Generation { tool, reason } => {
139                write!(f, "failed to generate invocation for '{tool}': {reason}")
140            }
141            ToolValidationError::ValidationFailed(failure) => write!(
142                f,
143                "tool '{}' failed validation: {}",
144                failure.tool, failure.failure.reason
145            ),
146        }
147    }
148}
149
// Marker impl: only the trait's default methods are used, so no underlying
// `source` error is exposed.
impl std::error::Error for ToolValidationError {}
151
152impl From<SessionError> for ToolValidationError {
153    fn from(error: SessionError) -> Self {
154        ToolValidationError::Session(error)
155    }
156}
157
158/// Validates tools by name, or all tools when no name list is provided.
159pub async fn validate_tools(
160    session: &SessionDriver,
161    config: &ToolValidationConfig,
162    tool_names: Option<&[String]>,
163) -> Result<BulkToolValidationSummary, ToolValidationError> {
164    let tools = session.list_tools().await?;
165    if tools.is_empty() {
166        return Err(ToolValidationError::NoToolsAvailable);
167    }
168
169    let tools = select_tools(tools, tool_names)?;
170    for tool in &tools {
171        run_tool_cases(session, config, tool).await?;
172    }
173
174    Ok(BulkToolValidationSummary {
175        tools: tools.iter().map(|tool| tool.name.to_string()).collect(),
176        cases_per_tool: config.cases_per_tool.max(1),
177    })
178}
179
/// Validates a single tool definition.
///
/// Runs the configured number of generated cases against `tool` through
/// `session`, without listing or selecting tools first.
///
/// # Errors
///
/// Returns whatever `run_tool_cases` reports: a session error, a generation
/// error, or the minimized validation failure.
pub async fn validate_tool(
    session: &SessionDriver,
    config: &ToolValidationConfig,
    tool: &Tool,
) -> Result<(), ToolValidationError> {
    run_tool_cases(session, config, tool).await
}
188
189#[allow(clippy::result_large_err)]
190fn select_tools(
191    tools: Vec<Tool>,
192    tool_names: Option<&[String]>,
193) -> Result<Vec<Tool>, ToolValidationError> {
194    let Some(tool_names) = tool_names else {
195        return Ok(tools);
196    };
197
198    let tool_map: HashMap<String, Tool> = tools
199        .into_iter()
200        .map(|tool| (tool.name.to_string(), tool))
201        .collect();
202
203    let mut missing = Vec::new();
204    let mut selected = Vec::new();
205    for name in tool_names {
206        if let Some(tool) = tool_map.get(name) {
207            selected.push(tool.clone());
208        } else {
209            missing.push(name.clone());
210        }
211    }
212
213    if !missing.is_empty() {
214        return Err(ToolValidationError::MissingTools { tools: missing });
215    }
216
217    Ok(selected)
218}
219
220async fn run_tool_cases(
221    session: &SessionDriver,
222    config: &ToolValidationConfig,
223    tool: &Tool,
224) -> Result<(), ToolValidationError> {
225    let strategy = invocation_strategy(std::slice::from_ref(tool), config.run.predicate.as_ref())
226        .map_err(|error| ToolValidationError::Generation {
227        tool: tool.name.to_string(),
228        reason: error.to_string(),
229    })?;
230
231    let cases = config.cases_per_tool.max(1);
232    let mut runner = TestRunner::default();
233
234    for _ in 0..cases {
235        let tree =
236            strategy
237                .new_tree(&mut runner)
238                .map_err(|reason| ToolValidationError::Generation {
239                    tool: tool.name.to_string(),
240                    reason: reason.to_string(),
241                })?;
242
243        if run_invocation(session, config, tool, tree.current())
244            .await?
245            .is_some()
246        {
247            let minimized = shrink_failure(session, config, tool, tree).await?;
248            return Err(ToolValidationError::ValidationFailed(minimized));
249        }
250    }
251
252    Ok(())
253}
254
255async fn run_invocation(
256    session: &SessionDriver,
257    config: &ToolValidationConfig,
258    tool: &Tool,
259    invocation: crate::ToolInvocation,
260) -> Result<Option<ToolValidationFailure>, ToolValidationError> {
261    let trace = session.send_tool_call(invocation).await?;
262    if let Err(failure) = validators::apply_validators(config, tool, &trace) {
263        return Ok(Some(ToolValidationFailure {
264            tool: tool.name.to_string(),
265            failure,
266            trace: vec![trace],
267        }));
268    }
269    Ok(None)
270}
271
/// Shrinks a failing invocation by walking the value tree and re-running the
/// tool after every step.
///
/// The tree's current value is executed first and must fail; otherwise a
/// `Generation` error ("expected failing case to shrink") is returned. After
/// that the tree is simplified step by step. Every candidate that still
/// fails becomes the new best result. When a simplified candidate passes,
/// the tree is complicated again until a failing candidate is found or the
/// tree reports that it cannot complicate further.
///
/// Returns the most recent failing case observed. Session errors from any
/// re-run abort the shrink and propagate.
async fn shrink_failure<T>(
    session: &SessionDriver,
    config: &ToolValidationConfig,
    tool: &Tool,
    mut tree: T,
) -> Result<ToolValidationFailure, ToolValidationError>
where
    T: ValueTree<Value = crate::ToolInvocation>,
{
    // Re-run the starting value so `best` always holds a failure produced here.
    let Some(mut best) = run_invocation(session, config, tool, tree.current()).await? else {
        return Err(ToolValidationError::Generation {
            tool: tool.name.to_string(),
            reason: "expected failing case to shrink".to_string(),
        });
    };

    loop {
        // Stop once the tree reports that no simplification step was made.
        if !tree.simplify() {
            break;
        }

        match run_invocation(session, config, tool, tree.current()).await? {
            // Still failing: keep the simpler case and try to simplify further.
            Some(failure) => {
                best = failure;
                continue;
            }
            // The simplified case passes: step back towards the failing input.
            None => {
                let mut restored = false;
                while tree.complicate() {
                    if let Some(failure) =
                        run_invocation(session, config, tool, tree.current()).await?
                    {
                        best = failure;
                        restored = true;
                        break;
                    }
                }
                // No failing candidate was recovered; `best` keeps the last failure seen.
                if !restored {
                    break;
                }
            }
        }
    }

    Ok(best)
}
318
319fn default_cases_per_tool() -> usize {
320    match env::var(CASES_PER_TOOL_ENV) {
321        Ok(value) => value
322            .parse::<usize>()
323            .ok()
324            .filter(|v| *v > 0)
325            .unwrap_or(DEFAULT_CASES_PER_TOOL),
326        Err(_) => DEFAULT_CASES_PER_TOOL,
327    }
328}
tooltest_core/validation/mod.rs

tooltest_core/validation/
mod.rs