tooltest_core/
lib.rs

1//! Public API types for configuring and reporting tooltest runs.
2#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
3
4use std::collections::BTreeMap;
5use std::fmt;
6use std::sync::Arc;
7
8use serde::{Deserialize, Serialize};
9use serde_json::{Number, Value as JsonValue};
10
11mod generator;
12mod output_schema;
13mod runner;
14pub mod schema;
15pub mod session;
16mod validation;
17
18pub use rmcp::model::{
19    CallToolRequestParam, CallToolResult, ErrorCode, ErrorData, JsonObject, Tool,
20};
21pub use rmcp::service::{ClientInitializeError, ServiceError};
22pub use runner::{run_http, run_stdio, run_with_session, RunnerOptions};
23pub use schema::{
24    parse_call_tool_request, parse_call_tool_result, parse_list_tools, schema_version_label,
25    SchemaError,
26};
27pub use session::{SessionDriver, SessionError};
28pub use validation::{
29    list_tools_http, list_tools_stdio, list_tools_with_session, validate_tool, validate_tools,
30    BulkToolValidationSummary, ListToolsError, ToolValidationConfig, ToolValidationDecision,
31    ToolValidationError, ToolValidationFailure, ToolValidationFn,
32};
33
34#[cfg(test)]
35#[path = "../tests/internal/mod.rs"]
36mod tests;
37
38/// Schema versions supported by the tooltest core.
39#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
40#[serde(rename_all = "kebab-case")]
41pub enum SchemaVersion {
42    /// MCP schema version 2025-11-25.
43    #[default]
44    V2025_11_25,
45    /// Any other explicitly configured schema version string.
46    Other(String),
47}
48
49/// Configuration for state-machine generator behavior.
50///
51/// State-machine generation is always used for sequence runs; there is no legacy mode.
52#[derive(Clone, Debug, Default, Serialize, Deserialize)]
53pub struct StateMachineConfig {
54    /// Seed numbers added to the corpus before generation.
55    pub seed_numbers: Vec<Number>,
56    /// Seed strings added to the corpus before generation.
57    pub seed_strings: Vec<String>,
58    /// Mine whitespace-delimited text tokens into the corpus.
59    pub mine_text: bool,
60    /// Dump the final state-machine corpus after the run completes.
61    pub dump_corpus: bool,
62    /// Log newly mined corpus values after each tool response.
63    pub log_corpus_deltas: bool,
64    /// Allow schema-based generation when corpus lacks required values.
65    pub lenient_sourcing: bool,
66    /// Optional allowlist for coverage warnings and validation.
67    pub coverage_allowlist: Option<Vec<String>>,
68    /// Optional blocklist for coverage warnings and validation.
69    pub coverage_blocklist: Option<Vec<String>>,
70    /// Coverage validation rules applied after state-machine runs.
71    pub coverage_rules: Vec<CoverageRule>,
72}
73
74impl StateMachineConfig {
75    /// Sets the seed numbers for the state-machine corpus.
76    pub fn with_seed_numbers(mut self, seed_numbers: Vec<Number>) -> Self {
77        self.seed_numbers = seed_numbers;
78        self
79    }
80
81    /// Sets the seed strings for the state-machine corpus.
82    pub fn with_seed_strings(mut self, seed_strings: Vec<String>) -> Self {
83        self.seed_strings = seed_strings;
84        self
85    }
86
87    /// Enables mining of whitespace-delimited text tokens into the corpus.
88    pub fn with_mine_text(mut self, mine_text: bool) -> Self {
89        self.mine_text = mine_text;
90        self
91    }
92
93    /// Enables dumping the final state-machine corpus after the run completes.
94    pub fn with_dump_corpus(mut self, dump_corpus: bool) -> Self {
95        self.dump_corpus = dump_corpus;
96        self
97    }
98
99    /// Enables logging newly mined corpus values after each tool response.
100    pub fn with_log_corpus_deltas(mut self, log_corpus_deltas: bool) -> Self {
101        self.log_corpus_deltas = log_corpus_deltas;
102        self
103    }
104
105    /// Enables schema-based generation when corpus lacks required values.
106    pub fn with_lenient_sourcing(mut self, lenient_sourcing: bool) -> Self {
107        self.lenient_sourcing = lenient_sourcing;
108        self
109    }
110
111    /// Sets the coverage allowlist for state-machine runs.
112    pub fn with_coverage_allowlist(mut self, coverage_allowlist: Vec<String>) -> Self {
113        self.coverage_allowlist = Some(coverage_allowlist);
114        self
115    }
116
117    /// Sets the coverage blocklist for state-machine runs.
118    pub fn with_coverage_blocklist(mut self, coverage_blocklist: Vec<String>) -> Self {
119        self.coverage_blocklist = Some(coverage_blocklist);
120        self
121    }
122
123    /// Sets the coverage validation rules for state-machine runs.
124    pub fn with_coverage_rules(mut self, coverage_rules: Vec<CoverageRule>) -> Self {
125        self.coverage_rules = coverage_rules;
126        self
127    }
128}
129
130/// Configuration for MCP schema parsing and validation.
131#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
132pub struct SchemaConfig {
133    /// The selected MCP schema version.
134    pub version: SchemaVersion,
135}
136
137/// Configuration for a stdio-based MCP endpoint.
138#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
139pub struct StdioConfig {
140    /// Command to execute for the MCP server.
141    pub command: String,
142    /// Command-line arguments passed to the MCP server.
143    pub args: Vec<String>,
144    /// Environment variables to add or override for the MCP process.
145    pub env: BTreeMap<String, String>,
146    /// Optional working directory for the MCP process.
147    pub cwd: Option<String>,
148}
149
150impl StdioConfig {
151    /// Creates a stdio configuration with defaults for args, env, and cwd.
152    pub fn new(command: impl Into<String>) -> Self {
153        Self {
154            command: command.into(),
155            args: Vec::new(),
156            env: BTreeMap::new(),
157            cwd: None,
158        }
159    }
160}
161
162/// Configuration for a pre-run hook command.
163#[derive(Clone, Debug, Eq, PartialEq)]
164pub struct PreRunHook {
165    /// Shell command string to execute before each run and validation.
166    pub command: String,
167    /// Environment variables to add or override for the hook process.
168    pub env: BTreeMap<String, String>,
169    /// Optional working directory for the hook process.
170    pub cwd: Option<String>,
171}
172
173impl PreRunHook {
174    /// Creates a pre-run hook with default env and cwd settings.
175    pub fn new(command: impl Into<String>) -> Self {
176        Self {
177            command: command.into(),
178            env: BTreeMap::new(),
179            cwd: None,
180        }
181    }
182
183    fn apply_stdio_context(&mut self, endpoint: &StdioConfig) {
184        self.env = endpoint.env.clone();
185        self.cwd = endpoint.cwd.clone();
186    }
187}
188
189/// Configuration for an HTTP-based MCP endpoint.
190#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
191pub struct HttpConfig {
192    /// The HTTP endpoint URL for MCP requests.
193    pub url: String,
194    /// Optional bearer token to attach to Authorization headers.
195    pub auth_token: Option<String>,
196}
197
198/// Predicate callback used to decide whether a tool invocation is eligible.
199pub type ToolPredicate = Arc<dyn Fn(&str, &JsonValue) -> bool + Send + Sync>;
200pub type ToolNamePredicate = Arc<dyn Fn(&str) -> bool + Send + Sync>;
201
202/// Declarative JSON assertion DSL container.
203///
204/// Runs also apply default assertions that fail on tool error responses and
205/// validate structured output against declared output schemas.
206///
207/// Example:
208/// ```
209/// use serde_json::json;
210/// use tooltest_core::{
211///     AssertionCheck, AssertionRule, AssertionSet, AssertionTarget, ResponseAssertion,
212/// };
213///
214/// let assertions = AssertionSet {
215///     rules: vec![AssertionRule::Response(ResponseAssertion {
216///         tool: Some("echo".to_string()),
217///         checks: vec![AssertionCheck {
218///             target: AssertionTarget::StructuredOutput,
219///             pointer: "/status".to_string(),
220///             expected: json!("ok"),
221///         }],
222///     })],
223/// };
224/// ```
225#[derive(Clone, Debug, Default, Serialize, Deserialize)]
226pub struct AssertionSet {
227    /// Assertion rules evaluated during or after a run.
228    pub rules: Vec<AssertionRule>,
229}
230
231/// A single assertion rule in the JSON DSL.
232#[derive(Clone, Debug, Serialize, Deserialize)]
233#[serde(tag = "scope", content = "rule", rename_all = "snake_case")]
234pub enum AssertionRule {
235    /// Assertions evaluated against each tool response.
236    Response(ResponseAssertion),
237    /// Assertions evaluated against the full run sequence.
238    Sequence(SequenceAssertion),
239}
240
241/// Assertions evaluated against a tool response.
242#[derive(Clone, Debug, Serialize, Deserialize)]
243pub struct ResponseAssertion {
244    /// Optional tool name filter; when set, only matching tools are checked.
245    pub tool: Option<String>,
246    /// Checks applied to the response payloads (input, output, or structured output).
247    pub checks: Vec<AssertionCheck>,
248}
249
250/// Assertions evaluated against the entire run sequence.
251#[derive(Clone, Debug, Serialize, Deserialize)]
252pub struct SequenceAssertion {
253    /// Checks applied to the sequence payload.
254    pub checks: Vec<AssertionCheck>,
255}
256
257/// A single JSON-pointer based check.
258///
259/// `pointer` uses RFC 6901 JSON Pointer syntax.
260#[derive(Clone, Debug, Serialize, Deserialize)]
261pub struct AssertionCheck {
262    /// The target payload to inspect.
263    pub target: AssertionTarget,
264    /// JSON Pointer string used to select the value to compare.
265    pub pointer: String,
266    /// Expected JSON value at the pointer location.
267    pub expected: JsonValue,
268}
269
270/// Payload targets that can be inspected by assertions.
271#[derive(Clone, Debug, Serialize, Deserialize)]
272#[serde(rename_all = "snake_case")]
273pub enum AssertionTarget {
274    /// The generated tool input object.
275    Input,
276    /// The raw tool output object.
277    Output,
278    /// The structured tool output object, when present or required by schema.
279    StructuredOutput,
280    /// The full run sequence payload.
281    Sequence,
282}
283
284/// Top-level configuration for executing a tooltest run.
285#[derive(Clone)]
286pub struct RunConfig {
287    /// MCP schema configuration.
288    pub schema: SchemaConfig,
289    /// Optional predicate to filter eligible tools.
290    pub predicate: Option<ToolPredicate>,
291    /// Optional predicate to filter eligible tools by name.
292    pub tool_filter: Option<ToolNamePredicate>,
293    /// Assertion rules to evaluate during the run.
294    pub assertions: AssertionSet,
295    /// State-machine generator configuration.
296    pub state_machine: StateMachineConfig,
297    /// Optional pre-run hook to execute before validation and each case.
298    pub pre_run_hook: Option<PreRunHook>,
299}
300
301impl RunConfig {
302    /// Creates a run configuration with defaults for schema and assertions.
303    ///
304    /// The state-machine generator is always used, and it is strict by default
305    /// (required values must come from the corpus unless lenient sourcing is enabled).
306    pub fn new() -> Self {
307        Self {
308            schema: SchemaConfig::default(),
309            predicate: None,
310            tool_filter: None,
311            assertions: AssertionSet::default(),
312            state_machine: StateMachineConfig::default(),
313            pre_run_hook: None,
314        }
315    }
316
317    /// Sets the schema configuration.
318    pub fn with_schema(mut self, schema: SchemaConfig) -> Self {
319        self.schema = schema;
320        self
321    }
322
323    /// Sets the tool predicate used for eligibility filtering.
324    pub fn with_predicate(mut self, predicate: ToolPredicate) -> Self {
325        self.predicate = Some(predicate);
326        self
327    }
328
329    /// Sets the tool name predicate used for eligibility filtering.
330    pub fn with_tool_filter(mut self, predicate: ToolNamePredicate) -> Self {
331        self.tool_filter = Some(predicate);
332        self
333    }
334
335    /// Sets the assertion rules for the run.
336    pub fn with_assertions(mut self, assertions: AssertionSet) -> Self {
337        self.assertions = assertions;
338        self
339    }
340
341    /// Sets the state-machine generator configuration.
342    pub fn with_state_machine(mut self, state_machine: StateMachineConfig) -> Self {
343        self.state_machine = state_machine;
344        self
345    }
346
347    /// Sets the pre-run hook for this run.
348    pub fn with_pre_run_hook(mut self, hook: PreRunHook) -> Self {
349        self.pre_run_hook = Some(hook);
350        self
351    }
352
353    pub(crate) fn apply_stdio_pre_run_context(&mut self, endpoint: &StdioConfig) {
354        if let Some(hook) = self.pre_run_hook.as_mut() {
355            hook.apply_stdio_context(endpoint);
356        }
357    }
358}
359
360impl Default for RunConfig {
361    fn default() -> Self {
362        Self::new()
363    }
364}
365
366impl fmt::Debug for RunConfig {
367    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
368        f.debug_struct("RunConfig")
369            .field("schema", &self.schema)
370            .field("predicate", &self.predicate.is_some())
371            .field("tool_filter", &self.tool_filter.is_some())
372            .field("assertions", &self.assertions)
373            .field("state_machine", &self.state_machine)
374            .field("pre_run_hook", &self.pre_run_hook.is_some())
375            .finish()
376    }
377}
378
379/// A generated tool invocation.
380pub type ToolInvocation = CallToolRequestParam;
381
382/// A trace entry capturing MCP interactions.
383#[derive(Clone, Debug, Serialize, Deserialize)]
384#[serde(tag = "kind", rename_all = "snake_case")]
385pub enum TraceEntry {
386    /// A list-tools request was issued.
387    ListTools {
388        /// Optional failure detail when list-tools fails.
389        #[serde(skip_serializing_if = "Option::is_none")]
390        failure_reason: Option<String>,
391    },
392    /// A tool call, optionally annotated with a response on failure.
393    ToolCall {
394        /// The invocation that was sent.
395        invocation: ToolInvocation,
396        /// Optional response payload (omitted in compact traces).
397        #[serde(skip_serializing_if = "Option::is_none")]
398        response: Option<CallToolResult>,
399        /// Optional failure detail when a call fails.
400        #[serde(skip_serializing_if = "Option::is_none")]
401        failure_reason: Option<String>,
402    },
403}
404
405impl TraceEntry {
406    /// Creates a trace entry for a list-tools call.
407    pub fn list_tools() -> Self {
408        Self::ListTools {
409            failure_reason: None,
410        }
411    }
412
413    /// Creates a trace entry for a failed list-tools call.
414    pub fn list_tools_with_failure(reason: String) -> Self {
415        Self::ListTools {
416            failure_reason: Some(reason),
417        }
418    }
419
420    /// Creates a trace entry for a tool call without a response.
421    pub fn tool_call(invocation: ToolInvocation) -> Self {
422        Self::ToolCall {
423            invocation,
424            response: None,
425            failure_reason: None,
426        }
427    }
428
429    /// Creates a trace entry for a tool call with a response.
430    pub fn tool_call_with_response(invocation: ToolInvocation, response: CallToolResult) -> Self {
431        Self::ToolCall {
432            invocation,
433            response: Some(response),
434            failure_reason: None,
435        }
436    }
437
438    /// Returns the invocation and response when the entry is a tool call.
439    pub fn as_tool_call(&self) -> Option<(&ToolInvocation, Option<&CallToolResult>)> {
440        match self {
441            TraceEntry::ToolCall {
442                invocation,
443                response,
444                ..
445            } => Some((invocation, response.as_ref())),
446            TraceEntry::ListTools { .. } => None,
447        }
448    }
449}
450
451/// A minimized failing sequence from property-based testing.
452#[derive(Clone, Debug, Serialize, Deserialize)]
453pub struct MinimizedSequence {
454    /// The minimized tool invocations that reproduce the failure.
455    pub invocations: Vec<ToolInvocation>,
456}
457
458/// Outcome of a tooltest run.
459#[derive(Clone, Debug, Serialize, Deserialize)]
460#[serde(tag = "status", rename_all = "snake_case")]
461pub enum RunOutcome {
462    /// The run completed without assertion failures.
463    Success,
464    /// The run failed due to an error or assertion.
465    Failure(RunFailure),
466}
467
468/// Failure details for a tooltest run.
469#[derive(Clone, Debug, Serialize, Deserialize)]
470pub struct RunFailure {
471    /// Short description of the failure.
472    pub reason: String,
473    /// Optional structured failure code.
474    pub code: Option<String>,
475    /// Optional structured failure details.
476    pub details: Option<JsonValue>,
477}
478
479impl RunFailure {
480    /// Creates a run failure with only a reason string.
481    pub fn new(reason: impl Into<String>) -> Self {
482        Self {
483            reason: reason.into(),
484            code: None,
485            details: None,
486        }
487    }
488}
489
490/// Warning emitted during a tooltest run.
491#[derive(Clone, Debug, Serialize, Deserialize)]
492pub struct RunWarning {
493    /// Structured warning code.
494    pub code: RunWarningCode,
495    /// Human-readable warning message.
496    pub message: String,
497    /// Optional tool name associated with the warning.
498    #[serde(skip_serializing_if = "Option::is_none")]
499    pub tool: Option<String>,
500}
501
502/// Structured warning codes for tooltest runs.
503#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
504#[serde(rename_all = "snake_case")]
505pub enum RunWarningCode {
506    SchemaUnsupportedKeyword,
507}
508
509/// Warning describing a coverage issue in a state-machine run.
510#[derive(Clone, Debug, Serialize, Deserialize)]
511pub struct CoverageWarning {
512    /// Tool name that could not be called.
513    pub tool: String,
514    /// Reason the tool could not be called.
515    pub reason: CoverageWarningReason,
516}
517
518/// Structured reason codes for coverage warnings.
519#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
520#[serde(rename_all = "snake_case")]
521pub enum CoverageWarningReason {
522    MissingString,
523    MissingInteger,
524    MissingNumber,
525    MissingRequiredValue,
526}
527
528/// Coverage report for state-machine runs.
529#[derive(Clone, Debug, Serialize, Deserialize)]
530pub struct CoverageReport {
531    /// Successful tool call counts.
532    pub counts: BTreeMap<String, u64>,
533    /// Coverage warnings for uncallable tools.
534    pub warnings: Vec<CoverageWarning>,
535}
536
537/// Snapshot of the state-machine corpus.
538#[derive(Clone, Debug, Serialize, Deserialize)]
539pub struct CorpusReport {
540    /// Numbers observed in the corpus.
541    pub numbers: Vec<Number>,
542    /// Integers observed in the corpus.
543    pub integers: Vec<i64>,
544    /// Strings observed in the corpus.
545    pub strings: Vec<String>,
546}
547
548/// Coverage validation rules for state-machine runs.
549#[derive(Clone, Debug, Serialize, Deserialize)]
550#[serde(tag = "rule", rename_all = "snake_case")]
551pub enum CoverageRule {
552    /// Require a minimum number of successful calls per tool.
553    MinCallsPerTool { min: u64 },
554    /// Require that all callable tools are called at least once.
555    NoUncalledTools,
556    /// Require a minimum percentage of callable tools to be called.
557    PercentCalled { min_percent: f64 },
558}
559
560impl CoverageRule {
561    /// Helper to enforce minimum calls per tool.
562    pub fn min_calls_per_tool(min: u64) -> Self {
563        Self::MinCallsPerTool { min }
564    }
565
566    /// Helper to enforce no uncalled tools.
567    pub fn no_uncalled_tools() -> Self {
568        Self::NoUncalledTools
569    }
570
571    /// Helper to enforce minimum percentage of tools called.
572    pub fn percent_called(min_percent: f64) -> Self {
573        Self::PercentCalled { min_percent }
574    }
575}
576
577/// Results of a tooltest run.
578#[derive(Clone, Debug, Serialize, Deserialize)]
579pub struct RunResult {
580    /// Overall run outcome.
581    pub outcome: RunOutcome,
582    /// Trace of MCP calls (responses are only included on failures).
583    pub trace: Vec<TraceEntry>,
584    /// Minimized sequence for failures, when available.
585    pub minimized: Option<MinimizedSequence>,
586    /// Non-fatal warnings collected during the run.
587    pub warnings: Vec<RunWarning>,
588    /// Coverage report for state-machine runs, when enabled.
589    pub coverage: Option<CoverageReport>,
590    /// Corpus snapshot for state-machine runs, when enabled.
591    #[serde(skip_serializing_if = "Option::is_none")]
592    pub corpus: Option<CorpusReport>,
593}