tooltest_core/
lib.rs

1//! Public API types for configuring and reporting tooltest runs.
2#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
3
4use std::collections::BTreeMap;
5use std::fmt;
6use std::sync::Arc;
7
8use serde::{Deserialize, Serialize};
9use serde_json::{Number, Value as JsonValue};
10
11mod generator;
12mod output_schema;
13mod runner;
14pub mod schema;
15pub mod session;
16mod validation;
17
18pub use rmcp::model::{
19    CallToolRequestParam, CallToolResult, ErrorCode, ErrorData, JsonObject, Tool,
20};
21pub use rmcp::service::{ClientInitializeError, ServiceError};
22pub use runner::{run_http, run_stdio, run_with_session, RunnerOptions};
23pub use schema::{
24    parse_call_tool_request, parse_call_tool_result, parse_list_tools, schema_version_label,
25    SchemaError,
26};
27pub use session::{SessionDriver, SessionError};
28pub use validation::{
29    list_tools_http, list_tools_stdio, list_tools_with_session, validate_tool, validate_tools,
30    BulkToolValidationSummary, ListToolsError, ToolValidationConfig, ToolValidationDecision,
31    ToolValidationError, ToolValidationFailure, ToolValidationFn,
32};
33
34#[cfg(test)]
35#[path = "../tests/internal/mod.rs"]
36mod tests;
37
38/// Schema versions supported by the tooltest core.
39#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
40#[serde(rename_all = "kebab-case")]
41pub enum SchemaVersion {
42    /// MCP schema version 2025-11-25.
43    #[default]
44    V2025_11_25,
45    /// Any other explicitly configured schema version string.
46    Other(String),
47}
48
49/// Configuration for state-machine generator behavior.
50///
51/// State-machine generation is always used for sequence runs; there is no legacy mode.
52#[derive(Clone, Debug, Default, Serialize, Deserialize)]
53pub struct StateMachineConfig {
54    /// Seed numbers added to the corpus before generation.
55    pub seed_numbers: Vec<Number>,
56    /// Seed strings added to the corpus before generation.
57    pub seed_strings: Vec<String>,
58    /// Mine whitespace-delimited text tokens into the corpus.
59    pub mine_text: bool,
60    /// Dump the final state-machine corpus after the run completes.
61    pub dump_corpus: bool,
62    /// Log newly mined corpus values after each tool response.
63    pub log_corpus_deltas: bool,
64    /// Allow schema-based generation when corpus lacks required values.
65    pub lenient_sourcing: bool,
66    /// Optional allowlist for coverage warnings and validation.
67    pub coverage_allowlist: Option<Vec<String>>,
68    /// Optional blocklist for coverage warnings and validation.
69    pub coverage_blocklist: Option<Vec<String>>,
70    /// Coverage validation rules applied after state-machine runs.
71    pub coverage_rules: Vec<CoverageRule>,
72}
73
74impl StateMachineConfig {
75    /// Sets the seed numbers for the state-machine corpus.
76    pub fn with_seed_numbers(mut self, seed_numbers: Vec<Number>) -> Self {
77        self.seed_numbers = seed_numbers;
78        self
79    }
80
81    /// Sets the seed strings for the state-machine corpus.
82    pub fn with_seed_strings(mut self, seed_strings: Vec<String>) -> Self {
83        self.seed_strings = seed_strings;
84        self
85    }
86
87    /// Enables mining of whitespace-delimited text tokens into the corpus.
88    pub fn with_mine_text(mut self, mine_text: bool) -> Self {
89        self.mine_text = mine_text;
90        self
91    }
92
93    /// Enables dumping the final state-machine corpus after the run completes.
94    pub fn with_dump_corpus(mut self, dump_corpus: bool) -> Self {
95        self.dump_corpus = dump_corpus;
96        self
97    }
98
99    /// Enables logging newly mined corpus values after each tool response.
100    pub fn with_log_corpus_deltas(mut self, log_corpus_deltas: bool) -> Self {
101        self.log_corpus_deltas = log_corpus_deltas;
102        self
103    }
104
105    /// Enables schema-based generation when corpus lacks required values.
106    pub fn with_lenient_sourcing(mut self, lenient_sourcing: bool) -> Self {
107        self.lenient_sourcing = lenient_sourcing;
108        self
109    }
110
111    /// Sets the coverage allowlist for state-machine runs.
112    pub fn with_coverage_allowlist(mut self, coverage_allowlist: Vec<String>) -> Self {
113        self.coverage_allowlist = Some(coverage_allowlist);
114        self
115    }
116
117    /// Sets the coverage blocklist for state-machine runs.
118    pub fn with_coverage_blocklist(mut self, coverage_blocklist: Vec<String>) -> Self {
119        self.coverage_blocklist = Some(coverage_blocklist);
120        self
121    }
122
123    /// Sets the coverage validation rules for state-machine runs.
124    pub fn with_coverage_rules(mut self, coverage_rules: Vec<CoverageRule>) -> Self {
125        self.coverage_rules = coverage_rules;
126        self
127    }
128}
129
130/// Configuration for MCP schema parsing and validation.
131#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
132pub struct SchemaConfig {
133    /// The selected MCP schema version.
134    pub version: SchemaVersion,
135}
136
137/// Configuration for a stdio-based MCP endpoint.
138#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
139pub struct StdioConfig {
140    /// Command to execute for the MCP server.
141    pub command: String,
142    /// Command-line arguments passed to the MCP server.
143    pub args: Vec<String>,
144    /// Environment variables to add or override for the MCP process.
145    pub env: BTreeMap<String, String>,
146    /// Optional working directory for the MCP process.
147    pub cwd: Option<String>,
148}
149
150impl StdioConfig {
151    /// Creates a stdio configuration with defaults for args, env, and cwd.
152    pub fn new(command: impl Into<String>) -> Self {
153        Self {
154            command: command.into(),
155            args: Vec::new(),
156            env: BTreeMap::new(),
157            cwd: None,
158        }
159    }
160}
161
/// Describes a command that is run as a pre-run hook.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PreRunHook {
    /// Shell command string executed before each run and validation.
    pub command: String,
    /// Environment variables added to, or overriding, the hook process environment.
    pub env: BTreeMap<String, String>,
    /// Working directory for the hook process, if one is configured.
    pub cwd: Option<String>,
}
172
173impl PreRunHook {
174    /// Creates a pre-run hook with default env and cwd settings.
175    pub fn new(command: impl Into<String>) -> Self {
176        Self {
177            command: command.into(),
178            env: BTreeMap::new(),
179            cwd: None,
180        }
181    }
182
183    fn apply_stdio_context(&mut self, endpoint: &StdioConfig) {
184        self.env = endpoint.env.clone();
185        self.cwd = endpoint.cwd.clone();
186    }
187}
188
189/// Configuration for an HTTP-based MCP endpoint.
190#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
191pub struct HttpConfig {
192    /// The HTTP endpoint URL for MCP requests.
193    pub url: String,
194    /// Optional bearer token to attach to Authorization headers.
195    pub auth_token: Option<String>,
196}
197
/// Predicate callback used to decide whether a tool invocation is eligible.
///
/// The callback receives a string and a JSON value and returns `true` or `false`.
/// It is stored behind an [`Arc`] and must be `Send + Sync`, so one predicate can
/// be cloned cheaply and shared across threads.
// NOTE(review): the arguments are presumably the tool name and the generated
// input object, and `true` presumably means "eligible" — confirm against the runner.
pub type ToolPredicate = Arc<dyn Fn(&str, &JsonValue) -> bool + Send + Sync>;
200
201/// Declarative JSON assertion DSL container.
202///
203/// Runs also apply default assertions that fail on tool error responses and
204/// validate structured output against declared output schemas.
205///
206/// Example:
207/// ```
208/// use serde_json::json;
209/// use tooltest_core::{
210///     AssertionCheck, AssertionRule, AssertionSet, AssertionTarget, ResponseAssertion,
211/// };
212///
213/// let assertions = AssertionSet {
214///     rules: vec![AssertionRule::Response(ResponseAssertion {
215///         tool: Some("echo".to_string()),
216///         checks: vec![AssertionCheck {
217///             target: AssertionTarget::StructuredOutput,
218///             pointer: "/status".to_string(),
219///             expected: json!("ok"),
220///         }],
221///     })],
222/// };
223/// ```
224#[derive(Clone, Debug, Default, Serialize, Deserialize)]
225pub struct AssertionSet {
226    /// Assertion rules evaluated during or after a run.
227    pub rules: Vec<AssertionRule>,
228}
229
230/// A single assertion rule in the JSON DSL.
231#[derive(Clone, Debug, Serialize, Deserialize)]
232#[serde(tag = "scope", content = "rule", rename_all = "snake_case")]
233pub enum AssertionRule {
234    /// Assertions evaluated against each tool response.
235    Response(ResponseAssertion),
236    /// Assertions evaluated against the full run sequence.
237    Sequence(SequenceAssertion),
238}
239
240/// Assertions evaluated against a tool response.
241#[derive(Clone, Debug, Serialize, Deserialize)]
242pub struct ResponseAssertion {
243    /// Optional tool name filter; when set, only matching tools are checked.
244    pub tool: Option<String>,
245    /// Checks applied to the response payloads (input, output, or structured output).
246    pub checks: Vec<AssertionCheck>,
247}
248
249/// Assertions evaluated against the entire run sequence.
250#[derive(Clone, Debug, Serialize, Deserialize)]
251pub struct SequenceAssertion {
252    /// Checks applied to the sequence payload.
253    pub checks: Vec<AssertionCheck>,
254}
255
256/// A single JSON-pointer based check.
257///
258/// `pointer` uses RFC 6901 JSON Pointer syntax.
259#[derive(Clone, Debug, Serialize, Deserialize)]
260pub struct AssertionCheck {
261    /// The target payload to inspect.
262    pub target: AssertionTarget,
263    /// JSON Pointer string used to select the value to compare.
264    pub pointer: String,
265    /// Expected JSON value at the pointer location.
266    pub expected: JsonValue,
267}
268
269/// Payload targets that can be inspected by assertions.
270#[derive(Clone, Debug, Serialize, Deserialize)]
271#[serde(rename_all = "snake_case")]
272pub enum AssertionTarget {
273    /// The generated tool input object.
274    Input,
275    /// The raw tool output object.
276    Output,
277    /// The structured tool output object, when present or required by schema.
278    StructuredOutput,
279    /// The full run sequence payload.
280    Sequence,
281}
282
/// Top-level configuration for executing a tooltest run.
///
/// Only `Clone` is derived. `Debug` is implemented by hand elsewhere in this
/// file because `predicate` holds a `dyn Fn` trait object, which has no
/// `Debug` implementation to derive from.
#[derive(Clone)]
pub struct RunConfig {
    /// MCP schema configuration.
    pub schema: SchemaConfig,
    /// Optional predicate to filter eligible tools; `None` means no predicate is set.
    pub predicate: Option<ToolPredicate>,
    /// Assertion rules to evaluate during the run.
    pub assertions: AssertionSet,
    /// State-machine generator configuration.
    pub state_machine: StateMachineConfig,
    /// Optional pre-run hook to execute before validation and each case.
    pub pre_run_hook: Option<PreRunHook>,
}
297
298impl RunConfig {
299    /// Creates a run configuration with defaults for schema and assertions.
300    ///
301    /// The state-machine generator is always used, and it is strict by default
302    /// (required values must come from the corpus unless lenient sourcing is enabled).
303    pub fn new() -> Self {
304        Self {
305            schema: SchemaConfig::default(),
306            predicate: None,
307            assertions: AssertionSet::default(),
308            state_machine: StateMachineConfig::default(),
309            pre_run_hook: None,
310        }
311    }
312
313    /// Sets the schema configuration.
314    pub fn with_schema(mut self, schema: SchemaConfig) -> Self {
315        self.schema = schema;
316        self
317    }
318
319    /// Sets the tool predicate used for eligibility filtering.
320    pub fn with_predicate(mut self, predicate: ToolPredicate) -> Self {
321        self.predicate = Some(predicate);
322        self
323    }
324
325    /// Sets the assertion rules for the run.
326    pub fn with_assertions(mut self, assertions: AssertionSet) -> Self {
327        self.assertions = assertions;
328        self
329    }
330
331    /// Sets the state-machine generator configuration.
332    pub fn with_state_machine(mut self, state_machine: StateMachineConfig) -> Self {
333        self.state_machine = state_machine;
334        self
335    }
336
337    /// Sets the pre-run hook for this run.
338    pub fn with_pre_run_hook(mut self, hook: PreRunHook) -> Self {
339        self.pre_run_hook = Some(hook);
340        self
341    }
342
343    pub(crate) fn apply_stdio_pre_run_context(&mut self, endpoint: &StdioConfig) {
344        if let Some(hook) = self.pre_run_hook.as_mut() {
345            hook.apply_stdio_context(endpoint);
346        }
347    }
348}
349
/// `RunConfig::default()` is the same as [`RunConfig::new`].
impl Default for RunConfig {
    /// Returns the configuration produced by [`RunConfig::new`].
    fn default() -> Self {
        Self::new()
    }
}
355
356impl fmt::Debug for RunConfig {
357    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
358        f.debug_struct("RunConfig")
359            .field("schema", &self.schema)
360            .field("predicate", &self.predicate.is_some())
361            .field("assertions", &self.assertions)
362            .field("state_machine", &self.state_machine)
363            .field("pre_run_hook", &self.pre_run_hook.is_some())
364            .finish()
365    }
366}
367
/// A generated tool invocation.
///
/// This is an alias for [`CallToolRequestParam`], which this crate re-exports
/// from `rmcp::model`.
pub type ToolInvocation = CallToolRequestParam;
370
371/// A trace entry capturing MCP interactions.
372#[derive(Clone, Debug, Serialize, Deserialize)]
373#[serde(tag = "kind", rename_all = "snake_case")]
374pub enum TraceEntry {
375    /// A list-tools request was issued.
376    ListTools {
377        /// Optional failure detail when list-tools fails.
378        #[serde(skip_serializing_if = "Option::is_none")]
379        failure_reason: Option<String>,
380    },
381    /// A tool call, optionally annotated with a response on failure.
382    ToolCall {
383        /// The invocation that was sent.
384        invocation: ToolInvocation,
385        /// Optional response payload (omitted in compact traces).
386        #[serde(skip_serializing_if = "Option::is_none")]
387        response: Option<CallToolResult>,
388        /// Optional failure detail when a call fails.
389        #[serde(skip_serializing_if = "Option::is_none")]
390        failure_reason: Option<String>,
391    },
392}
393
394impl TraceEntry {
395    /// Creates a trace entry for a list-tools call.
396    pub fn list_tools() -> Self {
397        Self::ListTools {
398            failure_reason: None,
399        }
400    }
401
402    /// Creates a trace entry for a failed list-tools call.
403    pub fn list_tools_with_failure(reason: String) -> Self {
404        Self::ListTools {
405            failure_reason: Some(reason),
406        }
407    }
408
409    /// Creates a trace entry for a tool call without a response.
410    pub fn tool_call(invocation: ToolInvocation) -> Self {
411        Self::ToolCall {
412            invocation,
413            response: None,
414            failure_reason: None,
415        }
416    }
417
418    /// Creates a trace entry for a tool call with a response.
419    pub fn tool_call_with_response(invocation: ToolInvocation, response: CallToolResult) -> Self {
420        Self::ToolCall {
421            invocation,
422            response: Some(response),
423            failure_reason: None,
424        }
425    }
426
427    /// Returns the invocation and response when the entry is a tool call.
428    pub fn as_tool_call(&self) -> Option<(&ToolInvocation, Option<&CallToolResult>)> {
429        match self {
430            TraceEntry::ToolCall {
431                invocation,
432                response,
433                ..
434            } => Some((invocation, response.as_ref())),
435            TraceEntry::ListTools { .. } => None,
436        }
437    }
438}
439
440/// A minimized failing sequence from property-based testing.
441#[derive(Clone, Debug, Serialize, Deserialize)]
442pub struct MinimizedSequence {
443    /// The minimized tool invocations that reproduce the failure.
444    pub invocations: Vec<ToolInvocation>,
445}
446
447/// Outcome of a tooltest run.
448#[derive(Clone, Debug, Serialize, Deserialize)]
449#[serde(tag = "status", rename_all = "snake_case")]
450pub enum RunOutcome {
451    /// The run completed without assertion failures.
452    Success,
453    /// The run failed due to an error or assertion.
454    Failure(RunFailure),
455}
456
457/// Failure details for a tooltest run.
458#[derive(Clone, Debug, Serialize, Deserialize)]
459pub struct RunFailure {
460    /// Short description of the failure.
461    pub reason: String,
462    /// Optional structured failure code.
463    pub code: Option<String>,
464    /// Optional structured failure details.
465    pub details: Option<JsonValue>,
466}
467
468impl RunFailure {
469    /// Creates a run failure with only a reason string.
470    pub fn new(reason: impl Into<String>) -> Self {
471        Self {
472            reason: reason.into(),
473            code: None,
474            details: None,
475        }
476    }
477}
478
479/// Warning emitted during a tooltest run.
480#[derive(Clone, Debug, Serialize, Deserialize)]
481pub struct RunWarning {
482    /// Structured warning code.
483    pub code: RunWarningCode,
484    /// Human-readable warning message.
485    pub message: String,
486    /// Optional tool name associated with the warning.
487    #[serde(skip_serializing_if = "Option::is_none")]
488    pub tool: Option<String>,
489}
490
/// Structured warning codes for tooltest runs.
///
/// Variant names are serialized in snake_case.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RunWarningCode {
    /// Serialized as `schema_unsupported_keyword`.
    // NOTE(review): presumably emitted when a tool schema uses a keyword that
    // tooltest does not support — confirm against the code that raises it.
    SchemaUnsupportedKeyword,
}
497
498/// Warning describing a coverage issue in a state-machine run.
499#[derive(Clone, Debug, Serialize, Deserialize)]
500pub struct CoverageWarning {
501    /// Tool name that could not be called.
502    pub tool: String,
503    /// Reason the tool could not be called.
504    pub reason: CoverageWarningReason,
505}
506
/// Structured reason codes for coverage warnings.
///
/// Variant names are serialized in snake_case.
// NOTE(review): the per-variant meanings below are inferred from the variant
// names and the corpus-related configuration docs — confirm against the generator.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CoverageWarningReason {
    /// Serialized as `missing_string`; presumably no usable string value was available.
    MissingString,
    /// Serialized as `missing_integer`; presumably no usable integer value was available.
    MissingInteger,
    /// Serialized as `missing_number`; presumably no usable number value was available.
    MissingNumber,
    /// Serialized as `missing_required_value`; presumably a required value could not be sourced.
    MissingRequiredValue,
}
516
517/// Coverage report for state-machine runs.
518#[derive(Clone, Debug, Serialize, Deserialize)]
519pub struct CoverageReport {
520    /// Successful tool call counts.
521    pub counts: BTreeMap<String, u64>,
522    /// Coverage warnings for uncallable tools.
523    pub warnings: Vec<CoverageWarning>,
524}
525
526/// Snapshot of the state-machine corpus.
527#[derive(Clone, Debug, Serialize, Deserialize)]
528pub struct CorpusReport {
529    /// Numbers observed in the corpus.
530    pub numbers: Vec<Number>,
531    /// Integers observed in the corpus.
532    pub integers: Vec<i64>,
533    /// Strings observed in the corpus.
534    pub strings: Vec<String>,
535}
536
537/// Coverage validation rules for state-machine runs.
538#[derive(Clone, Debug, Serialize, Deserialize)]
539#[serde(tag = "rule", rename_all = "snake_case")]
540pub enum CoverageRule {
541    /// Require a minimum number of successful calls per tool.
542    MinCallsPerTool { min: u64 },
543    /// Require that all callable tools are called at least once.
544    NoUncalledTools,
545    /// Require a minimum percentage of callable tools to be called.
546    PercentCalled { min_percent: f64 },
547}
548
549impl CoverageRule {
550    /// Helper to enforce minimum calls per tool.
551    pub fn min_calls_per_tool(min: u64) -> Self {
552        Self::MinCallsPerTool { min }
553    }
554
555    /// Helper to enforce no uncalled tools.
556    pub fn no_uncalled_tools() -> Self {
557        Self::NoUncalledTools
558    }
559
560    /// Helper to enforce minimum percentage of tools called.
561    pub fn percent_called(min_percent: f64) -> Self {
562        Self::PercentCalled { min_percent }
563    }
564}
565
566/// Results of a tooltest run.
567#[derive(Clone, Debug, Serialize, Deserialize)]
568pub struct RunResult {
569    /// Overall run outcome.
570    pub outcome: RunOutcome,
571    /// Trace of MCP calls (responses are only included on failures).
572    pub trace: Vec<TraceEntry>,
573    /// Minimized sequence for failures, when available.
574    pub minimized: Option<MinimizedSequence>,
575    /// Non-fatal warnings collected during the run.
576    pub warnings: Vec<RunWarning>,
577    /// Coverage report for state-machine runs, when enabled.
578    pub coverage: Option<CoverageReport>,
579    /// Corpus snapshot for state-machine runs, when enabled.
580    #[serde(skip_serializing_if = "Option::is_none")]
581    pub corpus: Option<CorpusReport>,
582}