tooltest_core/
lib.rs

1//! Public API types for configuring and reporting tooltest runs.
2#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
3#![cfg_attr(not(test), deny(clippy::expect_used, clippy::unwrap_used))]
4
5use std::collections::BTreeMap;
6use std::fmt;
7use std::sync::Arc;
8
9use reqwest::Url;
10use schemars::JsonSchema;
11use serde::{Deserialize, Serialize};
12use serde_json::{Number, Value as JsonValue};
13
14#[cfg(test)]
15use tooltest_test_support as _;
16
17mod coverage_filter;
18mod generator;
19mod input;
20mod lint;
21mod lint_config;
22mod lints;
23mod output_schema;
24mod runner;
25pub mod schema;
26mod schema_dialect;
27pub mod session;
28mod validation;
29
30pub use input::{
31    TooltestHttpTarget, TooltestInput, TooltestPreRunHook, TooltestRunConfig, TooltestStdioTarget,
32    TooltestTarget, TooltestTargetConfig, TooltestTargetHttp, TooltestTargetStdio,
33};
34pub use lint::{
35    LintConfigSource, LintDefinition, LintFinding, LintLevel, LintPhase, LintRule, LintSuite,
36    ListLintContext, ResponseLintContext, RunLintContext,
37};
38pub use lint_config::{default_tooltest_toml, load_lint_suite};
39pub use lints::{
40    CoverageLint, JsonSchemaDialectCompatLint, JsonSchemaKeywordCompatLint,
41    MaxStructuredContentBytesLint, MaxToolsLint, McpSchemaMinVersionLint,
42    MissingStructuredContentLint, NoCrashLint, OutputSchemaCompileLint,
43    DEFAULT_JSON_SCHEMA_DIALECT,
44};
45pub use rmcp::model::{
46    CallToolRequestParam, CallToolResult, ErrorCode, ErrorData, JsonObject, Tool,
47};
48pub use rmcp::service::{ClientInitializeError, ServiceError};
49pub use runner::{run_http, run_stdio, run_with_session, RunnerOptions};
50pub use schema::{
51    parse_call_tool_request, parse_call_tool_result, parse_list_tools, schema_version_label,
52    SchemaError,
53};
54pub use session::{SessionDriver, SessionError};
55pub use validation::{list_tools_http, list_tools_stdio, list_tools_with_session, ListToolsError};
56
57#[cfg(test)]
58#[path = "../tests/internal/mod.rs"]
59mod tests;
60
/// Schema versions supported by the tooltest core.
///
/// The [`Default`] value is [`SchemaVersion::V2025_11_25`]. Variant names are
/// renamed with serde's `kebab-case` rule when (de)serialized.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum SchemaVersion {
    /// MCP schema version 2025-11-25 (the default).
    #[default]
    V2025_11_25,
    /// Any other explicitly configured schema version string, stored verbatim.
    Other(String),
}
71
/// Configuration for state-machine generator behavior.
///
/// State-machine generation is always used for sequence runs; there is no legacy mode.
///
/// Deserialization is lenient about missing keys and strict about extra ones:
/// `#[serde(default)]` fills any absent field from the derived [`Default`]
/// (empty lists, `false`, `None`), while `deny_unknown_fields` rejects keys
/// that do not correspond to a field below.
#[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)]
#[serde(default, deny_unknown_fields)]
pub struct StateMachineConfig {
    /// Seed numbers added to the corpus before generation.
    pub seed_numbers: Vec<Number>,
    /// Seed strings added to the corpus before generation.
    pub seed_strings: Vec<String>,
    /// Mine whitespace-delimited text tokens into the corpus.
    pub mine_text: bool,
    /// Dump the final state-machine corpus after the run completes.
    pub dump_corpus: bool,
    /// Log newly mined corpus values after each tool response.
    pub log_corpus_deltas: bool,
    /// Allow schema-based generation when corpus lacks required values.
    pub lenient_sourcing: bool,
    /// Optional allowlist for coverage warnings and validation (`None` when unset).
    pub coverage_allowlist: Option<Vec<String>>,
    /// Optional blocklist for coverage warnings and validation (`None` when unset).
    pub coverage_blocklist: Option<Vec<String>>,
}
95
96impl StateMachineConfig {
97    /// Sets the seed numbers for the state-machine corpus.
98    pub fn with_seed_numbers(mut self, seed_numbers: Vec<Number>) -> Self {
99        self.seed_numbers = seed_numbers;
100        self
101    }
102
103    /// Sets the seed strings for the state-machine corpus.
104    pub fn with_seed_strings(mut self, seed_strings: Vec<String>) -> Self {
105        self.seed_strings = seed_strings;
106        self
107    }
108
109    /// Enables mining of whitespace-delimited text tokens into the corpus.
110    pub fn with_mine_text(mut self, mine_text: bool) -> Self {
111        self.mine_text = mine_text;
112        self
113    }
114
115    /// Enables dumping the final state-machine corpus after the run completes.
116    pub fn with_dump_corpus(mut self, dump_corpus: bool) -> Self {
117        self.dump_corpus = dump_corpus;
118        self
119    }
120
121    /// Enables logging newly mined corpus values after each tool response.
122    pub fn with_log_corpus_deltas(mut self, log_corpus_deltas: bool) -> Self {
123        self.log_corpus_deltas = log_corpus_deltas;
124        self
125    }
126
127    /// Enables schema-based generation when corpus lacks required values.
128    pub fn with_lenient_sourcing(mut self, lenient_sourcing: bool) -> Self {
129        self.lenient_sourcing = lenient_sourcing;
130        self
131    }
132
133    /// Sets the coverage allowlist for state-machine runs.
134    pub fn with_coverage_allowlist(mut self, coverage_allowlist: Vec<String>) -> Self {
135        self.coverage_allowlist = Some(coverage_allowlist);
136        self
137    }
138
139    /// Sets the coverage blocklist for state-machine runs.
140    pub fn with_coverage_blocklist(mut self, coverage_blocklist: Vec<String>) -> Self {
141        self.coverage_blocklist = Some(coverage_blocklist);
142        self
143    }
144}
145
/// Configuration for MCP schema parsing and validation.
///
/// The derived [`Default`] selects the default [`SchemaVersion`].
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct SchemaConfig {
    /// The selected MCP schema version.
    pub version: SchemaVersion,
}
152
/// Checks that a stdio command contains at least one non-whitespace character.
///
/// Returns `Err` with a human-readable message when `command` is empty or
/// consists only of whitespace; otherwise returns `Ok(())`.
fn validate_stdio_command(command: &str) -> Result<(), String> {
    let trimmed = command.trim();
    if trimmed.is_empty() {
        Err("stdio command must not be empty".to_string())
    } else {
        Ok(())
    }
}
159
160fn validate_http_url(url: &str) -> Result<(), String> {
161    let parsed = Url::parse(url).map_err(|error| format!("invalid http url '{url}': {error}"))?;
162    match parsed.host_str() {
163        Some(host) if !host.is_empty() => {}
164        _ => {
165            return Err(format!("invalid http url '{url}': missing host"));
166        }
167    }
168    match parsed.scheme() {
169        "http" | "https" => {}
170        scheme => {
171            return Err(format!(
172                "invalid http url '{url}': scheme must be http or https (got '{scheme}')"
173            ));
174        }
175    }
176    Ok(())
177}
178
179fn deserialize_stdio_command<'de, D>(deserializer: D) -> Result<String, D::Error>
180where
181    D: serde::Deserializer<'de>,
182{
183    let command = String::deserialize(deserializer)?;
184    validate_stdio_command(&command).map_err(serde::de::Error::custom)?;
185    Ok(command)
186}
187
188fn deserialize_http_url<'de, D>(deserializer: D) -> Result<String, D::Error>
189where
190    D: serde::Deserializer<'de>,
191{
192    let url = String::deserialize(deserializer)?;
193    validate_http_url(&url).map_err(serde::de::Error::custom)?;
194    Ok(url)
195}
196
/// Configuration for a stdio-based MCP endpoint.
///
/// Downstream crates cannot construct this type via a struct literal; use
/// [`StdioConfig::new`] to ensure invariants are validated.
///
/// The `command` field is private: it is validated both by
/// [`StdioConfig::new`] and, when deserializing, by the
/// `deserialize_stdio_command` helper. Read it through
/// [`StdioConfig::command`]. The remaining fields are public and default to
/// empty/`None` when absent from serialized input.
///
/// ```rust,compile_fail
/// use tooltest_core::StdioConfig;
///
/// let _ = StdioConfig {
///     command: "server".to_string(),
///     args: Vec::new(),
///     env: std::collections::BTreeMap::new(),
///     cwd: None,
/// };
/// ```
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct StdioConfig {
    /// Command to execute for the MCP server (must not be empty or whitespace-only).
    #[serde(deserialize_with = "deserialize_stdio_command")]
    command: String,
    /// Command-line arguments passed to the MCP server.
    #[serde(default)]
    pub args: Vec<String>,
    /// Environment variables to add or override for the MCP process.
    #[serde(default)]
    pub env: BTreeMap<String, String>,
    /// Optional working directory for the MCP process.
    #[serde(default)]
    pub cwd: Option<String>,
}
227
228impl StdioConfig {
229    /// Creates a stdio configuration with defaults for args, env, and cwd.
230    pub fn new(command: impl Into<String>) -> Result<Self, String> {
231        let command = command.into();
232        validate_stdio_command(&command)?;
233        Ok(Self {
234            command,
235            args: Vec::new(),
236            env: BTreeMap::new(),
237            cwd: None,
238        })
239    }
240
241    /// Returns the configured MCP server command.
242    pub fn command(&self) -> &str {
243        &self.command
244    }
245}
246
/// Configuration for a pre-run hook command.
///
/// Unlike the endpoint configs, every field is public and the type derives
/// neither `Serialize` nor `Deserialize`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PreRunHook {
    /// Shell command string to execute before each run and validation.
    pub command: String,
    /// Environment variables to add or override for the hook process.
    pub env: BTreeMap<String, String>,
    /// Optional working directory for the hook process.
    pub cwd: Option<String>,
}
257
258impl PreRunHook {
259    /// Creates a pre-run hook with default env and cwd settings.
260    pub fn new(command: impl Into<String>) -> Self {
261        Self {
262            command: command.into(),
263            env: BTreeMap::new(),
264            cwd: None,
265        }
266    }
267
268    fn apply_stdio_context(&mut self, endpoint: &StdioConfig) {
269        self.env = endpoint.env.clone();
270        self.cwd = endpoint.cwd.clone();
271    }
272}
273
/// Configuration for an HTTP-based MCP endpoint.
///
/// Downstream crates cannot construct this type via a struct literal; use
/// [`HttpConfig::new`] to ensure invariants are validated.
///
/// The `url` field is private: it is validated both by [`HttpConfig::new`]
/// and, when deserializing, by the `deserialize_http_url` helper. Read it
/// through [`HttpConfig::url`].
///
/// ```rust,compile_fail
/// use tooltest_core::HttpConfig;
///
/// let _ = HttpConfig {
///     url: "http://localhost:3000/mcp".to_string(),
///     auth_token: None,
/// };
/// ```
// NOTE(review): the derived `Debug` and `Serialize` impls emit `auth_token`
// verbatim; confirm that is acceptable wherever this type is logged.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct HttpConfig {
    /// The HTTP endpoint URL for MCP requests (`http`/`https` with a host).
    #[serde(deserialize_with = "deserialize_http_url")]
    url: String,
    /// Optional bearer token to attach to Authorization headers.
    #[serde(default)]
    pub auth_token: Option<String>,
}
296
297impl HttpConfig {
298    /// Creates an HTTP configuration with optional auth token.
299    pub fn new(url: impl Into<String>) -> Result<Self, String> {
300        let url = url.into();
301        validate_http_url(&url)?;
302        Ok(Self {
303            url,
304            auth_token: None,
305        })
306    }
307
308    /// Returns the configured HTTP URL.
309    pub fn url(&self) -> &str {
310        &self.url
311    }
312}
313
/// Predicate callback used to decide whether a tool invocation is eligible.
///
/// Shared, thread-safe closure taking a string and a JSON value and returning
/// `true` when the invocation should be kept.
// NOTE(review): the arguments are presumably the tool name and its generated
// input — confirm against the runner.
pub type ToolPredicate = Arc<dyn Fn(&str, &JsonValue) -> bool + Send + Sync>;
/// Predicate callback that filters tools using a single string argument
/// (used by `RunConfig::tool_filter` to filter eligible tools by name).
pub type ToolNamePredicate = Arc<dyn Fn(&str) -> bool + Send + Sync>;
317
/// Declarative JSON assertion DSL container.
///
/// Runs also apply default assertions that fail on MCP protocol errors,
/// schema-invalid responses, and (when configured) tool result error responses.
///
/// The derived [`Default`] yields a set with no explicit rules.
///
/// Example:
/// ```
/// use serde_json::json;
/// use tooltest_core::{
///     AssertionCheck, AssertionRule, AssertionSet, AssertionTarget, ResponseAssertion,
/// };
///
/// let assertions = AssertionSet {
///     rules: vec![AssertionRule::Response(ResponseAssertion {
///         tool: Some("echo".to_string()),
///         checks: vec![AssertionCheck {
///             target: AssertionTarget::StructuredOutput,
///             pointer: "/status".to_string(),
///             expected: json!("ok"),
///         }],
///     })],
/// };
/// ```
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct AssertionSet {
    /// Assertion rules evaluated during or after a run.
    pub rules: Vec<AssertionRule>,
}
346
/// A single assertion rule in the JSON DSL.
///
/// Serialized in serde's adjacently tagged form: the snake_case variant name
/// is stored under `scope` (`response` or `sequence`) and the variant payload
/// under `rule`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "scope", content = "rule", rename_all = "snake_case")]
pub enum AssertionRule {
    /// Assertions evaluated against each tool response.
    Response(ResponseAssertion),
    /// Assertions evaluated against the full run sequence.
    Sequence(SequenceAssertion),
}
356
/// Assertions evaluated against a tool response.
///
/// Wrapped by [`AssertionRule::Response`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ResponseAssertion {
    /// Optional tool name filter; when set, only matching tools are checked.
    pub tool: Option<String>,
    /// Checks applied to the response payloads (input, output, or structured output).
    pub checks: Vec<AssertionCheck>,
}
365
/// Assertions evaluated against the entire run sequence.
///
/// Wrapped by [`AssertionRule::Sequence`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SequenceAssertion {
    /// Checks applied to the sequence payload.
    pub checks: Vec<AssertionCheck>,
}
372
/// A single JSON-pointer based check.
///
/// `pointer` uses RFC 6901 JSON Pointer syntax (for example `/status`).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AssertionCheck {
    /// The target payload to inspect.
    pub target: AssertionTarget,
    /// JSON Pointer string used to select the value to compare.
    pub pointer: String,
    /// Expected JSON value at the pointer location.
    pub expected: JsonValue,
}
385
/// Payload targets that can be inspected by assertions.
///
/// Variant names are (de)serialized in snake_case (`input`, `output`,
/// `structured_output`, `sequence`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AssertionTarget {
    /// The generated tool input object.
    Input,
    /// The raw tool output object.
    Output,
    /// The structured tool output object, when present or required by schema.
    StructuredOutput,
    /// The full run sequence payload.
    Sequence,
}
399
/// Top-level configuration for executing a tooltest run.
///
/// Downstream crates cannot construct this type via a struct literal; use
/// [`RunConfig::new`] and the builder methods to ensure invariants are
/// validated.
///
/// The one private field is `uncallable_limit`; it is changed through
/// [`RunConfig::with_uncallable_limit`] (which rejects zero) and read through
/// [`RunConfig::uncallable_limit`].
///
/// ```rust,compile_fail
/// use tooltest_core::RunConfig;
///
/// let _ = RunConfig {
///     uncallable_limit: 0,
///     ..RunConfig::new()
/// };
/// ```
#[derive(Clone)]
pub struct RunConfig {
    /// MCP schema configuration.
    pub schema: SchemaConfig,
    /// Optional predicate to filter eligible tools.
    pub predicate: Option<ToolPredicate>,
    /// Optional predicate to filter eligible tools by name.
    pub tool_filter: Option<ToolNamePredicate>,
    /// Assertion rules to evaluate during the run.
    pub assertions: AssertionSet,
    /// Whether tool result error responses (`isError`) should fail the run.
    pub in_band_error_forbidden: bool,
    /// State-machine generator configuration.
    pub state_machine: StateMachineConfig,
    /// Optional pre-run hook to execute before validation and each case.
    pub pre_run_hook: Option<PreRunHook>,
    /// Emit full tool responses in traces instead of compact invocation-only entries.
    pub full_trace: bool,
    /// Include uncallable tool traces when coverage validation fails.
    pub show_uncallable: bool,
    /// Number of calls per tool to include in uncallable traces (at least 1).
    uncallable_limit: usize,
    /// Optional trace sink for streaming per-case traces.
    pub trace_sink: Option<Arc<dyn TraceSink>>,
    /// Configured lint rules for the run.
    pub lints: LintSuite,
}
441
442impl RunConfig {
443    /// Creates a run configuration with defaults for schema and assertions.
444    ///
445    /// The state-machine generator is always used, and it is strict by default
446    /// (required values must come from the corpus unless lenient sourcing is enabled).
447    pub fn new() -> Self {
448        Self {
449            schema: SchemaConfig::default(),
450            predicate: None,
451            tool_filter: None,
452            assertions: AssertionSet::default(),
453            in_band_error_forbidden: false,
454            state_machine: StateMachineConfig::default(),
455            pre_run_hook: None,
456            full_trace: false,
457            show_uncallable: false,
458            uncallable_limit: 1,
459            trace_sink: None,
460            lints: LintSuite::default(),
461        }
462    }
463
464    /// Sets the schema configuration.
465    pub fn with_schema(mut self, schema: SchemaConfig) -> Self {
466        self.schema = schema;
467        self
468    }
469
470    /// Sets the tool predicate used for eligibility filtering.
471    pub fn with_predicate(mut self, predicate: ToolPredicate) -> Self {
472        self.predicate = Some(predicate);
473        self
474    }
475
476    /// Sets the tool name predicate used for eligibility filtering.
477    pub fn with_tool_filter(mut self, predicate: ToolNamePredicate) -> Self {
478        self.tool_filter = Some(predicate);
479        self
480    }
481
482    /// Sets the assertion rules for the run.
483    pub fn with_assertions(mut self, assertions: AssertionSet) -> Self {
484        self.assertions = assertions;
485        self
486    }
487
488    /// Sets whether tool result error responses (`isError`) should fail the run.
489    pub fn with_in_band_error_forbidden(mut self, forbidden: bool) -> Self {
490        self.in_band_error_forbidden = forbidden;
491        self
492    }
493
494    /// Sets the state-machine generator configuration.
495    pub fn with_state_machine(mut self, state_machine: StateMachineConfig) -> Self {
496        self.state_machine = state_machine;
497        self
498    }
499
500    /// Sets the pre-run hook for this run.
501    pub fn with_pre_run_hook(mut self, hook: PreRunHook) -> Self {
502        self.pre_run_hook = Some(hook);
503        self
504    }
505
506    /// Enables full trace output with tool responses.
507    pub fn with_full_trace(mut self, enabled: bool) -> Self {
508        self.full_trace = enabled;
509        self
510    }
511
512    /// Enables uncallable tool trace output for coverage validation failures.
513    pub fn with_show_uncallable(mut self, enabled: bool) -> Self {
514        self.show_uncallable = enabled;
515        self
516    }
517
518    /// Sets the call limit for uncallable tool traces.
519    pub fn with_uncallable_limit(mut self, limit: usize) -> Result<Self, String> {
520        if limit < 1 {
521            return Err("uncallable-limit must be at least 1".to_string());
522        }
523        self.uncallable_limit = limit;
524        Ok(self)
525    }
526
527    /// Returns the call limit for uncallable tool traces.
528    pub fn uncallable_limit(&self) -> usize {
529        self.uncallable_limit
530    }
531
532    /// Sets a trace sink that receives per-case traces.
533    pub fn with_trace_sink(mut self, sink: Arc<dyn TraceSink>) -> Self {
534        self.trace_sink = Some(sink);
535        self
536    }
537
538    /// Sets the configured lints for this run.
539    pub fn with_lints(mut self, lints: LintSuite) -> Self {
540        self.lints = lints;
541        self
542    }
543
544    pub(crate) fn apply_stdio_pre_run_context(&mut self, endpoint: &StdioConfig) {
545        if let Some(hook) = self.pre_run_hook.as_mut() {
546            hook.apply_stdio_context(endpoint);
547        }
548    }
549}
550
551impl Default for RunConfig {
552    fn default() -> Self {
553        Self::new()
554    }
555}
556
557impl fmt::Debug for RunConfig {
558    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
559        f.debug_struct("RunConfig")
560            .field("schema", &self.schema)
561            .field("predicate", &self.predicate.is_some())
562            .field("tool_filter", &self.tool_filter.is_some())
563            .field("assertions", &self.assertions)
564            .field("in_band_error_forbidden", &self.in_band_error_forbidden)
565            .field("state_machine", &self.state_machine)
566            .field("pre_run_hook", &self.pre_run_hook.is_some())
567            .field("show_uncallable", &self.show_uncallable)
568            .field("uncallable_limit", &self.uncallable_limit)
569            .field("trace_sink", &self.trace_sink.is_some())
570            .field("lints", &self.lints.len())
571            .field("lint_config_source", &self.lints.source())
572            .finish()
573    }
574}
575
/// A generated tool invocation.
///
/// Alias for the re-exported `rmcp` [`CallToolRequestParam`] type.
pub type ToolInvocation = CallToolRequestParam;
578
/// A trace entry capturing MCP interactions.
///
/// Serialized in serde's internally tagged form: the snake_case variant name
/// (`list_tools` or `tool_call`) is stored under `kind`. Optional fields are
/// omitted from serialized output when they are `None`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum TraceEntry {
    /// A list-tools request was issued.
    ListTools {
        /// Optional failure detail when list-tools fails.
        #[serde(skip_serializing_if = "Option::is_none")]
        failure_reason: Option<String>,
    },
    /// A tool call, optionally annotated with a response on failure.
    ToolCall {
        /// The invocation that was sent.
        invocation: ToolInvocation,
        /// Optional response payload (omitted in compact traces).
        #[serde(skip_serializing_if = "Option::is_none")]
        response: Option<CallToolResult>,
        /// Optional failure detail when a call fails.
        #[serde(skip_serializing_if = "Option::is_none")]
        failure_reason: Option<String>,
    },
}
601
602impl TraceEntry {
603    /// Creates a trace entry for a list-tools call.
604    pub fn list_tools() -> Self {
605        Self::ListTools {
606            failure_reason: None,
607        }
608    }
609
610    /// Creates a trace entry for a failed list-tools call.
611    pub fn list_tools_with_failure(reason: String) -> Self {
612        Self::ListTools {
613            failure_reason: Some(reason),
614        }
615    }
616
617    /// Creates a trace entry for a tool call without a response.
618    pub fn tool_call(invocation: ToolInvocation) -> Self {
619        Self::ToolCall {
620            invocation,
621            response: None,
622            failure_reason: None,
623        }
624    }
625
626    /// Creates a trace entry for a tool call with a response.
627    pub fn tool_call_with_response(invocation: ToolInvocation, response: CallToolResult) -> Self {
628        Self::ToolCall {
629            invocation,
630            response: Some(response),
631            failure_reason: None,
632        }
633    }
634
635    /// Returns the invocation and response when the entry is a tool call.
636    pub fn as_tool_call(&self) -> Option<(&ToolInvocation, Option<&CallToolResult>)> {
637        match self {
638            TraceEntry::ToolCall {
639                invocation,
640                response,
641                ..
642            } => Some((invocation, response.as_ref())),
643            TraceEntry::ListTools { .. } => None,
644        }
645    }
646}
647
/// A minimized failing sequence from property-based testing.
///
/// Carried in [`RunResult::minimized`] when available.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct MinimizedSequence {
    /// The minimized tool invocations that reproduce the failure.
    pub invocations: Vec<ToolInvocation>,
}
654
/// Outcome of a tooltest run.
///
/// Serialized in serde's internally tagged form: the snake_case variant name
/// (`success` or `failure`) is stored under `status`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum RunOutcome {
    /// The run completed without assertion failures.
    Success,
    /// The run failed due to an error or assertion.
    Failure(RunFailure),
}
664
/// Failure details for a tooltest run.
///
/// `code` and `details` are omitted from serialized output when `None`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct RunFailure {
    /// Short description of the failure.
    pub reason: String,
    /// Optional structured failure code.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
    /// Optional structured failure details.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<JsonValue>,
}
677
678impl RunFailure {
679    /// Creates a run failure with only a reason string.
680    pub fn new(reason: impl Into<String>) -> Self {
681        Self {
682            reason: reason.into(),
683            code: None,
684            details: None,
685        }
686    }
687}
688
/// Warning emitted during a tooltest run.
///
/// `tool` and `details` are omitted from serialized output when `None`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct RunWarning {
    /// Structured warning code.
    pub code: RunWarningCode,
    /// Human-readable warning message.
    pub message: String,
    /// Optional tool name associated with the warning.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool: Option<String>,
    /// Optional structured warning details.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<JsonValue>,
}
703
/// Receives per-case traces when enabled.
///
/// Implementations must be `Send + Sync`; `record` takes `&self`, so any
/// state an implementation accumulates needs interior mutability.
pub trait TraceSink: Send + Sync {
    /// Records a full trace for a single generated case.
    ///
    /// `case_index` identifies the case and `trace` holds its entries in order.
    // NOTE(review): whether `case_index` is zero-based is not visible here — confirm
    // against the runner.
    fn record(&self, case_index: u64, trace: &[TraceEntry]);
}
709
/// Structured warning codes for tooltest runs.
///
/// A thin newtype over the code string; thanks to `#[serde(transparent)]` and
/// `#[schemars(transparent)]` it (de)serializes and is schema-described as a
/// bare string.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(transparent)]
#[schemars(transparent)]
pub struct RunWarningCode(pub String);
715
716impl RunWarningCode {
717    #[deprecated(
718        note = "Use RunWarningCode::lint(\"missing_structured_content\"); this warning code is lint-only."
719    )]
720    pub const MISSING_STRUCTURED_CONTENT: &'static str = "missing_structured_content";
721
722    #[deprecated(
723        note = "Use RunWarningCode::lint(\"missing_structured_content\"); this warning code is lint-only."
724    )]
725    pub fn missing_structured_content() -> Self {
726        Self("missing_structured_content".to_string())
727    }
728
729    pub fn lint(id: impl Into<String>) -> Self {
730        Self(format!("lint.{}", id.into()))
731    }
732
733    pub fn as_str(&self) -> &str {
734        self.0.as_str()
735    }
736
737    pub fn lint_id(&self) -> Option<&str> {
738        self.0.strip_prefix("lint.")
739    }
740}
741
/// Warning describing a coverage issue in a state-machine run.
///
/// Collected in [`CoverageReport::warnings`].
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct CoverageWarning {
    /// Tool name that could not be called.
    pub tool: String,
    /// Reason the tool could not be called.
    pub reason: CoverageWarningReason,
}
750
/// Structured reason codes for coverage warnings.
///
/// Variant names are (de)serialized in snake_case (`missing_string`,
/// `missing_integer`, `missing_number`, `missing_required_value`).
// NOTE(review): the variants carry no documentation in the original source.
// Presumably each names the kind of value that was unavailable when trying to
// build a call for the tool — confirm against the generator before relying on it.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum CoverageWarningReason {
    MissingString,
    MissingInteger,
    MissingNumber,
    MissingRequiredValue,
}
760
/// Recorded call details for tools with zero successes.
///
/// `output` and `error` are omitted from serialized output when `None`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct UncallableToolCall {
    /// Tool invocation input.
    pub input: ToolInvocation,
    /// Successful output payload, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output: Option<CallToolResult>,
    /// Error payload when the tool returned an error result.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<CallToolResult>,
    /// RFC3339 timestamp when the call completed (stored as a plain string).
    pub timestamp: String,
}
775
/// Coverage report for state-machine runs.
///
/// The maps are `BTreeMap`s, so their keys iterate and serialize in sorted order.
// NOTE(review): the map keys are presumably tool names — confirm against the runner.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct CoverageReport {
    /// Successful tool call counts.
    pub counts: BTreeMap<String, u64>,
    /// Unsuccessful tool call counts (isError = true).
    pub failures: BTreeMap<String, u64>,
    /// Coverage warnings for uncallable tools.
    pub warnings: Vec<CoverageWarning>,
    /// Last N calls for tools with zero successes.
    pub uncallable_traces: BTreeMap<String, Vec<UncallableToolCall>>,
}
788
/// Snapshot of the state-machine corpus.
///
/// Carried in [`RunResult::corpus`] when enabled.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct CorpusReport {
    /// Numbers observed in the corpus.
    pub numbers: Vec<Number>,
    /// Integers observed in the corpus.
    pub integers: Vec<i64>,
    /// Strings observed in the corpus.
    pub strings: Vec<String>,
}
799
/// Coverage validation rules for state-machine runs.
///
/// Serialized in serde's internally tagged form: the snake_case variant name
/// (`min_calls_per_tool`, `no_uncalled_tools`, `percent_called`) is stored
/// under `rule`, alongside the variant's own fields. Unknown fields are
/// rejected on deserialization.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "rule", rename_all = "snake_case", deny_unknown_fields)]
pub enum CoverageRule {
    /// Require a minimum number of successful calls per tool.
    MinCallsPerTool {
        /// Minimum number of successful calls required for each tool.
        min: u64,
    },
    /// Require that all callable tools are called at least once.
    NoUncalledTools,
    /// Require a minimum percentage of callable tools to be called.
    PercentCalled {
        /// Minimum percentage of callable tools that must be called.
        // NOTE(review): the scale (0–100 vs 0–1) is not visible here — confirm
        // against the coverage validator.
        min_percent: f64,
    },
}
811
812impl CoverageRule {
813    /// Helper to enforce minimum calls per tool.
814    pub fn min_calls_per_tool(min: u64) -> Self {
815        Self::MinCallsPerTool { min }
816    }
817
818    /// Helper to enforce no uncalled tools.
819    pub fn no_uncalled_tools() -> Self {
820        Self::NoUncalledTools
821    }
822
823    /// Helper to enforce minimum percentage of tools called.
824    pub fn percent_called(min_percent: f64) -> Self {
825        Self::PercentCalled { min_percent }
826    }
827}
828
/// Results of a tooltest run.
///
/// `minimized`, `coverage` and `corpus` are omitted from serialized output
/// when `None`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct RunResult {
    /// Overall run outcome.
    pub outcome: RunOutcome,
    /// Trace of MCP calls (responses are only included on failures).
    // NOTE(review): `RunConfig::full_trace` is documented as emitting full tool
    // responses in traces — confirm whether the parenthetical above still holds
    // when that option is enabled.
    pub trace: Vec<TraceEntry>,
    /// Minimized sequence for failures, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub minimized: Option<MinimizedSequence>,
    /// Non-fatal warnings collected during the run.
    pub warnings: Vec<RunWarning>,
    /// Coverage report for state-machine runs, when enabled.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub coverage: Option<CoverageReport>,
    /// Corpus snapshot for state-machine runs, when enabled.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub corpus: Option<CorpusReport>,
}
tooltest_core/lib.rs

tooltest_core/
lib.rs