1#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
3#![cfg_attr(not(test), deny(clippy::expect_used, clippy::unwrap_used))]
4
5use std::collections::BTreeMap;
6use std::fmt;
7use std::sync::Arc;
8
9use reqwest::Url;
10use schemars::JsonSchema;
11use serde::{Deserialize, Serialize};
12use serde_json::{Number, Value as JsonValue};
13
14#[cfg(test)]
15use tooltest_test_support as _;
16
17mod coverage_filter;
18mod generator;
19mod input;
20mod lint;
21mod lint_config;
22mod lints;
23mod output_schema;
24mod runner;
25pub mod schema;
26mod schema_dialect;
27pub mod session;
28mod validation;
29
30pub use input::{
31 TooltestHttpTarget, TooltestInput, TooltestPreRunHook, TooltestRunConfig, TooltestStdioTarget,
32 TooltestTarget, TooltestTargetConfig, TooltestTargetHttp, TooltestTargetStdio,
33};
34pub use lint::{
35 LintConfigSource, LintDefinition, LintFinding, LintLevel, LintPhase, LintRule, LintSuite,
36 ListLintContext, ResponseLintContext, RunLintContext,
37};
38pub use lint_config::{default_tooltest_toml, load_lint_suite};
39pub use lints::{
40 CoverageLint, JsonSchemaDialectCompatLint, JsonSchemaKeywordCompatLint,
41 MaxStructuredContentBytesLint, MaxToolsLint, McpSchemaMinVersionLint,
42 MissingStructuredContentLint, NoCrashLint, OutputSchemaCompileLint,
43 DEFAULT_JSON_SCHEMA_DIALECT,
44};
45pub use rmcp::model::{
46 CallToolRequestParam, CallToolResult, ErrorCode, ErrorData, JsonObject, Tool,
47};
48pub use rmcp::service::{ClientInitializeError, ServiceError};
49pub use runner::{run_http, run_stdio, run_with_session, RunnerOptions};
50pub use schema::{
51 parse_call_tool_request, parse_call_tool_result, parse_list_tools, schema_version_label,
52 SchemaError,
53};
54pub use session::{SessionDriver, SessionError};
55pub use validation::{list_tools_http, list_tools_stdio, list_tools_with_session, ListToolsError};
56
57#[cfg(test)]
58#[path = "../tests/internal/mod.rs"]
59mod tests;
60
/// Identifies a schema version.
///
/// Variant names are (de)serialized in kebab-case through serde.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum SchemaVersion {
    /// The version labelled `2025_11_25`; this is the `Default` variant.
    #[default]
    V2025_11_25,
    /// Any other version, carried as a free-form string.
    Other(String),
}
71
/// Options grouped under the "state machine" part of a run configuration.
///
/// Because of `#[serde(default)]`, any field missing from the input takes its
/// `Default` value; `deny_unknown_fields` makes unrecognized keys an error.
#[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)]
#[serde(default, deny_unknown_fields)]
pub struct StateMachineConfig {
    /// Numbers supplied up front as seeds.
    /// NOTE(review): presumably fed into the generator's value corpus — confirm.
    pub seed_numbers: Vec<Number>,
    /// Strings supplied up front as seeds (same caveat as `seed_numbers`).
    pub seed_strings: Vec<String>,
    /// Toggle named "mine text"; its exact effect is defined by code outside this block.
    pub mine_text: bool,
    /// Toggle named "dump corpus"; presumably makes the run report its corpus — confirm.
    pub dump_corpus: bool,
    /// Toggle named "log corpus deltas"; presumably logs corpus changes — confirm.
    pub log_corpus_deltas: bool,
    /// Toggle named "lenient sourcing"; semantics are defined by code outside this block.
    pub lenient_sourcing: bool,
    /// Optional list of names for coverage purposes (presumably tool names to include — confirm).
    pub coverage_allowlist: Option<Vec<String>>,
    /// Optional list of names for coverage purposes (presumably tool names to exclude — confirm).
    pub coverage_blocklist: Option<Vec<String>>,
}
95
96impl StateMachineConfig {
97 pub fn with_seed_numbers(mut self, seed_numbers: Vec<Number>) -> Self {
99 self.seed_numbers = seed_numbers;
100 self
101 }
102
103 pub fn with_seed_strings(mut self, seed_strings: Vec<String>) -> Self {
105 self.seed_strings = seed_strings;
106 self
107 }
108
109 pub fn with_mine_text(mut self, mine_text: bool) -> Self {
111 self.mine_text = mine_text;
112 self
113 }
114
115 pub fn with_dump_corpus(mut self, dump_corpus: bool) -> Self {
117 self.dump_corpus = dump_corpus;
118 self
119 }
120
121 pub fn with_log_corpus_deltas(mut self, log_corpus_deltas: bool) -> Self {
123 self.log_corpus_deltas = log_corpus_deltas;
124 self
125 }
126
127 pub fn with_lenient_sourcing(mut self, lenient_sourcing: bool) -> Self {
129 self.lenient_sourcing = lenient_sourcing;
130 self
131 }
132
133 pub fn with_coverage_allowlist(mut self, coverage_allowlist: Vec<String>) -> Self {
135 self.coverage_allowlist = Some(coverage_allowlist);
136 self
137 }
138
139 pub fn with_coverage_blocklist(mut self, coverage_blocklist: Vec<String>) -> Self {
141 self.coverage_blocklist = Some(coverage_blocklist);
142 self
143 }
144}
145
/// Schema-related settings.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct SchemaConfig {
    /// Selected schema version; the derived `Default` yields `SchemaVersion::default()`.
    pub version: SchemaVersion,
}
152
/// Checks that a stdio command contains something other than whitespace.
///
/// # Errors
///
/// Returns a message when `command` is empty or consists only of whitespace.
fn validate_stdio_command(command: &str) -> Result<(), String> {
    match command.trim() {
        "" => Err(String::from("stdio command must not be empty")),
        _ => Ok(()),
    }
}
159
160fn validate_http_url(url: &str) -> Result<(), String> {
161 let parsed = Url::parse(url).map_err(|error| format!("invalid http url '{url}': {error}"))?;
162 match parsed.host_str() {
163 Some(host) if !host.is_empty() => {}
164 _ => {
165 return Err(format!("invalid http url '{url}': missing host"));
166 }
167 }
168 match parsed.scheme() {
169 "http" | "https" => {}
170 scheme => {
171 return Err(format!(
172 "invalid http url '{url}': scheme must be http or https (got '{scheme}')"
173 ));
174 }
175 }
176 Ok(())
177}
178
179fn deserialize_stdio_command<'de, D>(deserializer: D) -> Result<String, D::Error>
180where
181 D: serde::Deserializer<'de>,
182{
183 let command = String::deserialize(deserializer)?;
184 validate_stdio_command(&command).map_err(serde::de::Error::custom)?;
185 Ok(command)
186}
187
188fn deserialize_http_url<'de, D>(deserializer: D) -> Result<String, D::Error>
189where
190 D: serde::Deserializer<'de>,
191{
192 let url = String::deserialize(deserializer)?;
193 validate_http_url(&url).map_err(serde::de::Error::custom)?;
194 Ok(url)
195}
196
/// Settings for a stdio endpoint: a command plus its arguments, environment
/// variables and working directory.
///
/// `command` is private, so outside this module it can only be set through
/// `StdioConfig::new` or deserialization; both reject an empty or
/// whitespace-only value.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct StdioConfig {
    /// The command string; validated on deserialization by `deserialize_stdio_command`.
    #[serde(deserialize_with = "deserialize_stdio_command")]
    command: String,
    /// Arguments for the command; defaults to an empty list when absent from the input.
    #[serde(default)]
    pub args: Vec<String>,
    /// Environment variables, keyed by name; defaults to an empty map when absent.
    #[serde(default)]
    pub env: BTreeMap<String, String>,
    /// Optional working directory; defaults to `None` when absent.
    #[serde(default)]
    pub cwd: Option<String>,
}
227
228impl StdioConfig {
229 pub fn new(command: impl Into<String>) -> Result<Self, String> {
231 let command = command.into();
232 validate_stdio_command(&command)?;
233 Ok(Self {
234 command,
235 args: Vec::new(),
236 env: BTreeMap::new(),
237 cwd: None,
238 })
239 }
240
241 pub fn command(&self) -> &str {
243 &self.command
244 }
245}
246
/// A command configured as a pre-run hook, together with the environment and
/// working directory associated with it.
///
/// NOTE(review): when and how the hook is executed is decided by code outside
/// this block.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PreRunHook {
    /// The hook's command string.
    pub command: String,
    /// Environment variables associated with the hook, keyed by name.
    pub env: BTreeMap<String, String>,
    /// Optional working directory associated with the hook.
    pub cwd: Option<String>,
}
257
258impl PreRunHook {
259 pub fn new(command: impl Into<String>) -> Self {
261 Self {
262 command: command.into(),
263 env: BTreeMap::new(),
264 cwd: None,
265 }
266 }
267
268 fn apply_stdio_context(&mut self, endpoint: &StdioConfig) {
269 self.env = endpoint.env.clone();
270 self.cwd = endpoint.cwd.clone();
271 }
272}
273
/// Settings for an HTTP endpoint.
///
/// `url` is private, so outside this module it can only be set through
/// `HttpConfig::new` or deserialization; both run `validate_http_url`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct HttpConfig {
    /// The endpoint URL; validated on deserialization by `deserialize_http_url`.
    #[serde(deserialize_with = "deserialize_http_url")]
    url: String,
    /// Optional auth token; defaults to `None` when absent from the input.
    /// NOTE(review): how the token is sent is decided by code outside this block.
    #[serde(default)]
    pub auth_token: Option<String>,
}
296
297impl HttpConfig {
298 pub fn new(url: impl Into<String>) -> Result<Self, String> {
300 let url = url.into();
301 validate_http_url(&url)?;
302 Ok(Self {
303 url,
304 auth_token: None,
305 })
306 }
307
308 pub fn url(&self) -> &str {
310 &self.url
311 }
312}
313
/// Shared (`Arc`), `Send + Sync` predicate taking a `&str` and a JSON value.
/// NOTE(review): the arguments look like (tool name, tool input) — confirm against the runner.
pub type ToolPredicate = Arc<dyn Fn(&str, &JsonValue) -> bool + Send + Sync>;
/// Shared (`Arc`), `Send + Sync` predicate taking a `&str` (presumably a tool name — confirm).
pub type ToolNamePredicate = Arc<dyn Fn(&str) -> bool + Send + Sync>;
317
/// A collection of assertion rules; the derived `Default` is an empty set.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct AssertionSet {
    /// The rules in this set, in declaration order.
    pub rules: Vec<AssertionRule>,
}
346
/// One assertion rule, scoped either to a response or to a sequence.
///
/// Serialized in serde's adjacently tagged form: the variant name (snake_case)
/// goes under `"scope"` and the payload under `"rule"`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "scope", content = "rule", rename_all = "snake_case")]
pub enum AssertionRule {
    /// A rule carrying a `ResponseAssertion`.
    Response(ResponseAssertion),
    /// A rule carrying a `SequenceAssertion`.
    Sequence(SequenceAssertion),
}
356
/// Checks attached to a response-scoped rule.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ResponseAssertion {
    /// Optional tool name.
    /// NOTE(review): presumably `None` means the checks apply to every tool — confirm.
    pub tool: Option<String>,
    /// The checks belonging to this assertion.
    pub checks: Vec<AssertionCheck>,
}
365
/// Checks attached to a sequence-scoped rule.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SequenceAssertion {
    /// The checks belonging to this assertion.
    pub checks: Vec<AssertionCheck>,
}
372
/// A single check: a target, a pointer into it, and the expected JSON value.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AssertionCheck {
    /// Which payload the check inspects.
    pub target: AssertionTarget,
    /// Location inside the target (presumably a JSON Pointer string — confirm).
    pub pointer: String,
    /// The JSON value expected at that location.
    pub expected: JsonValue,
}
385
/// The payload an `AssertionCheck` is evaluated against.
///
/// Variant names are (de)serialized in snake_case.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AssertionTarget {
    /// Serialized as `"input"`.
    Input,
    /// Serialized as `"output"`.
    Output,
    /// Serialized as `"structured_output"`.
    StructuredOutput,
    /// Serialized as `"sequence"`.
    Sequence,
}
399
/// Top-level configuration for a run.
///
/// `Debug` is implemented by hand because the predicate and trace-sink
/// fields hold trait objects.
#[derive(Clone)]
pub struct RunConfig {
    /// Schema settings.
    pub schema: SchemaConfig,
    /// Optional predicate over a `&str` and a JSON value (see `ToolPredicate`).
    pub predicate: Option<ToolPredicate>,
    /// Optional predicate over a `&str` (see `ToolNamePredicate`).
    pub tool_filter: Option<ToolNamePredicate>,
    /// Assertion rules for the run.
    pub assertions: AssertionSet,
    /// Flag named "in-band error forbidden".
    /// NOTE(review): presumably turns in-band tool errors into failures — confirm.
    pub in_band_error_forbidden: bool,
    /// State-machine options.
    pub state_machine: StateMachineConfig,
    /// Optional pre-run hook.
    pub pre_run_hook: Option<PreRunHook>,
    /// Flag named "full trace"; its effect is defined by code outside this block.
    pub full_trace: bool,
    /// Flag named "show uncallable"; its effect is defined by code outside this block.
    pub show_uncallable: bool,
    /// Private so that `with_uncallable_limit` can enforce a minimum of 1.
    uncallable_limit: usize,
    /// Optional sink that receives traces (see `TraceSink`).
    pub trace_sink: Option<Arc<dyn TraceSink>>,
    /// Lint suite for the run.
    pub lints: LintSuite,
}
441
442impl RunConfig {
443 pub fn new() -> Self {
448 Self {
449 schema: SchemaConfig::default(),
450 predicate: None,
451 tool_filter: None,
452 assertions: AssertionSet::default(),
453 in_band_error_forbidden: false,
454 state_machine: StateMachineConfig::default(),
455 pre_run_hook: None,
456 full_trace: false,
457 show_uncallable: false,
458 uncallable_limit: 1,
459 trace_sink: None,
460 lints: LintSuite::default(),
461 }
462 }
463
464 pub fn with_schema(mut self, schema: SchemaConfig) -> Self {
466 self.schema = schema;
467 self
468 }
469
470 pub fn with_predicate(mut self, predicate: ToolPredicate) -> Self {
472 self.predicate = Some(predicate);
473 self
474 }
475
476 pub fn with_tool_filter(mut self, predicate: ToolNamePredicate) -> Self {
478 self.tool_filter = Some(predicate);
479 self
480 }
481
482 pub fn with_assertions(mut self, assertions: AssertionSet) -> Self {
484 self.assertions = assertions;
485 self
486 }
487
488 pub fn with_in_band_error_forbidden(mut self, forbidden: bool) -> Self {
490 self.in_band_error_forbidden = forbidden;
491 self
492 }
493
494 pub fn with_state_machine(mut self, state_machine: StateMachineConfig) -> Self {
496 self.state_machine = state_machine;
497 self
498 }
499
500 pub fn with_pre_run_hook(mut self, hook: PreRunHook) -> Self {
502 self.pre_run_hook = Some(hook);
503 self
504 }
505
506 pub fn with_full_trace(mut self, enabled: bool) -> Self {
508 self.full_trace = enabled;
509 self
510 }
511
512 pub fn with_show_uncallable(mut self, enabled: bool) -> Self {
514 self.show_uncallable = enabled;
515 self
516 }
517
518 pub fn with_uncallable_limit(mut self, limit: usize) -> Result<Self, String> {
520 if limit < 1 {
521 return Err("uncallable-limit must be at least 1".to_string());
522 }
523 self.uncallable_limit = limit;
524 Ok(self)
525 }
526
527 pub fn uncallable_limit(&self) -> usize {
529 self.uncallable_limit
530 }
531
532 pub fn with_trace_sink(mut self, sink: Arc<dyn TraceSink>) -> Self {
534 self.trace_sink = Some(sink);
535 self
536 }
537
538 pub fn with_lints(mut self, lints: LintSuite) -> Self {
540 self.lints = lints;
541 self
542 }
543
544 pub(crate) fn apply_stdio_pre_run_context(&mut self, endpoint: &StdioConfig) {
545 if let Some(hook) = self.pre_run_hook.as_mut() {
546 hook.apply_stdio_context(endpoint);
547 }
548 }
549}
550
551impl Default for RunConfig {
552 fn default() -> Self {
553 Self::new()
554 }
555}
556
557impl fmt::Debug for RunConfig {
558 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
559 f.debug_struct("RunConfig")
560 .field("schema", &self.schema)
561 .field("predicate", &self.predicate.is_some())
562 .field("tool_filter", &self.tool_filter.is_some())
563 .field("assertions", &self.assertions)
564 .field("in_band_error_forbidden", &self.in_band_error_forbidden)
565 .field("state_machine", &self.state_machine)
566 .field("pre_run_hook", &self.pre_run_hook.is_some())
567 .field("show_uncallable", &self.show_uncallable)
568 .field("uncallable_limit", &self.uncallable_limit)
569 .field("trace_sink", &self.trace_sink.is_some())
570 .field("lints", &self.lints.len())
571 .field("lint_config_source", &self.lints.source())
572 .finish()
573 }
574}
575
/// Alias for `rmcp`'s `CallToolRequestParam`, used wherever this crate refers
/// to a tool invocation.
pub type ToolInvocation = CallToolRequestParam;
578
/// One entry in a run trace.
///
/// Serialized in serde's internally tagged form: a `"kind"` key holds the
/// variant name in snake_case (`"list_tools"` or `"tool_call"`). Optional
/// fields are left out of the output when they are `None`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum TraceEntry {
    /// A list-tools step.
    ListTools {
        /// Why the step failed, if a failure was recorded.
        #[serde(skip_serializing_if = "Option::is_none")]
        failure_reason: Option<String>,
    },
    /// A tool-call step.
    ToolCall {
        /// The invocation that was issued.
        invocation: ToolInvocation,
        /// The response, if one was recorded.
        #[serde(skip_serializing_if = "Option::is_none")]
        response: Option<CallToolResult>,
        /// Why the step failed, if a failure was recorded.
        #[serde(skip_serializing_if = "Option::is_none")]
        failure_reason: Option<String>,
    },
}
601
602impl TraceEntry {
603 pub fn list_tools() -> Self {
605 Self::ListTools {
606 failure_reason: None,
607 }
608 }
609
610 pub fn list_tools_with_failure(reason: String) -> Self {
612 Self::ListTools {
613 failure_reason: Some(reason),
614 }
615 }
616
617 pub fn tool_call(invocation: ToolInvocation) -> Self {
619 Self::ToolCall {
620 invocation,
621 response: None,
622 failure_reason: None,
623 }
624 }
625
626 pub fn tool_call_with_response(invocation: ToolInvocation, response: CallToolResult) -> Self {
628 Self::ToolCall {
629 invocation,
630 response: Some(response),
631 failure_reason: None,
632 }
633 }
634
635 pub fn as_tool_call(&self) -> Option<(&ToolInvocation, Option<&CallToolResult>)> {
637 match self {
638 TraceEntry::ToolCall {
639 invocation,
640 response,
641 ..
642 } => Some((invocation, response.as_ref())),
643 TraceEntry::ListTools { .. } => None,
644 }
645 }
646}
647
/// A sequence of tool invocations labelled as minimized.
/// NOTE(review): presumably the shrunken reproduction of a failing case — confirm against the runner.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct MinimizedSequence {
    /// The invocations, in order.
    pub invocations: Vec<ToolInvocation>,
}
654
/// Overall result of a run.
///
/// Serialized in serde's internally tagged form: a `"status"` key holds the
/// variant name in snake_case (`"success"` or `"failure"`).
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum RunOutcome {
    /// The run succeeded.
    Success,
    /// The run failed; the payload describes the failure.
    Failure(RunFailure),
}
664
/// Description of a failed run.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct RunFailure {
    /// Text describing why the run failed.
    pub reason: String,
    /// Optional failure code; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
    /// Optional extra JSON details; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<JsonValue>,
}
677
678impl RunFailure {
679 pub fn new(reason: impl Into<String>) -> Self {
681 Self {
682 reason: reason.into(),
683 code: None,
684 details: None,
685 }
686 }
687}
688
/// A warning attached to a run result.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct RunWarning {
    /// Code identifying the warning.
    pub code: RunWarningCode,
    /// Warning text.
    pub message: String,
    /// Optional tool name (presumably the tool the warning is about — confirm);
    /// omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool: Option<String>,
    /// Optional extra JSON details; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<JsonValue>,
}
703
/// Receiver for traces. Implementations must be `Send + Sync`.
pub trait TraceSink: Send + Sync {
    /// Receives the trace entries for the case identified by `case_index`.
    /// NOTE(review): when and how often this is called is decided by the runner — confirm there.
    fn record(&self, case_index: u64, trace: &[TraceEntry]);
}
709
/// String newtype for a warning code.
///
/// Marked transparent for both serde and schemars, so it (de)serializes and is
/// described in schemas as its inner string.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(transparent)]
#[schemars(transparent)]
pub struct RunWarningCode(pub String);
715
716impl RunWarningCode {
717 #[deprecated(
718 note = "Use RunWarningCode::lint(\"missing_structured_content\"); this warning code is lint-only."
719 )]
720 pub const MISSING_STRUCTURED_CONTENT: &'static str = "missing_structured_content";
721
722 #[deprecated(
723 note = "Use RunWarningCode::lint(\"missing_structured_content\"); this warning code is lint-only."
724 )]
725 pub fn missing_structured_content() -> Self {
726 Self("missing_structured_content".to_string())
727 }
728
729 pub fn lint(id: impl Into<String>) -> Self {
730 Self(format!("lint.{}", id.into()))
731 }
732
733 pub fn as_str(&self) -> &str {
734 self.0.as_str()
735 }
736
737 pub fn lint_id(&self) -> Option<&str> {
738 self.0.strip_prefix("lint.")
739 }
740}
741
/// A coverage warning for one tool.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct CoverageWarning {
    /// Name of the tool the warning refers to.
    pub tool: String,
    /// Why the warning was raised.
    pub reason: CoverageWarningReason,
}
750
/// Reason attached to a `CoverageWarning`; variant names are (de)serialized in
/// snake_case.
/// NOTE(review): each variant presumably names the kind of value that was
/// unavailable when building an input — confirm against the generator.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum CoverageWarningReason {
    /// Serialized as `"missing_string"`.
    MissingString,
    /// Serialized as `"missing_integer"`.
    MissingInteger,
    /// Serialized as `"missing_number"`.
    MissingNumber,
    /// Serialized as `"missing_required_value"`.
    MissingRequiredValue,
}
760
/// Record of one call kept for a tool reported as uncallable.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct UncallableToolCall {
    /// The invocation that was issued.
    pub input: ToolInvocation,
    /// Result recorded as output, if any; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output: Option<CallToolResult>,
    /// Result recorded as an error, if any; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<CallToolResult>,
    /// Timestamp of the call as a string (format is not fixed by this type — confirm at the producer).
    pub timestamp: String,
}
775
/// Coverage information collected for a run.
/// NOTE(review): the string keys of the maps are presumably tool names — confirm at the producer.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct CoverageReport {
    /// Per-key counts (presumably calls per tool).
    pub counts: BTreeMap<String, u64>,
    /// Per-key failure counts.
    pub failures: BTreeMap<String, u64>,
    /// Coverage warnings raised during the run.
    pub warnings: Vec<CoverageWarning>,
    /// Recorded calls per key for tools reported as uncallable.
    pub uncallable_traces: BTreeMap<String, Vec<UncallableToolCall>>,
}
788
/// Snapshot of corpus values, grouped by kind.
/// NOTE(review): presumably emitted when `dump_corpus` is enabled — confirm against the runner.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct CorpusReport {
    /// JSON numbers in the corpus.
    pub numbers: Vec<Number>,
    /// Integers in the corpus.
    pub integers: Vec<i64>,
    /// Strings in the corpus.
    pub strings: Vec<String>,
}
799
/// A coverage requirement.
///
/// Serialized in serde's internally tagged form: a `"rule"` key holds the
/// variant name in snake_case, next to the variant's own fields. Unknown
/// fields are rejected on deserialization.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "rule", rename_all = "snake_case", deny_unknown_fields)]
pub enum CoverageRule {
    /// Carries a minimum call count (`min`) per tool.
    MinCallsPerTool { min: u64 },
    /// Carries no parameters; the name indicates no tool may remain uncalled.
    NoUncalledTools,
    /// Carries a minimum percentage (`min_percent`).
    /// NOTE(review): the scale (0–100 vs 0–1) is not fixed by this type — confirm at the consumer.
    PercentCalled { min_percent: f64 },
}
811
812impl CoverageRule {
813 pub fn min_calls_per_tool(min: u64) -> Self {
815 Self::MinCallsPerTool { min }
816 }
817
818 pub fn no_uncalled_tools() -> Self {
820 Self::NoUncalledTools
821 }
822
823 pub fn percent_called(min_percent: f64) -> Self {
825 Self::PercentCalled { min_percent }
826 }
827}
828
/// Everything reported for a run: outcome, trace, warnings and optional
/// reports. Optional fields are omitted from serialized output when `None`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct RunResult {
    /// Whether the run succeeded or failed.
    pub outcome: RunOutcome,
    /// Trace entries recorded for the run.
    pub trace: Vec<TraceEntry>,
    /// Minimized invocation sequence, if one was produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub minimized: Option<MinimizedSequence>,
    /// Warnings raised during the run.
    pub warnings: Vec<RunWarning>,
    /// Coverage report, if one was produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub coverage: Option<CoverageReport>,
    /// Corpus report, if one was produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub corpus: Option<CorpusReport>,
}