use std::time::Duration;
use anyhow::Result;
use rand::SeedableRng;
use rand_chacha::ChaCha20Rng;
use serde::Serialize;
use serde_json::{json, Map, Value};
use crate::{
client::CallOutcome,
corpus::Corpus,
finding::{Finding, FindingKind, ReproInfo},
property::{
dsl::{FixtureExpect, Sequence, SequenceFixture, StepOutcome},
jsonpath, runner,
},
seed::{derive_seed, derive_seed_canonical},
target::SeverityConfig,
};
use super::{exec::McpExec, reporter::Reporter};
/// Summary returned by [`SequencePlan::execute`] after all sequences were processed.
#[derive(Debug, Clone, Default, Serialize)]
pub struct SequenceReport {
/// Names of the sequences whose run ended without a failing step.
pub passed: Vec<String>,
/// Number of findings recorded; `execute` adds one for every failed sequence.
pub findings_count: usize,
/// Sequences that were not run because one of their steps calls a tool
/// that is absent from the server's tool list.
pub skipped_missing_tool: Vec<SkippedSequence>,
}
/// Record of a sequence that was skipped because the server lacks a tool it needs.
#[derive(Debug, Clone, Serialize)]
pub struct SkippedSequence {
/// Name of the skipped sequence.
pub sequence: String,
/// Name of the first tool, in step order, that the server's tool list does not contain.
pub missing_tool: String,
}
/// A batch of sequences together with the settings used to run them.
///
/// The plan is consumed by [`SequencePlan::execute`].
pub struct SequencePlan {
/// Sequences to run; `execute` processes them in this order.
pub sequences: Vec<Sequence>,
/// Seed that, combined with each sequence's name, yields that sequence's derived seeds.
pub master_seed: u64,
/// Timeout handed to every tool call made while running a sequence.
pub timeout: Duration,
/// Transport label copied into the `ReproInfo` of every finding.
pub transport_name: String,
/// Severity overrides, looked up by the keyword of each finding's kind.
pub severity: SeverityConfig,
}
impl SequencePlan {
    /// Runs every sequence of the plan against `client`, one after another.
    ///
    /// The server's tool list is fetched once up front. A sequence containing a step whose
    /// `call` is not in that list is reported via `Reporter::on_skipped`, recorded in
    /// [`SequenceReport::skipped_missing_tool`] and not executed. Every other sequence is run
    /// as iteration `0`, bracketed by `on_iteration_start` / `on_iteration_end`. A passing
    /// sequence is added to [`SequenceReport::passed`]; a failing one is turned into a
    /// [`Finding`] that is written to `corpus` and then handed to `reporter`.
    ///
    /// # Errors
    ///
    /// Returns an error when listing the tools fails or when `corpus` cannot write a finding.
    /// In the latter case the function returns immediately, so neither `on_finding` nor
    /// `on_iteration_end` is called for that sequence.
    pub async fn execute<C: McpExec + ?Sized>(
        self,
        client: &mut C,
        corpus: &Corpus,
        reporter: &mut dyn Reporter,
    ) -> Result<SequenceReport> {
        let advertised = client.list_tools().await?;
        let known_tools: std::collections::BTreeSet<String> =
            advertised.iter().map(|tool| tool.name.to_string()).collect();
        let mut summary = SequenceReport::default();

        for sequence in &self.sequences {
            let name = &sequence.name;

            // Skip the whole sequence as soon as one step targets an unknown tool.
            let unadvertised = sequence
                .steps
                .iter()
                .map(|step| &step.call)
                .find(|call| !known_tools.contains(*call));
            if let Some(call) = unadvertised {
                let missing = call.clone();
                reporter.on_skipped(
                    name,
                    &format!("step calls `{missing}` which the server does not advertise"),
                );
                summary.skipped_missing_tool.push(SkippedSequence {
                    sequence: name.clone(),
                    missing_tool: missing,
                });
                continue;
            }

            reporter.on_iteration_start(name, 0);
            // Two derivations from the same inputs: one seeds the RNG, the other is the
            // value stored in the finding's repro info.
            let rng_seed = derive_seed_canonical(self.master_seed, name, 0);
            let seed = derive_seed(self.master_seed, name, 0);
            let mut rng = ChaCha20Rng::from_seed(rng_seed);

            match run_one_sequence(client, sequence, &mut rng, self.timeout).await {
                SequenceOutcome::Pass => summary.passed.push(name.clone()),
                SequenceOutcome::Fail {
                    step_index,
                    step_call,
                    detail,
                    last_input,
                } => {
                    let kind = FindingKind::SequenceFailure {
                        sequence: name.clone(),
                        step_index,
                        step_call,
                    };
                    let title =
                        format!("sequence `{}` failed at step {step_index}", sequence.name);
                    let repro = ReproInfo {
                        seed,
                        tool_call: last_input,
                        transport: self.transport_name.clone(),
                        composition_trail: Vec::new(),
                    };
                    let finding = Finding::new(kind, name.clone(), title, detail, repro);
                    // A configured severity for this finding kind wins over the default.
                    let finding = match self.severity.resolve(finding.kind.keyword()) {
                        Some(override_sev) => finding.with_severity(override_sev),
                        None => finding,
                    };
                    corpus.write_finding(&finding)?;
                    reporter.on_finding(&finding);
                    summary.findings_count += 1;
                }
            }
            reporter.on_iteration_end(name, 0);
        }

        Ok(summary)
    }
}
/// Result of running a single sequence against a live server.
enum SequenceOutcome {
/// Every step of the sequence succeeded.
Pass,
/// A step failed; the remaining steps were not run.
Fail {
/// Zero-based position of the failing step within the sequence.
step_index: usize,
/// Tool name the failing step calls.
step_call: String,
/// Human-readable explanation of the failure.
detail: String,
/// Input of the failing step: the substituted input, or the raw `with:` object
/// when placeholder substitution itself failed.
last_input: Value,
},
}
async fn run_one_sequence<C: McpExec + ?Sized>(
client: &mut C,
sequence: &Sequence,
rng: &mut ChaCha20Rng,
timeout: Duration,
) -> SequenceOutcome {
let mut context = SequenceContext::new();
for (step_index, step) in sequence.steps.iter().enumerate() {
let raw_input = step
.with
.clone()
.map(|map| Value::Object(map.into_iter().collect::<Map<_, _>>()))
.unwrap_or(Value::Object(Map::new()));
let input = match context.substitute(&raw_input) {
Ok(value) => value,
Err(err) => {
return SequenceOutcome::Fail {
step_index,
step_call: step.call.clone(),
detail: format!(
"could not substitute step references in `with:` of step \
{step_index}: {err}"
),
last_input: raw_input,
};
}
};
let response = invoke(client, &step.call, input.clone(), timeout, rng).await;
let expected = step.expect.unwrap_or_default();
if let Some(detail) = check_step_outcome(&response, expected) {
return SequenceOutcome::Fail {
step_index,
step_call: step.call.clone(),
detail: format!(
"step {step_index} (`{}`) outcome mismatch: {detail}\n\
input: {}\nresponse: {}",
step.call,
serde_json::to_string_pretty(&input).unwrap_or_default(),
serde_json::to_string_pretty(&response).unwrap_or_default(),
),
last_input: input,
};
}
if !step.assertions.is_empty() {
if let Err(err) =
runner::evaluate_step_assertions(&step.assertions, input.clone(), response.clone())
{
return SequenceOutcome::Fail {
step_index,
step_call: step.call.clone(),
detail: format!(
"step {step_index} (`{}`) assertion failed: {err}\n\
input: {}\nresponse: {}",
step.call,
serde_json::to_string_pretty(&input).unwrap_or_default(),
serde_json::to_string_pretty(&response).unwrap_or_default(),
),
last_input: input,
};
}
}
if let Some(bind) = step.bind.as_ref() {
context.bind(
bind.clone(),
json!({
"input": input,
"response": response,
}),
);
}
}
SequenceOutcome::Pass
}
fn check_step_outcome(response: &Value, expected: StepOutcome) -> Option<String> {
let observed_error = response
.get("isError")
.and_then(Value::as_bool)
.unwrap_or(false);
match expected {
StepOutcome::Ok => {
if observed_error {
Some("expected ok, observed isError=true".into())
} else {
None
}
}
StepOutcome::Error => {
if observed_error {
None
} else {
Some("expected isError=true, observed ok response".into())
}
}
}
}
/// Calls `tool` with `input` and normalises every call outcome into a JSON response.
///
/// A successful call is serialised to JSON; if serialisation fails the result is `null`.
/// A hang, crash or protocol error becomes a synthetic response with `isError: true` and a
/// single text content item carrying the timeout notice, crash reason or protocol message.
///
/// `_rng` is accepted but not used by this function.
async fn invoke<C: McpExec + ?Sized>(
    client: &mut C,
    tool: &str,
    input: Value,
    timeout: Duration,
    _rng: &mut ChaCha20Rng,
) -> Value {
    // Successful calls return right away; every failure mode only differs in its text.
    let text: Value = match client.call_tool(tool, input, timeout).await {
        CallOutcome::Ok(result) => {
            return serde_json::to_value(result).unwrap_or(Value::Null);
        }
        CallOutcome::Hang(duration) => json!(format!("timeout after {duration:?}")),
        CallOutcome::Crash(reason) => json!(reason),
        CallOutcome::ProtocolError(message) => json!(message),
    };
    json!({
        "content": [{"type": "text", "text": text}],
        "isError": true,
    })
}
/// Step results bound so far while a sequence is being evaluated.
///
/// Later steps refer to these results through `{{steps.<name>.…}}` placeholders.
/// `Debug` and `Clone` are derived so the context can be logged and snapshotted like the
/// other public types of this module.
#[derive(Debug, Clone)]
pub struct SequenceContext {
    /// Envelope stored for each bind name. The callers in this file store
    /// `{"input": …, "response": …}` objects here.
    bindings: std::collections::BTreeMap<String, Value>,
}
impl Default for SequenceContext {
fn default() -> Self {
Self::new()
}
}
impl SequenceContext {
pub fn new() -> Self {
Self {
bindings: Default::default(),
}
}
pub fn bind(&mut self, name: String, envelope: Value) {
self.bindings.insert(name, envelope);
}
pub fn substitute(&self, value: &Value) -> Result<Value, String> {
match value {
Value::String(raw) => self.substitute_string(raw),
Value::Array(items) => items
.iter()
.map(|item| self.substitute(item))
.collect::<Result<Vec<_>, _>>()
.map(Value::Array),
Value::Object(map) => {
let mut out = Map::with_capacity(map.len());
for (k, v) in map {
out.insert(k.clone(), self.substitute(v)?);
}
Ok(Value::Object(out))
}
other => Ok(other.clone()),
}
}
fn substitute_string(&self, raw: &str) -> Result<Value, String> {
if let Some(inner) = single_placeholder(raw) {
return self.resolve_path(inner);
}
let mut out = String::with_capacity(raw.len());
let mut rest = raw;
while let Some(idx) = rest.find("{{") {
out.push_str(&rest[..idx]);
let after_open = &rest[idx + 2..];
let close = after_open
.find("}}")
.ok_or_else(|| format!("unterminated `{{{{...` in `{raw}`"))?;
let inner = after_open[..close].trim();
let resolved = self.resolve_path(inner)?;
match resolved {
Value::String(s) => out.push_str(&s),
other => out.push_str(&other.to_string()),
}
rest = &after_open[close + 2..];
}
out.push_str(rest);
Ok(Value::String(out))
}
fn resolve_path(&self, path: &str) -> Result<Value, String> {
let inner = path
.strip_prefix("steps.")
.ok_or_else(|| format!("placeholder must start with `steps.`: `{path}`"))?;
let (bind, rest) = inner.split_once('.').unwrap_or((inner, ""));
let envelope = self
.bindings
.get(bind)
.ok_or_else(|| format!("no step bound under `{bind}` (yet?)"))?;
if rest.is_empty() {
return Ok(envelope.clone());
}
let jsonpath = format!("$.{rest}");
jsonpath::resolve_one(envelope, &jsonpath)
.map_err(|err| format!("resolving `{path}`: {err}"))
}
}
/// Result of checking a sequence against a fixture of canned responses.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SequenceFixtureOutcome {
/// The sequence passed or failed exactly as the fixture expects.
Match,
/// The sequence's pass/fail result differs from the fixture's expectation.
Mismatch {
/// Result the fixture declares.
expected: FixtureExpect,
/// Result obtained from evaluating the sequence against the canned responses.
observed: FixtureExpect,
/// Either `step <index>: <reason>` for the first failing step, or `all steps passed`.
detail: String,
},
/// The fixture could not be evaluated: the number of responses does not match the
/// number of steps, or a placeholder in a step's input could not be substituted.
Structural {
/// Description of the structural problem.
error: String,
},
}
/// Replays `sequence` against the canned responses of `fixture` without calling a server.
///
/// Response `i` of the fixture stands in for the tool response of step `i`. Steps are
/// checked in order (expected outcome first, then assertions) and evaluation stops at the
/// first failing step. The resulting pass/fail verdict is then compared with
/// `fixture.expect`.
///
/// Returns [`SequenceFixtureOutcome::Structural`] when the number of responses differs from
/// the number of steps or when a step's placeholders cannot be substituted,
/// [`SequenceFixtureOutcome::Match`] when the verdict equals the expectation, and
/// [`SequenceFixtureOutcome::Mismatch`] otherwise.
pub fn evaluate_sequence_fixture(
    sequence: &Sequence,
    fixture: &SequenceFixture,
) -> SequenceFixtureOutcome {
    let (response_count, step_count) = (fixture.responses.len(), sequence.steps.len());
    if response_count != step_count {
        return SequenceFixtureOutcome::Structural {
            error: format!(
                "fixture provides {} responses but sequence has {} steps",
                response_count, step_count
            ),
        };
    }

    let mut bound = SequenceContext::new();
    let mut first_failure: Option<(usize, String)> = None;

    for (step_index, step) in sequence.steps.iter().enumerate() {
        let template = match step.with.clone() {
            Some(fields) => Value::Object(fields.into_iter().collect::<Map<_, _>>()),
            None => Value::Object(Map::new()),
        };
        let input = match bound.substitute(&template) {
            Ok(resolved) => resolved,
            Err(err) => {
                return SequenceFixtureOutcome::Structural {
                    error: format!(
                        "could not substitute step references in step {step_index}: {err}"
                    ),
                };
            }
        };
        // Safe to index: the length check above guarantees one response per step.
        let response = fixture.responses[step_index].clone();

        // Outcome mismatch takes precedence; assertions only run when the outcome is right.
        let verdict = match check_step_outcome(&response, step.expect.unwrap_or_default()) {
            Some(detail) => Some(format!("outcome mismatch: {detail}")),
            None if step.assertions.is_empty() => None,
            None => runner::evaluate_step_assertions(
                &step.assertions,
                input.clone(),
                response.clone(),
            )
            .err()
            .map(|err| format!("assertion failed: {err}")),
        };
        if let Some(reason) = verdict {
            first_failure = Some((step_index, reason));
            break;
        }

        if let Some(name) = &step.bind {
            bound.bind(
                name.clone(),
                json!({
                    "input": input,
                    "response": response,
                }),
            );
        }
    }

    let observed = match &first_failure {
        Some(_) => FixtureExpect::Fail,
        None => FixtureExpect::Pass,
    };
    if observed == fixture.expect {
        return SequenceFixtureOutcome::Match;
    }

    let detail = match first_failure {
        Some((idx, msg)) => format!("step {idx}: {msg}"),
        None => "all steps passed".to_string(),
    };
    SequenceFixtureOutcome::Mismatch {
        expected: fixture.expect,
        observed,
        detail,
    }
}
/// Returns the trimmed inner text when `raw`, ignoring surrounding whitespace, is exactly
/// one `{{…}}` placeholder.
///
/// Returns `None` when `raw` does not start with `{{` and end with `}}`, or when the text
/// between those delimiters itself contains `{{` or `}}` (more than one placeholder, or
/// nested delimiters).
fn single_placeholder(raw: &str) -> Option<&str> {
    raw.trim()
        .strip_prefix("{{")
        .and_then(|rest| rest.strip_suffix("}}"))
        .filter(|inner| !inner.contains("{{") && !inner.contains("}}"))
        .map(str::trim)
}
// Unit tests for placeholder substitution and step-outcome checking.
// The `expect_used` / `unwrap_used` / `panic` clippy lints are allowed for this module only.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
use super::*;
use serde_json::json;
// A string that is exactly one placeholder yields the referenced value with its JSON type
// (here a number), not a stringified copy.
#[test]
fn single_placeholder_preserves_type() {
let mut ctx = SequenceContext::new();
ctx.bind(
"login".into(),
json!({"input": {}, "response": {"structuredContent": {"id": 42}}}),
);
let out = ctx
.substitute(&json!("{{steps.login.response.structuredContent.id}}"))
.unwrap();
assert_eq!(out, json!(42));
}
// A placeholder embedded in surrounding text is replaced inline and the result is a string.
#[test]
fn mixed_text_substitutes_inline() {
let mut ctx = SequenceContext::new();
ctx.bind(
"login".into(),
json!({"input": {}, "response": {"structuredContent": {"token": "abc"}}}),
);
let out = ctx
.substitute(&json!(
"Bearer {{steps.login.response.structuredContent.token}}"
))
.unwrap();
assert_eq!(out, json!("Bearer abc"));
}
// Referring to a step that was never bound is an error that names the missing step.
#[test]
fn unknown_step_surfaces_error() {
let ctx = SequenceContext::new();
let err = ctx.substitute(&json!("{{steps.missing.x}}")).unwrap_err();
assert!(err.contains("missing"), "{err}");
}
// An opening `{{` without a closing `}}` is reported as unterminated.
#[test]
fn unterminated_placeholder_errors() {
let mut ctx = SequenceContext::new();
ctx.bind("a".into(), json!({}));
let err = ctx.substitute(&json!("hello {{steps.a")).unwrap_err();
assert!(err.contains("unterminated"));
}
// A response without an `isError` member satisfies an expected `Ok` outcome.
#[test]
fn step_outcome_ok_default_passes_when_no_is_error() {
let r = json!({"content": [{"type": "text", "text": "ok"}]});
assert!(check_step_outcome(&r, StepOutcome::Ok).is_none());
}
// A response with `isError: true` satisfies an expected `Error` outcome.
#[test]
fn step_outcome_error_passes_when_is_error_true() {
let r = json!({"isError": true, "content": []});
assert!(check_step_outcome(&r, StepOutcome::Error).is_none());
}
// Expecting `Ok` but observing `isError: true` produces a mismatch description.
#[test]
fn step_outcome_mismatch_returns_detail() {
let r = json!({"isError": true, "content": []});
assert!(check_step_outcome(&r, StepOutcome::Ok).is_some());
}
}