Skip to main content

wallfacer_core/run/
sequence.rs

1//! Phase L — sequence-aware property runner.
2//!
3//! A [`crate::property::dsl::Sequence`] is a chain of tool calls
4//! sharing a single MCP client and a step-context. Earlier steps can
5//! `bind` their `{input, response}` envelope under a name, and later
6//! steps reference it via `{{steps.<bind>.<jsonpath>}}` placeholders
7//! inside their `with:` arguments.
8//!
9//! The runner threads bindings through a [`SequenceContext`] map and
10//! substitutes placeholders just before invoking each step. This is
11//! deliberately late-bound: a step's `with:` block can depend on the
12//! *response* of a previous step, not just its inputs.
13//!
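//! Reconnect semantics — sequences hold per-connection state
//! (authentication tokens, server-side session ids, in-memory
//! bookkeeping). The runner therefore does **not** issue a reconnect
//! when a step's call hangs or returns a protocol error. Such a
//! failure is surfaced to the step as an `isError: true` response:
//! for a step that expects an ok outcome the sequence is marked
//! failed and the remaining steps are skipped, whereas a step that
//! expects [`crate::property::dsl::StepOutcome::Error`] treats it as
//! a match and the sequence continues. Either way the caller's
//! `Client` is left untouched so a subsequent sequence can still
//! observe whatever state the broken step left behind.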
21//!
22//! Findings emitted by this module carry
23//! [`crate::finding::FindingKind::SequenceFailure`] tagged with the
24//! offending step index; the corpus folder uses the sequence name as
25//! the per-finding tool slot so a sequence's findings cluster
26//! together.
27
28use std::time::Duration;
29
30use anyhow::Result;
31use rand::SeedableRng;
32use rand_chacha::ChaCha20Rng;
33use serde::Serialize;
34use serde_json::{json, Map, Value};
35
36use crate::{
37    client::CallOutcome,
38    corpus::Corpus,
39    finding::{Finding, FindingKind, ReproInfo},
40    fuzz_corpus::{response_fingerprint, CorpusTrigger, FuzzCorpus, FuzzCorpusEntry},
41    mutate::corpus_mutator,
42    property::{
43        dsl::{FixtureExpect, Sequence, SequenceFixture, StepOutcome},
44        jsonpath, runner,
45    },
46    seed::{derive_seed, derive_seed_canonical},
47    target::SeverityConfig,
48};
49
50use super::{exec::McpExec, reporter::Reporter};
51
/// Aggregate outcome of one [`SequencePlan::execute`] call, i.e. of a
/// whole batch of sequences rather than a single one.
#[derive(Debug, Clone, Default, Serialize)]
pub struct SequenceReport {
    /// Names of the sequences whose every step passed, in the order
    /// they were executed.
    pub passed: Vec<String>,
    /// Number of findings emitted. A sequence stops at its first
    /// failing step, so each failing sequence contributes exactly one.
    pub findings_count: usize,
    /// Sequences skipped because at least one of their steps targeted
    /// a tool the server didn't advertise. The runner refuses to
    /// partially execute a sequence — pre-flight check fails the
    /// whole thing — so the operator sees a single skip per sequence.
    pub skipped_missing_tool: Vec<SkippedSequence>,
}
65
/// One sequence that was skipped by the pre-flight check because it
/// references a tool the server does not advertise. None of its steps
/// were executed.
#[derive(Debug, Clone, Serialize)]
pub struct SkippedSequence {
    /// Sequence name from YAML.
    pub sequence: String,
    /// First missing tool the runner spotted, in step order (the
    /// sequence may reference more, but reporting one is enough for
    /// triage).
    pub missing_tool: String,
}
75
76/// Plan for executing a batch of [`Sequence`]s.
77pub struct SequencePlan {
78    /// Sequences to run, in declaration order.
79    pub sequences: Vec<Sequence>,
80    /// Master seed; per-sequence seeds derive from `master_seed +
81    /// sequence_name` deterministically.
82    pub master_seed: u64,
83    /// Per-step call timeout.
84    pub timeout: Duration,
85    /// Transport label for [`ReproInfo`].
86    pub transport_name: String,
87    /// `[severity]` overrides from `wallfacer.toml`.
88    pub severity: SeverityConfig,
89    /// Phase V — optional persistent corpus, shared with the v0.6
90    /// fuzzer. When set, sequence steps mutate from the corpus
91    /// `mutate_ratio` fraction of the time and save inputs that
92    /// trigger findings or produce a previously-unseen response
93    /// fingerprint per step. Cross-pollinates with `wallfacer fuzz
94    /// --corpus-feedback`: fuzz-discovered "interesting ids" can
95    /// seed sequence steps that call the same tool.
96    pub fuzz_corpus: Option<crate::fuzz_corpus::FuzzCorpus>,
97    /// Phase V — fraction of sequence-step inputs that mutate from
98    /// the corpus instead of using the YAML literal verbatim.
99    /// Range `0.0..=1.0`. Default `0.9` matches the fuzz default.
100    /// Ignored when [`Self::fuzz_corpus`] is `None` or the per-tool
101    /// corpus is empty.
102    pub mutate_ratio: f64,
103}
104
105impl SequencePlan {
106    /// Drives the sequence loop. Returns once every sequence has
107    /// either passed, produced a finding, or been skipped for a
108    /// missing tool.
109    ///
110    /// Lifecycle events (`on_run_start` / `on_run_end`) are *not*
111    /// emitted: callers compose this plan with the property plan and
112    /// run them through a single reporter instance, so wrapping each
113    /// sub-run with its own start/end would split the JSON envelope
114    /// and confuse downstream consumers. The reporter sees a clean
115    /// stream of `on_finding` / `on_skipped` calls with the sequence
116    /// findings interleaved with the single-tool findings.
117    pub async fn execute<C: McpExec + ?Sized>(
118        self,
119        client: &mut C,
120        corpus: &Corpus,
121        reporter: &mut dyn Reporter,
122    ) -> Result<SequenceReport> {
123        let live_tools = client.list_tools().await?;
124        let tool_names: std::collections::BTreeSet<String> =
125            live_tools.iter().map(|t| t.name.to_string()).collect();
126
127        let mut report = SequenceReport::default();
128
129        // Phase V — preload the response-fingerprint set from
130        // every prior corpus entry across the tools this batch of
131        // sequences calls into. A novel fingerprint at run time is
132        // one not present in this set.
133        let mut seen_fingerprints: std::collections::BTreeSet<String> =
134            std::collections::BTreeSet::new();
135        if let Some(corpus_ref) = self.fuzz_corpus.as_ref() {
136            let touched_tools: std::collections::BTreeSet<&str> = self
137                .sequences
138                .iter()
139                .flat_map(|s| s.steps.iter().map(|st| st.call.as_str()))
140                .collect();
141            for tool in touched_tools {
142                if let Ok(entries) = corpus_ref.list(tool) {
143                    for e in entries {
144                        seen_fingerprints.insert(e.fingerprint);
145                    }
146                }
147            }
148        }
149
150        for sequence in &self.sequences {
151            // Pre-flight: refuse to run a sequence that references a
152            // tool the server doesn't advertise. Half-running a
153            // sequence would leak state (e.g. the create step fired
154            // but the delete step couldn't), which is worse than
155            // skipping cleanly.
156            if let Some(missing) = sequence
157                .steps
158                .iter()
159                .find(|s| !tool_names.contains(&s.call))
160                .map(|s| s.call.clone())
161            {
162                reporter.on_skipped(
163                    &sequence.name,
164                    &format!("step calls `{missing}` which the server does not advertise"),
165                );
166                report.skipped_missing_tool.push(SkippedSequence {
167                    sequence: sequence.name.clone(),
168                    missing_tool: missing,
169                });
170                continue;
171            }
172
173            reporter.on_iteration_start(&sequence.name, 0);
174            let canonical = derive_seed_canonical(self.master_seed, &sequence.name, 0);
175            let seed = derive_seed(self.master_seed, &sequence.name, 0);
176            let mut rng = ChaCha20Rng::from_seed(canonical);
177
178            let outcome = run_one_sequence(
179                client,
180                sequence,
181                &mut rng,
182                self.timeout,
183                self.fuzz_corpus.as_ref(),
184                self.mutate_ratio,
185                &mut seen_fingerprints,
186            )
187            .await;
188            match outcome {
189                SequenceOutcome::Pass => {
190                    report.passed.push(sequence.name.clone());
191                }
192                SequenceOutcome::Fail {
193                    step_index,
194                    step_call,
195                    detail,
196                    last_input,
197                } => {
198                    let mut finding = Finding::new(
199                        FindingKind::SequenceFailure {
200                            sequence: sequence.name.clone(),
201                            step_index,
202                            step_call: step_call.clone(),
203                        },
204                        sequence.name.clone(),
205                        format!("sequence `{}` failed at step {step_index}", sequence.name),
206                        detail,
207                        ReproInfo {
208                            seed,
209                            tool_call: last_input,
210                            transport: self.transport_name.clone(),
211                            composition_trail: Vec::new(),
212                        },
213                    );
214                    if let Some(override_sev) = self.severity.resolve(finding.kind.keyword()) {
215                        finding = finding.with_severity(override_sev);
216                    }
217                    corpus.write_finding(&finding)?;
218                    reporter.on_finding(&finding);
219                    report.findings_count += 1;
220                }
221            }
222            reporter.on_iteration_end(&sequence.name, 0);
223        }
224
225        Ok(report)
226    }
227}
228
/// Internal result of running a single sequence.
enum SequenceOutcome {
    /// Every step of the sequence completed without a failure.
    Pass,
    /// The sequence stopped at the first step that failed.
    Fail {
        /// Zero-based index of the failing step within the sequence.
        step_index: usize,
        /// Name of the tool the failing step calls.
        step_call: String,
        /// Free-form, human-readable description of the failure.
        detail: String,
        /// Input associated with the failing step: the input that was
        /// sent to the tool, or the raw `with:` block when placeholder
        /// substitution itself failed.
        last_input: Value,
    },
}
239
240/// Executes one [`Sequence`]. Stops at the first failing step and
241/// returns the offending step's index plus a free-form detail string.
242///
243/// Phase V — when `corpus` is `Some`, the runner mutates each
244/// step's `with:` block from the per-tool corpus
245/// `mutate_ratio` fraction of the time and saves any input that
246/// triggered a finding *or* produced a previously-unseen response
247/// fingerprint.
248async fn run_one_sequence<C: McpExec + ?Sized>(
249    client: &mut C,
250    sequence: &Sequence,
251    rng: &mut ChaCha20Rng,
252    timeout: Duration,
253    corpus: Option<&FuzzCorpus>,
254    mutate_ratio: f64,
255    seen_fingerprints: &mut std::collections::BTreeSet<String>,
256) -> SequenceOutcome {
257    let mut context = SequenceContext::new();
258
259    for (step_index, step) in sequence.steps.iter().enumerate() {
260        // Resolve every `{{steps.<bind>.<path>}}` placeholder in the
261        // step's `with:` block against the running context. We do this
262        // before invoking so that on substitution failure we surface a
263        // structural error pointing at the right step.
264        let raw_input = step
265            .with
266            .clone()
267            .map(|map| Value::Object(map.into_iter().collect::<Map<_, _>>()))
268            .unwrap_or(Value::Object(Map::new()));
269        let substituted = match context.substitute(&raw_input) {
270            Ok(value) => value,
271            Err(err) => {
272                return SequenceOutcome::Fail {
273                    step_index,
274                    step_call: step.call.clone(),
275                    detail: format!(
276                        "could not substitute step references in `with:` of step \
277                         {step_index}: {err}"
278                    ),
279                    last_input: raw_input,
280                };
281            }
282        };
283
284        // Phase V — 90/10 mutate-vs-literal split. We mutate the
285        // *substituted* input so step-references already point at
286        // real bound values; mutation rolls dice on top, the
287        // bindings stay coherent. When the corpus has no entry
288        // for this step's tool, fall back to the literal input.
289        use rand::Rng;
290        let prior: Vec<FuzzCorpusEntry> = corpus
291            .map(|c| c.list(&step.call).unwrap_or_default())
292            .unwrap_or_default();
293        let input = if !prior.is_empty() && rng.gen_bool(mutate_ratio.clamp(0.0, 1.0)) {
294            // Pick a random corpus entry, mutate it. The seed
295            // input is whatever the past run found interesting;
296            // current substituted input is ignored on this path
297            // — by design, that's how we explore beyond the
298            // hand-written YAML.
299            let pick = &prior[rng.gen_range(0..prior.len())];
300            corpus_mutator::mutate(&pick.input, rng)
301        } else {
302            substituted.clone()
303        };
304
305        let response = invoke(client, &step.call, input.clone(), timeout, rng).await;
306
307        // Phase V — fingerprint the response BEFORE we destructure
308        // it for outcome / assertion checks below. Save the input
309        // when the fingerprint is novel.
310        let fingerprint = response_fingerprint(&response);
311        if let Some(corpus_ref) = corpus {
312            if seen_fingerprints.insert(fingerprint.clone()) {
313                let _ = corpus_ref.save(&FuzzCorpusEntry {
314                    tool: step.call.clone(),
315                    input: input.clone(),
316                    trigger: CorpusTrigger::NewFingerprint,
317                    fingerprint: fingerprint.clone(),
318                    timestamp: chrono::Utc::now(),
319                });
320            }
321        }
322
323        // Phase V — helper that saves the failing step's input
324        // under the strongest signal (`Finding`). Overrides any
325        // earlier `NewFingerprint` save for the same input via the
326        // corpus's input-key dedup.
327        let save_finding = |corpus_ref: &FuzzCorpus, fingerprint: &str, input: &Value| {
328            let _ = corpus_ref.save(&FuzzCorpusEntry {
329                tool: step.call.clone(),
330                input: input.clone(),
331                trigger: CorpusTrigger::Finding {
332                    kind: "sequence_failure".to_string(),
333                },
334                fingerprint: fingerprint.to_string(),
335                timestamp: chrono::Utc::now(),
336            });
337        };
338
339        // Outcome class check (Ok / Error). Only matters when `expect`
340        // is set: with the default the runner falls through to the
341        // assertion list.
342        let expected = step.expect.unwrap_or_default();
343        if let Some(detail) = check_step_outcome(&response, expected) {
344            if let Some(c) = corpus {
345                save_finding(c, &fingerprint, &input);
346            }
347            return SequenceOutcome::Fail {
348                step_index,
349                step_call: step.call.clone(),
350                detail: format!(
351                    "step {step_index} (`{}`) outcome mismatch: {detail}\n\
352                     input: {}\nresponse: {}",
353                    step.call,
354                    serde_json::to_string_pretty(&input).unwrap_or_default(),
355                    serde_json::to_string_pretty(&response).unwrap_or_default(),
356                ),
357                last_input: input,
358            };
359        }
360
361        // Per-step assertions reuse the existing
362        // `runner::evaluate_one` against an `{input, response}`
363        // context, exactly like single-tool invariants do.
364        if !step.assertions.is_empty() {
365            if let Err(err) =
366                runner::evaluate_step_assertions(&step.assertions, input.clone(), response.clone())
367            {
368                if let Some(c) = corpus {
369                    save_finding(c, &fingerprint, &input);
370                }
371                return SequenceOutcome::Fail {
372                    step_index,
373                    step_call: step.call.clone(),
374                    detail: format!(
375                        "step {step_index} (`{}`) assertion failed: {err}\n\
376                         input: {}\nresponse: {}",
377                        step.call,
378                        serde_json::to_string_pretty(&input).unwrap_or_default(),
379                        serde_json::to_string_pretty(&response).unwrap_or_default(),
380                    ),
381                    last_input: input,
382                };
383            }
384        }
385
386        // Bind the step's envelope so subsequent steps can reference
387        // it. Both input and response are exposed under
388        // `steps.<bind>.{input,response}`.
389        if let Some(bind) = step.bind.as_ref() {
390            context.bind(
391                bind.clone(),
392                json!({
393                    "input": input,
394                    "response": response,
395                }),
396            );
397        }
398    }
399
400    SequenceOutcome::Pass
401}
402
403/// Verifies the step's response matches the declared
404/// [`StepOutcome`]. Returns `None` on match, `Some(detail)` on
405/// mismatch.
406fn check_step_outcome(response: &Value, expected: StepOutcome) -> Option<String> {
407    let observed_error = response
408        .get("isError")
409        .and_then(Value::as_bool)
410        .unwrap_or(false);
411    match expected {
412        StepOutcome::Ok => {
413            if observed_error {
414                Some("expected ok, observed isError=true".into())
415            } else {
416                None
417            }
418        }
419        StepOutcome::Error => {
420            if observed_error {
421                None
422            } else {
423                Some("expected isError=true, observed ok response".into())
424            }
425        }
426    }
427}
428
429/// Per-call invoke wrapper. Mirrors the property runner's
430/// `invoke()` but **does not reconnect** on failure: sequences depend
431/// on per-connection state surviving across steps.
432async fn invoke<C: McpExec + ?Sized>(
433    client: &mut C,
434    tool: &str,
435    input: Value,
436    timeout: Duration,
437    _rng: &mut ChaCha20Rng,
438) -> Value {
439    match client.call_tool(tool, input, timeout).await {
440        CallOutcome::Ok(result) => serde_json::to_value(result).unwrap_or(Value::Null),
441        CallOutcome::Hang(duration) => json!({
442            "content": [{"type": "text", "text": format!("timeout after {duration:?}")}],
443            "isError": true,
444        }),
445        CallOutcome::Crash(reason) => json!({
446            "content": [{"type": "text", "text": reason}],
447            "isError": true,
448        }),
449        CallOutcome::ProtocolError(message) => json!({
450            "content": [{"type": "text", "text": message}],
451            "isError": true,
452        }),
453    }
454}
455
456/// Shared per-sequence context. Holds the `{input, response}` envelope
457/// of every bound step indexed by bind name, and resolves
458/// `{{steps.<bind>.<jsonpath>}}` placeholders inside an arbitrary JSON
459/// value tree.
460pub struct SequenceContext {
461    /// Map of bind name → `{input, response}` envelope.
462    bindings: std::collections::BTreeMap<String, Value>,
463}
464
465impl Default for SequenceContext {
466    fn default() -> Self {
467        Self::new()
468    }
469}
470
471impl SequenceContext {
472    pub fn new() -> Self {
473        Self {
474            bindings: Default::default(),
475        }
476    }
477
478    pub fn bind(&mut self, name: String, envelope: Value) {
479        self.bindings.insert(name, envelope);
480    }
481
482    /// Walks the JSON tree of `value` and replaces every
483    /// `{{steps.<bind>.<jsonpath>}}` placeholder in any string with
484    /// the resolved value. Strings that consist of *exactly* one
485    /// placeholder become the resolved value (preserving its JSON
486    /// type, e.g. number/object). Strings with surrounding text get
487    /// the resolved value stringified into the gap.
488    pub fn substitute(&self, value: &Value) -> Result<Value, String> {
489        match value {
490            Value::String(raw) => self.substitute_string(raw),
491            Value::Array(items) => items
492                .iter()
493                .map(|item| self.substitute(item))
494                .collect::<Result<Vec<_>, _>>()
495                .map(Value::Array),
496            Value::Object(map) => {
497                let mut out = Map::with_capacity(map.len());
498                for (k, v) in map {
499                    out.insert(k.clone(), self.substitute(v)?);
500                }
501                Ok(Value::Object(out))
502            }
503            other => Ok(other.clone()),
504        }
505    }
506
507    fn substitute_string(&self, raw: &str) -> Result<Value, String> {
508        // Special-case: when the entire string is a single placeholder,
509        // preserve the resolved value's native JSON type. This lets
510        // sequences pass numbers/objects/arrays to subsequent steps
511        // without coercion to JSON-encoded strings.
512        if let Some(inner) = single_placeholder(raw) {
513            return self.resolve_path(inner);
514        }
515
516        // Mixed-text path: replace each placeholder with the resolved
517        // value stringified, then return as a String. This handles
518        // patterns like `"Bearer {{steps.login.response.structuredContent.token}}"`.
519        let mut out = String::with_capacity(raw.len());
520        let mut rest = raw;
521        while let Some(idx) = rest.find("{{") {
522            out.push_str(&rest[..idx]);
523            let after_open = &rest[idx + 2..];
524            let close = after_open
525                .find("}}")
526                .ok_or_else(|| format!("unterminated `{{{{...` in `{raw}`"))?;
527            let inner = after_open[..close].trim();
528            let resolved = self.resolve_path(inner)?;
529            match resolved {
530                Value::String(s) => out.push_str(&s),
531                other => out.push_str(&other.to_string()),
532            }
533            rest = &after_open[close + 2..];
534        }
535        out.push_str(rest);
536        Ok(Value::String(out))
537    }
538
539    /// Resolves a path of the form `steps.<bind>.<jsonpath>` against
540    /// the current bindings.
541    fn resolve_path(&self, path: &str) -> Result<Value, String> {
542        // Accept both `steps.NAME.X.Y` and `steps.NAME` forms; the
543        // latter returns the entire `{input, response}` envelope.
544        let inner = path
545            .strip_prefix("steps.")
546            .ok_or_else(|| format!("placeholder must start with `steps.`: `{path}`"))?;
547        let (bind, rest) = inner.split_once('.').unwrap_or((inner, ""));
548        let envelope = self
549            .bindings
550            .get(bind)
551            .ok_or_else(|| format!("no step bound under `{bind}` (yet?)"))?;
552        if rest.is_empty() {
553            return Ok(envelope.clone());
554        }
555        let jsonpath = format!("$.{rest}");
556        jsonpath::resolve_one(envelope, &jsonpath)
557            .map_err(|err| format!("resolving `{path}`: {err}"))
558    }
559}
560
/// Outcome of evaluating one [`SequenceFixture`] against its parent
/// [`Sequence`]. Mirrors [`runner::FixtureOutcome`] but speaks in
/// step-aware terms so the `pack test` reporter can show which step
/// of which sequence broke.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SequenceFixtureOutcome {
    /// Observed sequence outcome matches `fixture.expect`.
    Match,
    /// Observed sequence outcome differs from `fixture.expect`.
    Mismatch {
        /// What the fixture promised (`pass`/`fail`).
        expected: FixtureExpect,
        /// What the runner actually observed.
        observed: FixtureExpect,
        /// Free-form detail: the failing step's index and message, or
        /// a note that all steps passed when a failure was expected.
        detail: String,
    },
    /// Structural error — the fixture could not be evaluated at all.
    /// Raised for a step-count mismatch between the sequence's `steps`
    /// and the fixture's `responses`, and for a step whose `with:`
    /// placeholders cannot be substituted.
    Structural {
        /// Free-form description of the structural problem.
        error: String,
    },
}
586
587/// Evaluates one [`SequenceFixture`] against its [`Sequence`] without
588/// hitting an MCP server: each step's `with:` map is substituted
589/// against the running [`SequenceContext`] just like the live runner
590/// does, but the response comes from `fixture.responses[i]` instead
591/// of a real call.
592pub fn evaluate_sequence_fixture(
593    sequence: &Sequence,
594    fixture: &SequenceFixture,
595) -> SequenceFixtureOutcome {
596    if fixture.responses.len() != sequence.steps.len() {
597        return SequenceFixtureOutcome::Structural {
598            error: format!(
599                "fixture provides {} responses but sequence has {} steps",
600                fixture.responses.len(),
601                sequence.steps.len()
602            ),
603        };
604    }
605
606    let mut context = SequenceContext::new();
607    let mut sequence_failed_at: Option<(usize, String)> = None;
608
609    for (step_index, step) in sequence.steps.iter().enumerate() {
610        let raw_input = step
611            .with
612            .clone()
613            .map(|map| Value::Object(map.into_iter().collect::<Map<_, _>>()))
614            .unwrap_or(Value::Object(Map::new()));
615        let input = match context.substitute(&raw_input) {
616            Ok(value) => value,
617            Err(err) => {
618                return SequenceFixtureOutcome::Structural {
619                    error: format!(
620                        "could not substitute step references in step {step_index}: {err}"
621                    ),
622                };
623            }
624        };
625        let response = fixture.responses[step_index].clone();
626
627        let expected = step.expect.unwrap_or_default();
628        if let Some(detail) = check_step_outcome(&response, expected) {
629            sequence_failed_at = Some((step_index, format!("outcome mismatch: {detail}")));
630            break;
631        }
632
633        if !step.assertions.is_empty() {
634            if let Err(err) =
635                runner::evaluate_step_assertions(&step.assertions, input.clone(), response.clone())
636            {
637                sequence_failed_at = Some((step_index, format!("assertion failed: {err}")));
638                break;
639            }
640        }
641
642        if let Some(bind) = step.bind.as_ref() {
643            context.bind(
644                bind.clone(),
645                json!({
646                    "input": input,
647                    "response": response,
648                }),
649            );
650        }
651    }
652
653    let observed = if sequence_failed_at.is_some() {
654        FixtureExpect::Fail
655    } else {
656        FixtureExpect::Pass
657    };
658
659    if observed == fixture.expect {
660        SequenceFixtureOutcome::Match
661    } else {
662        let detail = sequence_failed_at
663            .map(|(idx, msg)| format!("step {idx}: {msg}"))
664            .unwrap_or_else(|| "all steps passed".to_string());
665        SequenceFixtureOutcome::Mismatch {
666            expected: fixture.expect,
667            observed,
668            detail,
669        }
670    }
671}
672
/// Detects a string that is one `{{ ... }}` placeholder and nothing
/// else (leading/trailing whitespace around the braces is ignored).
///
/// Returns the expression between the braces, trimmed, or `None` when
/// `raw` is not wrapped in `{{`/`}}` or when the wrapped text itself
/// contains `{{` or `}}` — the latter is a multi-placeholder string
/// that must go through the mixed-text path.
fn single_placeholder(raw: &str) -> Option<&str> {
    let body = raw
        .trim()
        .strip_prefix("{{")
        .and_then(|rest| rest.strip_suffix("}}"))?;
    let has_nested_delimiter = ["{{", "}}"].iter().any(|&delim| body.contains(delim));
    if has_nested_delimiter {
        None
    } else {
        Some(body.trim())
    }
}
685
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a context with a single `login` binding whose response
    /// exposes `structured` as its `structuredContent`.
    fn login_context(structured: Value) -> SequenceContext {
        let mut ctx = SequenceContext::new();
        ctx.bind(
            "login".into(),
            json!({"input": {}, "response": {"structuredContent": structured}}),
        );
        ctx
    }

    /// A whole-string placeholder keeps the JSON type of its target.
    #[test]
    fn single_placeholder_preserves_type() {
        let ctx = login_context(json!({"id": 42}));
        let template = json!("{{steps.login.response.structuredContent.id}}");
        assert_eq!(ctx.substitute(&template).unwrap(), json!(42));
    }

    /// A placeholder embedded in text is spliced into the string.
    #[test]
    fn mixed_text_substitutes_inline() {
        let ctx = login_context(json!({"token": "abc"}));
        let template = json!("Bearer {{steps.login.response.structuredContent.token}}");
        assert_eq!(ctx.substitute(&template).unwrap(), json!("Bearer abc"));
    }

    /// Referencing a bind name that was never bound is an error that
    /// names the missing bind.
    #[test]
    fn unknown_step_surfaces_error() {
        let ctx = SequenceContext::new();
        let err = ctx.substitute(&json!("{{steps.missing.x}}")).unwrap_err();
        assert!(err.contains("missing"), "{err}");
    }

    /// An opening `{{` without its closing `}}` is rejected.
    #[test]
    fn unterminated_placeholder_errors() {
        let mut ctx = SequenceContext::new();
        ctx.bind("a".into(), json!({}));
        let err = ctx.substitute(&json!("hello {{steps.a")).unwrap_err();
        assert!(err.contains("unterminated"));
    }

    /// A response without `isError` satisfies an `Ok` expectation.
    #[test]
    fn step_outcome_ok_default_passes_when_no_is_error() {
        let response = json!({"content": [{"type": "text", "text": "ok"}]});
        assert!(check_step_outcome(&response, StepOutcome::Ok).is_none());
    }

    /// `isError: true` satisfies an `Error` expectation.
    #[test]
    fn step_outcome_error_passes_when_is_error_true() {
        let response = json!({"isError": true, "content": []});
        assert!(check_step_outcome(&response, StepOutcome::Error).is_none());
    }

    /// `isError: true` against an `Ok` expectation yields a detail.
    #[test]
    fn step_outcome_mismatch_returns_detail() {
        let response = json!({"isError": true, "content": []});
        assert!(check_step_outcome(&response, StepOutcome::Ok).is_some());
    }
}