Skip to main content

mockforge_bench/conformance/
self_test.rs

1//! Positive + per-category negative request driver against a live server.
2//!
3//! Issue #79 round 13 (4) — Srikanth's (e) ask: a way to test both
4//! positive and negative compliance scenarios separately, where the
5//! positive cases should pass and the negative cases should be
6//! rejected.
7//!
8//! This module sits *alongside* the existing conformance executor
9//! (which drives k6 / native checks on a single positive call per
10//! operation). The self-test driver synthesises per-category
11//! deliberately-bad requests and asserts that the server actually
12//! rejects them with a 4xx — useful when verifying that
13//! `validate_request_with_all` is wired correctly for the user's spec
14//! (the exact gap that round-13 (3) fixed).
15//!
16//! Scope of the initial MVP: covers the highest-signal negatives —
17//! empty body when one is required, missing required query/header
18//! params, and wrong-type path params. Doesn't try to mutate every
19//! field of a JSON-Schema-validated body; that's a follow-up.
20
21use super::spec_driven::{AnnotatedOperation, ApiKeyLocation, SecuritySchemeInfo};
22use reqwest::{Client, Method};
23use std::collections::BTreeMap;
24use std::net::IpAddr;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::sync::{Arc, Mutex};
27use std::time::Duration;
28
/// Round 23 (c-iii) — per-direction body cap, in bytes, applied when
/// capturing request/response payloads to
/// `conformance-self-test-requests.jsonl`.
///
/// 16 KiB keeps a 1000-case run under ~32 MB even if every payload
/// fills the cap (one request body plus one response body per case),
/// while still preserving enough of a typical JSON body (or a
/// stack-trace error response) to debug from.
const CAPTURE_BODY_CAP_BYTES: usize = 16 * 1024;
35
/// Round 17.2 — upper bound on schema-driven negatives generated per
/// operation.
///
/// A spec with 100 properties per body could produce hundreds of
/// mutations for a single operation; combined with thousands of
/// operations that's a runaway test matrix. 12 covers the
/// highest-signal mutations (type mismatch + required-removed + a few
/// constraint breaks) without exploding wall time on large specs.
const SCHEMA_MUTATION_CAP: usize = 12;
43
/// Round 25 (k) — content-type swap probes. For operations declaring a
/// JSON request body, each entry below produces one probe that lies
/// about Content-Type while keeping the JSON payload. A spec-compliant
/// server should respond 415 (or 400). Order matches the order
/// Srikanth listed in his round-23 reply: XML, YAML, multipart, and
/// the URL-encoded variant he added in round 24.
///
/// Each entry is a `(content_type, label)` pair: the first element is
/// the Content-Type value sent on the wire, the second is the probe
/// label (its `request-body` prefix before the first `:` is what the
/// report uses as the negative category).
const CONTENT_TYPE_SWAP_VARIANTS: &[(&str, &str)] = &[
    ("application/xml", "request-body:content-type-mismatch:xml"),
    ("application/yaml", "request-body:content-type-mismatch:yaml"),
    ("multipart/form-data", "request-body:content-type-mismatch:multipart"),
    (
        "application/x-www-form-urlencoded",
        "request-body:content-type-mismatch:urlencoded",
    ),
];
59
/// Round 27 (k variant b) — embedded content payloads. Content-Type
/// stays `application/json` and the envelope IS valid JSON; we just
/// stuff a non-JSON snippet into a string field's value. The test
/// surfaces servers that try to parse string field contents (e.g.
/// XML-EE expanders, YAML loaders, urlencoded parsers) and crash on
/// the payload — a 5xx here is the finding.
///
/// Each entry is a `(label, payload)` pair — note this is the reverse
/// of `CONTENT_TYPE_SWAP_VARIANTS`, where the label comes second.
const EMBEDDED_CONTENT_VARIANTS: &[(&str, &str)] = &[
    ("request-body:embedded-content:xml", "<root><cmd>execute()</cmd></root>"),
    ("request-body:embedded-content:yaml", "key: value\n- item1\n- item2"),
    (
        "request-body:embedded-content:multipart",
        "--boundary\r\nContent-Disposition: form-data; name=\"x\"\r\n\r\nval\r\n--boundary--",
    ),
    ("request-body:embedded-content:urlencoded", "a=1&b=2&c=hello%20world"),
];
75
/// Configuration for a self-test run.
///
/// Construct via `SelfTestConfig::default()` and override the fields
/// that matter; the defaults target `http://localhost:3000` with no
/// capture, no source-IP binding and no response-schema validation.
#[derive(Debug, Clone)]
pub struct SelfTestConfig {
    /// Base URL of the server under test; spec paths (and the optional
    /// `base_path`) are appended to it when building request URLs.
    pub target_url: String,
    /// When true, the HTTP clients are built with
    /// `danger_accept_invalid_certs(true)`, i.e. TLS certificate
    /// validation is disabled.
    pub skip_tls_verify: bool,
    /// Timeout configured on every `reqwest::Client` in the pool.
    pub timeout: Duration,
    /// Optional extra headers to attach to every request (e.g. auth).
    pub extra_headers: Vec<(String, String)>,
    /// Delay between requests to avoid hammering the server. In
    /// `run_self_test_with_deadline` this pause is applied once after
    /// each operation's cases have run; a zero duration disables it.
    pub delay_between_requests: Duration,
    /// Round 18.1 — base path to prepend to every spec path. When the
    /// spec declares `/users` and the deployed API is served under
    /// `/api`, `--base-path /api` should make the self-test hit
    /// `https://target/api/users` instead of `https://target/users`.
    /// Pre-fix this was ignored entirely and every operation 404'd
    /// (Srikanth's vCenter run on 0.3.152: 1275 positives, 1275 4xx).
    pub base_path: Option<String>,
    /// Round 18.5 — local source IPs to bind outgoing requests to.
    /// Each IP must already be assigned to an interface on the host.
    /// Operations round-robin through the resulting client pool.
    /// Empty → a single client with no explicit local address.
    pub source_ips: Vec<IpAddr>,
    /// Round 18.5 — fake source IPs to advertise via forwarded-IP
    /// headers (used to exercise GEODB lookup at the destination).
    /// Rotated per operation.
    pub geo_source_ips: Vec<IpAddr>,
    /// Which forwarded-IP header(s) to populate when `geo_source_ips`
    /// is non-empty. Empty → no-op; the `Default` impl fills this with
    /// `default_geo_source_headers()` (the standard three-header set).
    pub geo_source_headers: Vec<String>,
    /// Round 23 (c-iii) — when `Some`, every probe captures method, URL,
    /// request headers/body and response status/headers/body into this
    /// sink. Caller drains it after `run_self_test` and writes
    /// `conformance-self-test-requests.jsonl`. None → no capture (zero
    /// extra allocations on the hot path).
    pub capture: Option<Arc<Mutex<Vec<CaseCapture>>>>,
    /// Round 25 — when true, validate every probe's response body
    /// against the spec's response schema for the actual status
    /// returned (closes round 21.3 / Srikanth's a2 / a3 ask). The
    /// validation result lands in `CaseCapture::response_schema_error`
    /// (None → matched, or no schema for that status). Default false:
    /// JSON-Schema validation of large response bodies adds wall-clock
    /// time and the user has to opt in.
    pub validate_response_schemas: bool,
    /// Round 33 (#823) — human-readable label for the OpenAPI spec
    /// this run is exercising. Stamped on every `CaseCapture` so the
    /// per-endpoint summary can attribute rows back to a spec in
    /// multi-spec / multi-target benches. `None` when the bench didn't
    /// track a spec path.
    pub spec_label: Option<String>,
    /// Round 47 (#79) — Srikanth on 0.3.191: "I did not see network
    /// logs in the mockforge bench and conformance traffic if used
    /// the [self-test] command". The r46 wire-level event sink only
    /// existed on the native conformance executor; this matches it on
    /// the self-test side. When `Some`, every `reqwest::Error` from
    /// `send().await` is classified and pushed to this sink; caller
    /// drains it into `conformance-network-events.json` next to the
    /// JSONL capture. None → no extra allocations on the hot path.
    pub network_events: Option<Arc<Mutex<Vec<NetworkEvent>>>>,
    /// Round 49 (#79) — current iteration number (1-indexed). The
    /// runner stamps it on every CaseCapture so the JSONL line and
    /// violation rows carry the iteration counter. Defaults to 1
    /// for non-looping runs.
    pub current_iteration: u32,
}
140
/// Round 47 (#79) — wire-level network event captured by the self-test
/// driver. Same shape as the native executor's `NetworkEvent` so
/// downstream tooling can consume one file across executor variants.
///
/// Field order is the serialised order; keep it stable.
#[derive(Debug, Clone, serde::Serialize)]
pub struct NetworkEvent {
    /// UTC time associated with the event.
    // NOTE(review): presumably the moment the error was observed — the
    // code that populates this is outside this chunk; confirm.
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Identifier of the check/probe the event belongs to.
    // NOTE(review): likely the probe label (e.g. `positive`); confirm
    // against the producer.
    pub check: String,
    /// HTTP method of the request that produced the event.
    pub method: String,
    /// URL of the request that produced the event.
    pub url: String,
    /// Classification of the underlying `reqwest::Error`.
    // NOTE(review): the set of possible values is defined by the
    // classifier, which is not visible here.
    pub kind: String,
    /// Human-readable detail for the event.
    pub message: String,
}
153
/// Round 23 (c-iii) — one captured request/response pair, one per
/// probe (positive or negative). Serialised as a JSON line in
/// `conformance-self-test-requests.jsonl`. Headers are kept as
/// `BTreeMap` for stable ordering. Bodies are truncated to
/// `CAPTURE_BODY_CAP_BYTES`; `*_truncated` flags whether more was
/// dropped.
///
/// Fields added after round 23 carry `#[serde(default)]` so older
/// JSONL files still deserialise.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CaseCapture {
    /// Probe label.
    // NOTE(review): presumably the same label handed to the probe
    // sender (e.g. `positive`, `request-body:empty`); confirm.
    pub label: String,
    /// HTTP method of the probe request.
    pub method: String,
    /// Full request URL of the probe.
    pub url: String,
    /// Request headers, sorted by name via `BTreeMap`.
    pub request_headers: BTreeMap<String, String>,
    /// Request body (possibly truncated), or `None` when absent.
    pub request_body: Option<String>,
    /// True when `request_body` was cut at the capture cap.
    pub request_body_truncated: bool,
    /// HTTP status code of the response.
    // NOTE(review): value used when no response arrived is set by the
    // producer, which is outside this chunk.
    pub response_status: u16,
    /// Response headers, sorted by name via `BTreeMap`.
    pub response_headers: BTreeMap<String, String>,
    /// Response body (possibly truncated), or `None` when absent.
    pub response_body: Option<String>,
    /// True when `response_body` was cut at the capture cap.
    pub response_body_truncated: bool,
    /// Error text for the probe, if any.
    // NOTE(review): presumably the transport-level error message when
    // the request failed; confirm against the producer.
    pub error: Option<String>,
    /// Round 25 — when `validate_response_schemas` is on and the spec
    /// declares a schema for `response_status`, this carries the
    /// validation message (or None when the body matched, or no schema
    /// was declared for that status). Serialised verbatim in the JSONL
    /// and rendered in the HTML viewer. Omitted from the output when
    /// `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_schema_error: Option<String>,
    /// Round 28 — Srikanth's "Is it possible to put expected response
    /// code status in both jsonl and jsonl report" ask. Human-readable
    /// expected status range: `"2xx-3xx"` for positive probes,
    /// `"4xx"` for negatives. Lets users `jq` for misses
    /// (`.response_status as $s | .expected_status_range == "4xx"
    /// and ($s < 400 or $s >= 500)`) and powers the HTML viewer's
    /// "show mismatches only" filter.
    #[serde(default)]
    pub expected_status_range: String,
    /// Round 33 (#823) — the spec's path template (e.g.
    /// `/users/{id}`) before path-param substitution. Lets the
    /// per-endpoint summary collapse `/users/X` and `/users/Y` into
    /// one row. Empty string when the call site predates this field
    /// (older `CaseCapture` payloads on disk also deserialise OK).
    #[serde(default)]
    pub path_template: String,
    /// Round 33 (#823) — basename (or fallback to full path) of the
    /// OpenAPI spec file this probe came from. Lets multi-spec runs
    /// attribute rows back to the spec they came from. `None` when
    /// the bench didn't track a spec path. Omitted from the output
    /// when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub spec_label: Option<String>,
    /// Round 36 (#876) — mockforge version that ran the probe.
    /// Stamped from `CARGO_PKG_VERSION` at compile time. Also sent
    /// as the `X-Mockforge-Client-Version` request header so a
    /// matching `ServerConformanceViolation.client_mockforge_version`
    /// can be cross-correlated. Empty string when the capture
    /// pre-dates this field.
    #[serde(default)]
    pub mockforge_version: String,
    /// Round 36 (#876) — wall-clock moment the bench driver sent the
    /// request, as RFC3339 / ISO-8601. Also sent as the
    /// `X-Mockforge-Client-Sent-At` request header so the server-side
    /// `ServerConformanceViolation.client_sent_at` carries the same
    /// value. Empty string when the capture pre-dates this field.
    #[serde(default)]
    pub client_sent_at: String,
    /// Round 49 (#79) — Srikanth on 0.3.193: "Is it possible to
    /// differentiate in the logs what is the iteration count that
    /// way I will know how many requests are sent with that
    /// violation." Stamped from the
    /// `SelfTestConfig::current_iteration` field by the outer loop
    /// in command.rs before each call to `run_self_test_with_deadline`.
    /// 1-indexed; defaults to 1 for single-iteration runs so an older
    /// JSONL that didn't carry the field deserialises as iteration 1.
    #[serde(default = "default_iteration")]
    pub iteration: u32,
}
228
/// Serde default for `CaseCapture::iteration` (referenced by name in
/// its `#[serde(default = "default_iteration")]` attribute): captures
/// that lack the field deserialise as iteration 1.
fn default_iteration() -> u32 {
    1
}
232
233impl Default for SelfTestConfig {
234    fn default() -> Self {
235        Self {
236            target_url: "http://localhost:3000".into(),
237            skip_tls_verify: false,
238            timeout: Duration::from_secs(15),
239            extra_headers: Vec::new(),
240            delay_between_requests: Duration::from_millis(0),
241            base_path: None,
242            source_ips: Vec::new(),
243            geo_source_ips: Vec::new(),
244            geo_source_headers: default_geo_source_headers(),
245            capture: None,
246            validate_response_schemas: false,
247            spec_label: None,
248            network_events: None,
249            current_iteration: 1,
250        }
251    }
252}
253
254/// Truncate `body` to `CAPTURE_BODY_CAP_BYTES` on a UTF-8 boundary,
255/// returning the trimmed string and whether truncation occurred. Used
256/// for both request and response bodies in the capture sink.
257fn truncate_body_for_capture(body: &str) -> (String, bool) {
258    if body.len() <= CAPTURE_BODY_CAP_BYTES {
259        return (body.to_string(), false);
260    }
261    let mut end = CAPTURE_BODY_CAP_BYTES;
262    while end > 0 && !body.is_char_boundary(end) {
263        end -= 1;
264    }
265    (body[..end].to_string(), true)
266}
267
/// Header names populated with the fake source IP when no
/// `--geo-source-header` override is given.
///
/// The set covers the three conventions a GEODB front-end is likely
/// to consult: the de-facto standard `X-Forwarded-For`,
/// Akamai/CloudFront's `True-Client-IP`, and Cloudflare's
/// `CF-Connecting-IP`. They are returned in exactly that order.
pub fn default_geo_source_headers() -> Vec<String> {
    const NAMES: [&str; 3] = ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"];
    NAMES.iter().map(|name| (*name).to_owned()).collect()
}
281
/// Outcome of a single test case (positive or negative).
#[derive(Debug, Clone, serde::Serialize)]
pub struct CaseOutcome {
    /// Case label, e.g. `positive` or `request-body:empty`. For
    /// negatives the part before the first `:` is used as the report
    /// category.
    pub label: String,
    /// True for negative cases (a 4xx is the expected response).
    pub expected_4xx: bool,
    /// HTTP status recorded for the case.
    pub actual_status: u16,
    /// True when the response status matches expectation
    /// (positive → 2xx-3xx, negative → 4xx).
    pub passed: bool,
}
292
/// All cases run against one annotated operation.
#[derive(Debug, Clone, serde::Serialize)]
pub struct OperationResult {
    /// HTTP method of the operation.
    pub method: String,
    /// Path of the operation.
    // NOTE(review): whether this is the spec template or the resolved
    // path is decided by the producer, which is outside this chunk.
    pub path: String,
    /// Outcome of the positive case; `None` when no positive case was
    /// recorded for the operation.
    pub positive: Option<CaseOutcome>,
    /// Outcomes of every negative case sent for the operation.
    pub negatives: Vec<CaseOutcome>,
}
301
/// Summary report rolled up across all operations.
#[derive(Debug, Default, Clone, serde::Serialize)]
pub struct SelfTestReport {
    /// Number of positive cases whose status matched expectation.
    pub positive_pass: usize,
    /// Number of positive cases whose status did not match expectation.
    pub positive_fail: usize,
    /// Per category: count of negative cases the server correctly
    /// rejected with a 4xx (we caught the spec violation). The
    /// category is the negative's label up to its first `:`.
    pub negative_caught: BTreeMap<String, usize>,
    /// Per category: count of negative cases that should have been
    /// rejected but came back with a non-4xx (validator gap).
    pub negative_missed: BTreeMap<String, usize>,
    /// Per-operation results, in the order the operations were run
    /// (and, after `merge_iteration`, in iteration order).
    pub operations: Vec<OperationResult>,
}
315
316impl SelfTestReport {
317    /// All-pass means every positive case got 2xx-3xx and every
318    /// negative case got 4xx.
319    pub fn all_passed(&self) -> bool {
320        self.positive_fail == 0 && self.negative_missed.values().sum::<usize>() == 0
321    }
322
323    /// Round 18.1 — detect the "self-test target is misconfigured"
324    /// case where every positive failed with the *same* status code.
325    /// The classic example: `--base-path /api` was forgotten so every
326    /// request hits a path the server doesn't know and returns 404.
327    /// Pre-warning, the user saw all-green negative buckets (because
328    /// "missing route" 404s look like "validator rejected") and no
329    /// indication that the run was meaningless. Returns Some(status)
330    /// when ≥10 positives all failed with the same status, else None.
331    pub fn detect_target_misconfiguration(&self) -> Option<u16> {
332        if self.positive_pass > 0 || self.positive_fail < 10 {
333            return None;
334        }
335        let mut seen: Option<u16> = None;
336        for op in &self.operations {
337            let Some(p) = &op.positive else {
338                continue;
339            };
340            if p.passed {
341                return None;
342            }
343            match seen {
344                None => seen = Some(p.actual_status),
345                Some(s) if s != p.actual_status => return None,
346                _ => {}
347            }
348        }
349        seen
350    }
351
352    /// Round 47 (#79) — fold a second iteration of the self-test into
353    /// this report so multi-iteration runs aggregate counters across
354    /// passes. Per-category caught / missed counters sum; positive
355    /// counters sum; the `operations` vec records every probe outcome
356    /// so the iteration-N misconfiguration detector still works. Used
357    /// by command.rs's `--conformance-self-test-iterations` /
358    /// `--conformance-self-test-duration` loop.
359    pub fn merge_iteration(&mut self, other: SelfTestReport) {
360        self.positive_pass = self.positive_pass.saturating_add(other.positive_pass);
361        self.positive_fail = self.positive_fail.saturating_add(other.positive_fail);
362        for (k, v) in other.negative_caught {
363            let slot = self.negative_caught.entry(k).or_insert(0);
364            *slot = slot.saturating_add(v);
365        }
366        for (k, v) in other.negative_missed {
367            let slot = self.negative_missed.entry(k).or_insert(0);
368            *slot = slot.saturating_add(v);
369        }
370        self.operations.extend(other.operations);
371    }
372
373    /// Human-readable summary string. One line for positives, one per
374    /// category for negatives. Designed to slot into existing
375    /// `TerminalReporter` output.
376    pub fn render_summary(&self) -> String {
377        let mut out = String::new();
378        out.push_str(&format!(
379            "Positives: {} pass / {} fail\n",
380            self.positive_pass, self.positive_fail
381        ));
382        let mut keys: Vec<&String> =
383            self.negative_caught.keys().chain(self.negative_missed.keys()).collect();
384        keys.sort();
385        keys.dedup();
386        for cat in keys {
387            let caught = self.negative_caught.get(cat).copied().unwrap_or(0);
388            let missed = self.negative_missed.get(cat).copied().unwrap_or(0);
389            let mark = if missed == 0 { "✓" } else { "⚠" };
390            out.push_str(&format!(
391                "Negatives [{}]: {} caught / {} missed  {}\n",
392                cat, caught, missed, mark
393            ));
394        }
395        out
396    }
397}
398
/// Execute the self-test plan against `config.target_url` for every
/// `AnnotatedOperation`. Returns the aggregated report; callers
/// decide how to display it (e.g. via `render_summary` or by writing
/// the JSON serialisation to disk).
///
/// Thin wrapper over `run_self_test_with_deadline` with no deadline,
/// so the whole operation list is always processed.
///
/// # Errors
///
/// Propagates the `reqwest::Error` returned by
/// `run_self_test_with_deadline`.
pub async fn run_self_test(
    operations: &[AnnotatedOperation],
    config: &SelfTestConfig,
) -> Result<SelfTestReport, reqwest::Error> {
    // `None` deadline: never stop early.
    run_self_test_with_deadline(operations, config, None).await
}
409
410/// Round 49 (#79) — Srikanth on 0.3.193: `--conformance-self-test-
411/// duration 5m` ran 5:46 because the outer iteration loop in
412/// command.rs only checks the deadline AFTER a full matrix pass
413/// completes. For long iterations this can overshoot by minutes,
414/// which breaks automation that relies on a fixed wall-clock budget.
415/// New optional `deadline` parameter lets the runner break out
416/// mid-iteration once the deadline elapses; returns the partial
417/// report with whatever operations finished before the deadline.
418pub async fn run_self_test_with_deadline(
419    operations: &[AnnotatedOperation],
420    config: &SelfTestConfig,
421    deadline: Option<std::time::Instant>,
422) -> Result<SelfTestReport, reqwest::Error> {
423    // Round 18.5 — build a client pool when `source_ips` is set,
424    // one reqwest::Client per IP, each bound to its local address.
425    // Operations round-robin through the pool. Empty pool → single
426    // default client (the pre-18.5 behaviour).
427    let clients = build_client_pool(config)?;
428    let client_cursor = AtomicUsize::new(0);
429    let geo_cursor = AtomicUsize::new(0);
430
431    let mut report = SelfTestReport::default();
432    for op in operations {
433        // Round 49 — mid-iteration deadline check. Breaks out of the
434        // per-operation loop the moment the wall-clock budget
435        // elapses, so a 5m budget never overshoots by more than one
436        // probe's round-trip.
437        if let Some(d) = deadline {
438            if std::time::Instant::now() >= d {
439                break;
440            }
441        }
442        let client_idx = client_cursor.fetch_add(1, Ordering::Relaxed) % clients.len();
443        let client = &clients[client_idx];
444        let geo_ip = if config.geo_source_ips.is_empty() {
445            None
446        } else {
447            let idx = geo_cursor.fetch_add(1, Ordering::Relaxed) % config.geo_source_ips.len();
448            Some(config.geo_source_ips[idx])
449        };
450        let result = test_operation(client, config, op, geo_ip).await;
451        if let Some(p) = &result.positive {
452            if p.passed {
453                report.positive_pass += 1;
454            } else {
455                report.positive_fail += 1;
456            }
457        }
458        for neg in &result.negatives {
459            let cat = neg.label.split(':').next().unwrap_or("other").to_string();
460            if neg.passed {
461                *report.negative_caught.entry(cat).or_insert(0) += 1;
462            } else {
463                *report.negative_missed.entry(cat).or_insert(0) += 1;
464            }
465        }
466        report.operations.push(result);
467        if !config.delay_between_requests.is_zero() {
468            tokio::time::sleep(config.delay_between_requests).await;
469        }
470    }
471    Ok(report)
472}
473
/// Round 18.5 — compute the header list actually sent for an
/// operation: the operation's declared headers followed by one
/// forwarded-IP header per name in `geo_headers`, each carrying
/// `geo_ip` rendered as text.
///
/// With `geo_ip == None` or an empty `geo_headers` the result is just
/// a copy of `base`.
///
/// A geo header is skipped when a header of the same name
/// (ASCII-case-insensitive) is already in the list, so a value
/// declared by the operation's spec is never overridden, and a name
/// repeated in `geo_headers` is only added once.
fn effective_op_headers(
    base: &[(String, String)],
    geo_ip: Option<IpAddr>,
    geo_headers: &[String],
) -> Vec<(String, String)> {
    let mut headers: Vec<(String, String)> = base.iter().cloned().collect();
    if let Some(addr) = geo_ip {
        let rendered = addr.to_string();
        for name in geo_headers {
            let already_present =
                headers.iter().any(|(existing, _)| existing.eq_ignore_ascii_case(name));
            if !already_present {
                headers.push((name.clone(), rendered.clone()));
            }
        }
    }
    headers
}
501
502/// Round 18.5 — build a pool of reqwest clients, one per declared
503/// source IP. Empty `source_ips` → a single default client.
504///
505/// The OS must already have each `source_ip` assigned to an
506/// interface; reqwest's `.local_address()` issues a `bind()` syscall
507/// at connect time, so an IP the kernel doesn't recognise surfaces
508/// as `EADDRNOTAVAIL` at request time, not at builder time.
509fn build_client_pool(config: &SelfTestConfig) -> Result<Vec<Client>, reqwest::Error> {
510    let make = |bind: Option<IpAddr>| -> Result<Client, reqwest::Error> {
511        let mut builder = Client::builder().timeout(config.timeout);
512        if config.skip_tls_verify {
513            builder = builder.danger_accept_invalid_certs(true);
514        }
515        if let Some(addr) = bind {
516            builder = builder.local_address(addr);
517        }
518        builder.build()
519    };
520    if config.source_ips.is_empty() {
521        Ok(vec![make(None)?])
522    } else {
523        config.source_ips.iter().map(|ip| make(Some(*ip))).collect()
524    }
525}
526
527async fn test_operation(
528    client: &Client,
529    config: &SelfTestConfig,
530    op: &AnnotatedOperation,
531    geo_ip: Option<IpAddr>,
532) -> OperationResult {
533    // Round 25 — track the sink length BEFORE we run any probes for
534    // this operation, so that after the probes finish we can mutate
535    // exactly the entries that belong to this op (the capture sink is
536    // shared but `run_self_test` iterates operations sequentially).
537    // Used by the response-schema validation pass below.
538    let sink_start = config.capture.as_ref().and_then(|s| s.lock().ok().map(|g| g.len()));
539
540    let url = build_url_with_base(
541        &config.target_url,
542        config.base_path.as_deref(),
543        &op.path,
544        &op.path_params,
545    );
546    let method = Method::from_bytes(op.method.to_uppercase().as_bytes()).unwrap_or(Method::GET);
547
548    // Round 34 (#828) — stamp every `CaseCapture` with the spec
549    // template PREFIXED by `--base-path`, so the per-endpoint
550    // summary's `path` column matches what the user sees in URLs
551    // and logs. Srikanth searched for `/api/appliance/access/...`
552    // and didn't find it because round 33 stored just `/appliance/
553    // access/...`. Same normalization as `build_url_with_base`:
554    // leading `/` auto-added, trailing `/` stripped, empty
555    // base_path → no prefix at all.
556    let path_template = {
557        let prefix = match config.base_path.as_deref() {
558            Some(bp) if !bp.is_empty() => {
559                let trimmed = bp.trim_end_matches('/');
560                if trimmed.starts_with('/') {
561                    trimmed.to_string()
562                } else {
563                    format!("/{}", trimmed)
564                }
565            }
566            _ => String::new(),
567        };
568        let path = if op.path.starts_with('/') {
569            op.path.clone()
570        } else {
571            format!("/{}", op.path)
572        };
573        format!("{prefix}{path}")
574    };
575
576    // Round 18.5 — pre-compute the operation's effective headers
577    // with the geo source IP baked in. Doing it once here keeps the
578    // per-case `send_case` calls below unchanged. When `geo_ip` is
579    // None the result equals `op.header_params`.
580    let op_headers = effective_op_headers(&op.header_params, geo_ip, &config.geo_source_headers);
581
582    // ── Positive case ────────────────────────────────────────────
583    let positive = send_case(
584        client,
585        config,
586        method.clone(),
587        &url,
588        "positive",
589        ExpectedOutcome::Success,
590        op.sample_body.as_deref(),
591        op.query_params.clone(),
592        op_headers.clone(),
593        &path_template,
594    )
595    .await;
596
597    // ── Negative cases ───────────────────────────────────────────
598    let mut negatives = Vec::new();
599
600    // (a) empty body when one is required.
601    //
602    // Round 16 — drop the `sample_body.is_some()` precondition. Operations
603    // whose body annotator couldn't synthesize a sample previously got
604    // zero negatives (so the self-test reported "all passing" even on
605    // POST /resource with a required body). The spec saying the operation
606    // *has* a request body is enough — an empty object is a valid
607    // negative regardless of whether we have a positive sample.
608    if op.request_body_content_type.is_some() {
609        negatives.push(
610            send_case(
611                client,
612                config,
613                method.clone(),
614                &url,
615                "request-body:empty",
616                ExpectedOutcome::ClientError,
617                Some("{}"),
618                op.query_params.clone(),
619                op_headers.clone(),
620                &path_template,
621            )
622            .await,
623        );
624
625        // (b) wrong-shaped body (array instead of object) — exercises
626        // top-level type validation independently of which fields are
627        // required.
628        negatives.push(
629            send_case(
630                client,
631                config,
632                method.clone(),
633                &url,
634                "request-body:wrong-type",
635                ExpectedOutcome::ClientError,
636                Some("[]"),
637                op.query_params.clone(),
638                op_headers.clone(),
639                &path_template,
640            )
641            .await,
642        );
643
644        // Round 25 (k) — content-type swap probes.
645        //
646        // For operations declaring `application/json` request bodies, send
647        // the SAME json payload (or a synthesised one) under four other
648        // content types: `application/xml`, `application/yaml`,
649        // `multipart/form-data`, `application/x-www-form-urlencoded`.
650        // The spec says the endpoint accepts only JSON, so a strict server
651        // should respond 415 Unsupported Media Type (or 400 if it tries
652        // to parse and fails). A 2xx means the server is accepting
653        // payloads outside its declared content negotiation, which is the
654        // failure mode behind a lot of "we crashed on a malformed XML
655        // upload" incidents.
656        //
657        // Variant (a) of Srikanth's round-23 g ask: lie about the
658        // Content-Type header. The body shape is honest JSON; only the
659        // header is swapped. Variant (b) (JSON envelope with embedded
660        // non-JSON field values) is deferred to round 26 because it
661        // requires a schema-aware field walker.
662        if op
663            .request_body_content_type
664            .as_deref()
665            .map(|ct| ct.contains("json"))
666            .unwrap_or(false)
667        {
668            let payload = op.sample_body.as_deref().unwrap_or("{}");
669            for (ct, label) in CONTENT_TYPE_SWAP_VARIANTS {
670                negatives.push(
671                    send_case_with_extra(
672                        client,
673                        config,
674                        method.clone(),
675                        &url,
676                        label,
677                        ExpectedOutcome::ClientError,
678                        Some(payload),
679                        op.query_params.clone(),
680                        // Strip any Content-Type already on the operation
681                        // headers (the spec's positive value) so the
682                        // probe's value is the only one the server sees.
683                        op_headers
684                            .iter()
685                            .filter(|(k, _)| !k.eq_ignore_ascii_case("content-type"))
686                            .cloned()
687                            .collect(),
688                        // The wrong Content-Type rides on `extra_headers`
689                        // so it lands AFTER `send_case_with_extra`'s
690                        // unconditional `application/json` insertion in
691                        // request-body mode. Actually `send_case_with_extra`
692                        // only sets Content-Type when a body is present
693                        // AND there's no manual override; passing the
694                        // override here wins because reqwest preserves
695                        // the last-set header value.
696                        vec![("Content-Type".to_string(), (*ct).to_string())],
697                        &path_template,
698                    )
699                    .await,
700                );
701            }
702
703            // Round 27 (k variant b) — embedded non-JSON content
704            // inside a valid JSON envelope. Content-Type stays
705            // application/json (honest) and the body parses as JSON;
706            // only the string-valued payload changes. We expect 2xx-3xx
707            // because the envelope is spec-shape, so the probe surfaces
708            // servers that crash (5xx) trying to parse the embedded
709            // snippet as XML/YAML/etc. A 4xx is also a finding because
710            // it usually means the server's pattern/format validator
711            // tripped on the payload contents, but the user can decide
712            // from the JSONL whether that's a bug or correct narrow-
713            // string-field behaviour.
714            for (label, snippet) in EMBEDDED_CONTENT_VARIANTS {
715                let payload = op.sample_body.as_deref().unwrap_or("{}");
716                // Round 34 (#829) — skip the probe entirely when the
717                // positive sample has no string leaf we can mutate.
718                // The previous round-27 fallback `{"data": <snippet>}`
719                // produced a body that doesn't match the spec's actual
720                // schema for endpoints like vCenter's `consolecli` PUT
721                // (which wants `{enabled: bool}`), so the server
722                // correctly 400'd and the bench misreported the
723                // mismatch as an expectation failure.
724                let Some(body) = embed_payload_in_first_string_field(payload, snippet) else {
725                    continue;
726                };
727                negatives.push(
728                    send_case(
729                        client,
730                        config,
731                        method.clone(),
732                        &url,
733                        label,
734                        // expected_4xx=false: any non-2xx is a probe
735                        // failure. 5xx in particular is "server panicked
736                        // on the embedded content".
737                        ExpectedOutcome::NotServerError,
738                        Some(&body),
739                        op.query_params.clone(),
740                        op_headers.clone(),
741                        &path_template,
742                    )
743                    .await,
744                );
745            }
746        }
747
748        // Round 17.2 — schema-aware negatives.
749        //
750        // When both a positive sample AND the resolved body schema are
751        // available, mutate the sample per-field (type mismatch,
752        // min/max bounds, pattern, enum out-of-range, required-field
753        // removal) and assert each is rejected with 4xx. Capped at
754        // SCHEMA_MUTATION_CAP per operation so a 100-property body
755        // doesn't explode the test matrix.
756        if let (Some(sample_str), Some(schema)) =
757            (op.sample_body.as_deref(), op.request_body_schema.as_ref())
758        {
759            if let Ok(sample) = serde_json::from_str::<serde_json::Value>(sample_str) {
760                let mutations = super::schema_mutator::mutate_body(&sample, schema);
761                for m in mutations.into_iter().take(SCHEMA_MUTATION_CAP) {
762                    let body_str = serde_json::to_string(&m.body).unwrap_or_default();
763                    negatives.push(
764                        send_case(
765                            client,
766                            config,
767                            method.clone(),
768                            &url,
769                            &m.label,
770                            ExpectedOutcome::ClientError,
771                            Some(&body_str),
772                            op.query_params.clone(),
773                            // Round 24 (f) — was `op.header_params`, which
774                            // skipped the geo-IP header. Use `op_headers`
775                            // so the geo IP rides with the negative probe
776                            // too (positive vs negative coverage must be
777                            // symmetric, otherwise a GEODB front-end sees
778                            // the rotating IP only on positives).
779                            op_headers.clone(),
780                            &path_template,
781                        )
782                        .await,
783                    );
784                }
785            }
786        }
787    }
788
789    // Round 17.2 — URI-length probe. Spec-agnostic but schema-aware in
790    // spirit: most servers cap URIs at 8 KB or so. Append a 9 KB query
791    // string to the URL and expect 414 URI Too Long (or 400). Skipped
792    // for operations that already have a heavy positive query.
793    {
794        let pad = "p=".to_string() + &"x".repeat(9_000);
795        let bad_url = if url.contains('?') {
796            format!("{url}&{pad}")
797        } else {
798            format!("{url}?{pad}")
799        };
800        negatives.push(
801            send_case(
802                client,
803                config,
804                method.clone(),
805                &bad_url,
806                "parameters:uri-too-long",
807                ExpectedOutcome::ClientError,
808                op.sample_body.as_deref(),
809                op.query_params.clone(),
810                // Round 24 (f) — see schema-mutation note above. Use
811                // `op_headers` (carries geo IP) instead of bare
812                // `op.header_params`.
813                op_headers.clone(),
814                &path_template,
815            )
816            .await,
817        );
818    }
819
820    // (e) Round 16 — path-param type probe. Send the first path
821    // parameter as a literal `"self-test-invalid-id"`: a string that
822    // contains hyphens, won't parse as an integer, won't parse as a
823    // UUID, and won't match any typical regex pattern. Operations
824    // whose spec types the param as `integer` or `string` with a
825    // `format`/`pattern` will catch this (caught: server returned
826    // 4xx); operations whose spec lets path params be free-form
827    // strings will let it through (missed: server returned 2xx).
828    // Either outcome is informative: a category that's all "missed"
829    // tells the user their spec is loose on path-param types, which
830    // is itself worth knowing. Addresses Srikanth's "always all
831    // passing" report — operations with a path param now produce at
832    // least one probe instead of zero.
833    if !op.path_params.is_empty() {
834        let mut url_with_placeholder = op.path.clone();
835        if let Some((first_name, _)) = op.path_params.first() {
836            // Substitute every other path-param with its sample so the
837            // route shape stays intact and only the first param is bad.
838            for (name, value) in op.path_params.iter().skip(1) {
839                if !value.is_empty() {
840                    url_with_placeholder =
841                        url_with_placeholder.replace(&format!("{{{name}}}"), value);
842                }
843            }
844            // Substitute the first param with a guaranteed-invalid
845            // sentinel that's unlikely to match any reasonable schema:
846            // contains characters disallowed in numeric IDs *and* UUIDs.
847            url_with_placeholder =
848                url_with_placeholder.replace(&format!("{{{first_name}}}"), "self-test-invalid-id");
849            // Round 18.1 — honour `base_path` here too, otherwise the
850            // probe URL differs from the positive case and the
851            // resulting 404 is misattributed to "bad path param".
852            let bad_url = build_url_with_base(
853                &config.target_url,
854                config.base_path.as_deref(),
855                &url_with_placeholder,
856                &[],
857            );
858            negatives.push(
859                send_case(
860                    client,
861                    config,
862                    method.clone(),
863                    &bad_url,
864                    "parameters:bad-path-param",
865                    ExpectedOutcome::ClientError,
866                    op.sample_body.as_deref(),
867                    op.query_params.clone(),
868                    op_headers.clone(),
869                    &path_template,
870                )
871                .await,
872            );
873        }
874    }
875
876    // (c) drop the first required query param
877    if !op.query_params.is_empty() {
878        let mut q = op.query_params.clone();
879        q.remove(0);
880        negatives.push(
881            send_case(
882                client,
883                config,
884                method.clone(),
885                &url,
886                "parameters:missing-query",
887                ExpectedOutcome::ClientError,
888                op.sample_body.as_deref(),
889                q,
890                op_headers.clone(),
891                &path_template,
892            )
893            .await,
894        );
895    }
896
897    // (s) Round 17.3 — security probes.
898    //
899    // Operations whose spec declares a security requirement get a
900    // dedicated set of negatives. The point isn't to test whether the
901    // server's *real* auth works (the positive case already does that
902    // via `extra_headers`) — it's to check whether deliberately-bad
903    // credentials are still rejected, which is exactly the failure
904    // mode that lets an attacker through a half-wired validator.
905    //
906    // Each probe replaces or omits the relevant auth credential and
907    // expects 401 / 403. A 2xx here is a hard finding: "spec says
908    // this endpoint is protected, server let unauthenticated /
909    // wrong-credential traffic through".
910    //
911    // Bounded: at most one probe per declared scheme kind, so an
912    // operation with 3 security requirements doesn't 4× the request
913    // volume. Skips entirely when `op.security_schemes` is empty.
914    for probe in build_security_probes(&op.security_schemes) {
915        // Strip any pre-existing Authorization or known API-key
916        // header from extra_headers + header_params so the probe
917        // value is the *only* credential the server sees.
918        let stripped_extra = strip_auth(&config.extra_headers, &op.security_schemes);
919        let stripped_headers = strip_auth(&op.header_params, &op.security_schemes);
920        let stripped_query = strip_auth_query(&op.query_params, &op.security_schemes);
921        let mut req_headers = stripped_headers;
922        for (k, v) in &probe.headers {
923            req_headers.push((k.clone(), v.clone()));
924        }
925        // Round 24 (f) — security probes build req_headers from
926        // `op.header_params` directly (we need the stripped-auth
927        // variant), so the geo-IP header doesn't ride along
928        // automatically. Append it here so a GEODB / WAF in front
929        // of the auth layer still sees the rotating source IP.
930        if let Some(ip) = geo_ip {
931            let ip_str = ip.to_string();
932            for h in &config.geo_source_headers {
933                let already = req_headers.iter().any(|(k, _)| k.eq_ignore_ascii_case(h));
934                if !already {
935                    req_headers.push((h.clone(), ip_str.clone()));
936                }
937            }
938        }
939        let mut req_query = stripped_query;
940        for (k, v) in &probe.query {
941            req_query.push((k.clone(), v.clone()));
942        }
943        negatives.push(
944            send_case_with_extra(
945                client,
946                config,
947                method.clone(),
948                &url,
949                &probe.label,
950                ExpectedOutcome::ClientError,
951                op.sample_body.as_deref(),
952                req_query,
953                req_headers,
954                stripped_extra,
955                &path_template,
956            )
957            .await,
958        );
959    }
960
961    // (d) drop the first required header
962    if !op.header_params.is_empty() {
963        // Round 24 (f) — start from `op_headers` (so the geo IP rides
964        // along) and only strip the first OPERATION-declared header.
965        // Slicing past `op.header_params.len()` would otherwise risk
966        // dropping the geo header itself; `op_headers` is built as
967        // `op.header_params ++ geo` so index 0 is always operational.
968        let mut h = op_headers.clone();
969        if !h.is_empty() {
970            h.remove(0);
971        }
972        negatives.push(
973            send_case(
974                client,
975                config,
976                method.clone(),
977                &url,
978                "parameters:missing-header",
979                ExpectedOutcome::ClientError,
980                op.sample_body.as_deref(),
981                op.query_params.clone(),
982                h,
983                &path_template,
984            )
985            .await,
986        );
987    }
988
989    // (w) Round 17.5 — OWASP/WAF unification.
990    //
991    // Pull one canonical payload per OWASP category from the existing
992    // `SecurityPayloads` library and emit an injection probe per
993    // category. Targets in priority order: (1) substitute the first
994    // query param's value, (2) substitute the first string field of
995    // the positive JSON body, (3) skip if neither is available.
996    //
997    // Label format `owasp:<category>`, so the existing
998    // `negative_caught` / `negative_missed` rollup groups all OWASP
999    // findings under one `owasp` bucket. Expected 4xx (server should
1000    // reject malicious input). A 5xx is a hard finding (server
1001    // crashed on the payload); a 2xx is a soft finding (input passed
1002    // through unfiltered — may or may not be a real vuln).
1003    //
1004    // Bounded: at most one probe per category (7 categories total).
1005    // Skips the operation entirely if no injection target is
1006    // available — open GET endpoints with no params get zero OWASP
1007    // probes, no false signal.
1008    for probe in build_owasp_probes(op) {
1009        negatives.push(
1010            send_case(
1011                client,
1012                config,
1013                method.clone(),
1014                &url,
1015                &probe.label,
1016                ExpectedOutcome::ClientError,
1017                probe.body.as_deref(),
1018                probe.query,
1019                // Round 24 (f) — OWASP injection probes must also
1020                // carry the geo IP, otherwise a WAF / GEODB rule
1021                // tuned to a specific source IP would silently let
1022                // them through.
1023                op_headers.clone(),
1024                &path_template,
1025            )
1026            .await,
1027        );
1028    }
1029
1030    // Round 25 — response-body shape validation pass. For each capture
1031    // this op pushed onto the sink, look up the spec's schema for the
1032    // actual response status and validate. Result lands in
1033    // `response_schema_error` (Some(message) on failure, None on
1034    // pass or no-schema-for-this-status). Runs only when the user
1035    // opted in AND capture is on (we need the body).
1036    if config.validate_response_schemas {
1037        if let (Some(sink), Some(start)) = (config.capture.as_ref(), sink_start) {
1038            if !op.response_schemas.is_empty() {
1039                if let Ok(mut guard) = sink.lock() {
1040                    let end = guard.len();
1041                    for i in start..end {
1042                        let Some(entry) = guard.get_mut(i) else {
1043                            continue;
1044                        };
1045                        let Some(body) = entry.response_body.as_deref() else {
1046                            continue;
1047                        };
1048                        let Some(schema) = op.response_schemas.get(&entry.response_status) else {
1049                            continue;
1050                        };
1051                        entry.response_schema_error = validate_body_against_schema(body, schema);
1052                    }
1053                }
1054            }
1055        }
1056    }
1057
1058    OperationResult {
1059        method: op.method.clone(),
1060        path: op.path.clone(),
1061        positive: Some(positive),
1062        negatives,
1063    }
1064}
1065
1066/// Round 25 — validate a JSON body string against an OpenAPI response
1067/// schema (already converted to a `serde_json::Value`). Returns
1068/// `Some(message)` describing the first violation, or `None` on a
1069/// clean pass / non-JSON body / schema-build failure (in which case
1070/// the absence of an error means "we didn't have anything to compare
1071/// against", not "passed"; the caller-side semantics treat absence as
1072/// success because that's what the user sees as silence).
1073/// Round 27 (k variant b) — return a JSON body string identical to
1074/// `sample` except that the first string-valued leaf has been
1075/// replaced with `snippet`. Walks objects depth-first and stops at
1076/// the first string. Returns `None` when `sample` is not parseable
1077/// JSON or has no string field anywhere; the caller skips emitting
1078/// a probe in that case (Round 34 #829: Srikanth on 0.3.178 found
1079/// that the previous `{"data": <snippet>}` fallback envelope didn't
1080/// match real-API schemas like vCenter's `{enabled: bool}` and the
1081/// server correctly 400'd, which the bench then misreported as a
1082/// `2xx-3xx` expectation miss).
1083fn embed_payload_in_first_string_field(sample: &str, snippet: &str) -> Option<String> {
1084    let mut parsed: serde_json::Value = serde_json::from_str(sample).ok()?;
1085    if !replace_first_string(&mut parsed, snippet) {
1086        return None;
1087    }
1088    serde_json::to_string(&parsed).ok()
1089}
1090
1091/// Helper for `embed_payload_in_first_string_field`: recursively
1092/// walk the value and replace the FIRST string leaf encountered.
1093/// Returns true when a replacement happened. Honors document order
1094/// for objects (BTreeMap-backed `serde_json::Map` iterates in
1095/// insertion order) so the choice of which field to mutate is
1096/// stable across runs.
1097fn replace_first_string(v: &mut serde_json::Value, snippet: &str) -> bool {
1098    match v {
1099        serde_json::Value::String(s) => {
1100            *s = snippet.to_string();
1101            true
1102        }
1103        serde_json::Value::Object(map) => {
1104            for (_k, child) in map.iter_mut() {
1105                if replace_first_string(child, snippet) {
1106                    return true;
1107                }
1108            }
1109            false
1110        }
1111        serde_json::Value::Array(arr) => {
1112            for child in arr.iter_mut() {
1113                if replace_first_string(child, snippet) {
1114                    return true;
1115                }
1116            }
1117            false
1118        }
1119        _ => false,
1120    }
1121}
1122
1123fn validate_body_against_schema(body: &str, schema: &serde_json::Value) -> Option<String> {
1124    let parsed: serde_json::Value = serde_json::from_str(body).ok()?;
1125    let validator = jsonschema::validator_for(schema).ok()?;
1126    let mut errors = validator.iter_errors(&parsed);
1127    let first = errors.next()?;
1128    // Round 28 — Srikanth on 0.3.170 wanted the message to show the
1129    // actual expected schema alongside the kind label so it reads as
1130    // "expected schema {...} but got <kind>". We emit a compact JSON
1131    // serialisation of the schema as a suffix; the kind label still
1132    // names what went wrong in plain English for quick scanning.
1133    // Round 26 — Srikanth on 0.3.169: the prior `format!("{:?}", first.kind)
1134    // .split('(').next()` produced "Type { kind: Single" (broken Rust
1135    // syntax, mismatched braces). Switch to the human-readable mapping
1136    // already used in executor.rs: handle the common kinds (Type,
1137    // Required, AdditionalProperties, Enum, MinLength, MaxLength,
1138    // Minimum, Maximum, Pattern) explicitly; fall back to the
1139    // jsonschema crate's Display impl on the error (which produces
1140    // something like "{...} is not of type \"string\"") for the long
1141    // tail. Combined with `at <instance-path>` for the field location.
1142    let path = first.instance_path.to_string();
1143    let path = if path.is_empty() { "/" } else { path.as_str() };
1144    // Round 31 — Srikanth on 0.3.174 hit the vCenter case where the
1145    // error is "required field missing: comment" but the printed
1146    // schema was the WHOLE parent object schema (with descriptions of
1147    // every property), not just the missing field's sub-schema. The
1148    // jsonschema crate emits `Required` errors with
1149    // `instance_path == /` (the parent), so the round-30 sub-schema
1150    // walker had no extra info to focus the suffix. Carry the missing
1151    // property name out of the kind match so we can descend one more
1152    // step into `properties[property]` for the printed schema.
1153    let mut required_property: Option<String> = None;
1154    let kind_msg: String = match &first.kind {
1155        jsonschema::error::ValidationErrorKind::Type { kind } => {
1156            // `kind` is `TypeKind::Single(JsonType)` or
1157            // `TypeKind::Multiple(JsonTypeSet)`. `JsonType` has its
1158            // own `Display` impl ("string", "object", etc.).
1159            match kind {
1160                jsonschema::error::TypeKind::Single(t) => format!("expected type {t}"),
1161                jsonschema::error::TypeKind::Multiple(_) => "expected one of multiple types".into(),
1162            }
1163        }
1164        jsonschema::error::ValidationErrorKind::Required { property } => {
1165            // `property.to_string()` returns the Display of the JSON
1166            // value, which for a string is `"name"` (with quotes).
1167            // Strip them for the lookup; keep them in the human message.
1168            let raw = property.to_string();
1169            let unquoted = raw
1170                .strip_prefix('"')
1171                .and_then(|s| s.strip_suffix('"'))
1172                .unwrap_or(&raw)
1173                .to_string();
1174            required_property = Some(unquoted);
1175            format!("required field missing: {property}")
1176        }
1177        jsonschema::error::ValidationErrorKind::AdditionalProperties { unexpected } => {
1178            format!("unexpected additional properties: {unexpected:?}")
1179        }
1180        jsonschema::error::ValidationErrorKind::Enum { options } => {
1181            format!("value not in allowed enum: {options}")
1182        }
1183        jsonschema::error::ValidationErrorKind::MinLength { limit } => {
1184            format!("string shorter than min length ({limit})")
1185        }
1186        jsonschema::error::ValidationErrorKind::MaxLength { limit } => {
1187            format!("string longer than max length ({limit})")
1188        }
1189        jsonschema::error::ValidationErrorKind::Minimum { limit } => {
1190            format!("value below minimum ({limit})")
1191        }
1192        jsonschema::error::ValidationErrorKind::Maximum { limit } => {
1193            format!("value above maximum ({limit})")
1194        }
1195        jsonschema::error::ValidationErrorKind::Pattern { pattern } => {
1196            format!("value did not match pattern {pattern}")
1197        }
1198        // Long tail: lean on jsonschema's Display impl, which is the
1199        // built-in human-readable error message ("X is not of type Y").
1200        // Strip trailing newlines so the JSONL line stays one line.
1201        _ => first.to_string().trim().to_string(),
1202    };
1203    // Round 30 — Srikanth on 0.3.173 asked how a deeper nested mismatch
1204    // reads. The prior output printed the WHOLE top-level schema even for
1205    // a single-field mismatch, which buried the actual constraint that
1206    // failed. Walk the instance pointer through the schema's properties
1207    // chain and print the most specific sub-schema we can find. Falls
1208    // back to the full schema for paths the walker can't resolve
1209    // (additionalProperties, oneOf, allOf, $ref un-resolved, etc.).
1210    let mut focused_schema = sub_schema_at_pointer(schema, path).unwrap_or_else(|| schema.clone());
1211    // Round 31 — for Required errors, descend one more step into
1212    // `properties[<missing>]` so the printed schema is the missing
1213    // field's own constraint, not the whole parent.
1214    if let Some(prop_name) = required_property.as_ref() {
1215        if let Some(prop_schema) =
1216            focused_schema.get("properties").and_then(|p| p.get(prop_name.as_str()))
1217        {
1218            focused_schema = prop_schema.clone();
1219        }
1220    }
1221    // Round 34 (#827) — Srikanth on 0.3.178 hit the vCenter
1222    // `enabled: boolean` case where the schema's multi-paragraph
1223    // `description` (and other prose fields) ate the 300-char budget
1224    // before the actually-useful `type` keyword could appear. Strip
1225    // the noise-fields recursively before serializing so the type
1226    // signal survives truncation; constraint keywords (`type`,
1227    // `properties`, `required`, `format`, `items`, etc.) stay.
1228    let focused_schema = strip_schema_noise(&focused_schema);
1229    let schema_str = serde_json::to_string(&focused_schema).unwrap_or_else(|_| "<schema>".into());
1230    let schema_str = if schema_str.len() > 300 {
1231        format!("{}...", &schema_str[..300])
1232    } else {
1233        schema_str
1234    };
1235    // Round 29 — Srikanth on 0.3.172 was confused by `at /:` thinking
1236    // it referenced the URL path; it's actually a JSON pointer into
1237    // the RESPONSE BODY. Reword so that's unambiguous: explicit
1238    // "response body" prefix and a human label for the root case.
1239    let location = if path == "/" {
1240        "response body root".to_string()
1241    } else {
1242        format!("response body at {path}")
1243    };
1244    Some(format!("{location}: {kind_msg}; expected schema {schema_str}"))
1245}
1246
1247/// Round 34 (#827) — drop the human-readable / documentation-only
1248/// fields from a JSON Schema before printing it inside a
1249/// `response_schema_error` message. The validator only cares about
1250/// constraint keywords (`type`, `required`, `properties`, `items`,
1251/// `format`, `enum`, `min*`/`max*`, `pattern`, `oneOf`/`anyOf`/
1252/// `allOf`/`not`); the prose fields can be paragraphs long for real-
1253/// world specs (vCenter's `enabled: bool` field has a multi-paragraph
1254/// description) and were eating the 300-char truncation budget before
1255/// the actually-useful type info could appear. Stripped fields:
1256/// `description`, `example`, `examples`, `summary`, `title`,
1257/// `externalDocs`, `xml`, `discriminator.description`.
1258fn strip_schema_noise(schema: &serde_json::Value) -> serde_json::Value {
1259    const NOISE_KEYS: &[&str] = &[
1260        "description",
1261        "example",
1262        "examples",
1263        "summary",
1264        "title",
1265        "externalDocs",
1266        "xml",
1267    ];
1268    match schema {
1269        serde_json::Value::Object(map) => {
1270            let mut out = serde_json::Map::with_capacity(map.len());
1271            for (k, v) in map {
1272                if NOISE_KEYS.contains(&k.as_str()) {
1273                    continue;
1274                }
1275                out.insert(k.clone(), strip_schema_noise(v));
1276            }
1277            serde_json::Value::Object(out)
1278        }
1279        serde_json::Value::Array(items) => {
1280            serde_json::Value::Array(items.iter().map(strip_schema_noise).collect())
1281        }
1282        other => other.clone(),
1283    }
1284}
1285
1286/// Round 30 — walk a JSON-Pointer-style instance path through a JSON
1287/// Schema and return the sub-schema describing the value at that
1288/// position. For path `/name/age` on
1289/// `{"properties":{"name":{"properties":{"age":{"type":"integer"}}}}}`
1290/// returns `{"type":"integer"}`. Returns `None` for paths the walker
1291/// can't follow (array indices into `items` with no per-index schema,
1292/// `additionalProperties`, `oneOf`/`allOf`, unresolved `$ref`); callers
1293/// should fall back to the full schema in that case.
1294fn sub_schema_at_pointer(schema: &serde_json::Value, pointer: &str) -> Option<serde_json::Value> {
1295    if pointer.is_empty() || pointer == "/" {
1296        return Some(schema.clone());
1297    }
1298    let mut current = schema;
1299    for seg in pointer.trim_start_matches('/').split('/') {
1300        let unescaped = seg.replace("~1", "/").replace("~0", "~");
1301        if let Some(props) = current.get("properties") {
1302            if let Some(sub) = props.get(&unescaped) {
1303                current = sub;
1304                continue;
1305            }
1306        }
1307        if let Some(items) = current.get("items") {
1308            if items.is_object() {
1309                current = items;
1310                continue;
1311            }
1312        }
1313        return None;
1314    }
1315    Some(current.clone())
1316}
1317
/// Round 17.5 — one OWASP injection probe to send.
///
/// Built by `build_owasp_probes`; the driver passes `label`, `body`
/// and `query` straight through to `send_case`.
#[derive(Debug, Clone)]
struct OwaspProbe {
    /// Case label, formatted as `owasp:<category>`.
    label: String,
    /// Request body argument for the probe: the operation's sample
    /// body, with the payload injected when the target is a body field.
    body: Option<String>,
    /// Query parameters for the probe: the operation's query params,
    /// with the payload substituted when the target is a query param.
    query: Vec<(String, String)>,
}
1325
1326/// Build one OWASP probe per `SecurityCategory` for `op`. Targets the
1327/// first query param if any, else the first string field of the
1328/// positive JSON body. Returns empty if neither target is available.
1329fn build_owasp_probes(op: &AnnotatedOperation) -> Vec<OwaspProbe> {
1330    use crate::security_payloads::{SecurityCategory, SecurityPayloads};
1331
1332    let categories = [
1333        SecurityCategory::SqlInjection,
1334        SecurityCategory::Xss,
1335        SecurityCategory::CommandInjection,
1336        SecurityCategory::PathTraversal,
1337        SecurityCategory::Ssti,
1338        SecurityCategory::LdapInjection,
1339        SecurityCategory::Xxe,
1340    ];
1341
1342    // Pick an injection target ONCE per operation; reuse it across
1343    // categories. (A single op gets up to 7 probes — one per category
1344    // — all attacking the same field.)
1345    let injection_target = pick_injection_target(op);
1346    let Some(target) = injection_target else {
1347        return Vec::new();
1348    };
1349
1350    let mut probes = Vec::new();
1351    for cat in categories {
1352        // Take the *first* payload from each category. The
1353        // collection's first entry is the canonical low-risk
1354        // representative; later entries include time-based / blind
1355        // probes that aren't useful as a one-shot rejection test.
1356        let Some(payload) = SecurityPayloads::get_by_category(cat).into_iter().next() else {
1357            continue;
1358        };
1359        let mut query = op.query_params.clone();
1360        let mut body = op.sample_body.clone();
1361        match &target {
1362            InjectionTarget::Query(idx) => {
1363                if let Some(slot) = query.get_mut(*idx) {
1364                    slot.1 = payload.payload.clone();
1365                }
1366            }
1367            InjectionTarget::BodyStringField(field) => {
1368                body = inject_into_body_field(body.as_deref(), field, &payload.payload);
1369            }
1370        }
1371        probes.push(OwaspProbe {
1372            label: format!("owasp:{}", cat),
1373            body,
1374            query,
1375        });
1376    }
1377    probes
1378}
1379
/// Where an OWASP payload is injected for a given operation.
#[derive(Clone, Debug)]
enum InjectionTarget {
    /// Index into the operation's `query_params`; the value at that
    /// index is replaced by the payload.
    Query(usize),
    /// Name of a top-level field of the JSON sample body; its value is
    /// replaced by the payload.
    BodyStringField(String),
}
1385
1386fn pick_injection_target(op: &AnnotatedOperation) -> Option<InjectionTarget> {
1387    if !op.query_params.is_empty() {
1388        return Some(InjectionTarget::Query(0));
1389    }
1390    let sample = op.sample_body.as_deref()?;
1391    let parsed: serde_json::Value = serde_json::from_str(sample).ok()?;
1392    let obj = parsed.as_object()?;
1393    for (k, v) in obj {
1394        if v.is_string() {
1395            return Some(InjectionTarget::BodyStringField(k.clone()));
1396        }
1397    }
1398    None
1399}
1400
1401/// Replace the value of `field` in a JSON-object body with `payload`.
1402/// Returns the mutated body as a JSON string. Returns `None` if the
1403/// body doesn't parse as a JSON object.
1404fn inject_into_body_field(body: Option<&str>, field: &str, payload: &str) -> Option<String> {
1405    let raw = body?;
1406    let mut parsed: serde_json::Value = serde_json::from_str(raw).ok()?;
1407    let obj = parsed.as_object_mut()?;
1408    obj.insert(field.to_string(), serde_json::json!(payload));
1409    serde_json::to_string(&parsed).ok()
1410}
1411
/// Round 17.3 — one synthesised bad credential to send.
///
/// A probe carries only the credential-bearing parts of a request:
/// headers and query parameters to attach. Both are empty for the
/// "no auth at all" probe.
#[derive(Debug, Clone)]
struct SecurityProbe {
    /// Self-test label, e.g. `security:bad-bearer`.
    label: String,
    /// Headers to attach to the probe request.
    headers: Vec<(String, String)>,
    /// Query parameters to attach (API key in query case).
    query: Vec<(String, String)>,
}
1423
1424/// For each declared security scheme, produce one bad-credential
1425/// probe plus a single "no auth at all" probe that exercises the
1426/// missing-credential code path. Deduplicates by scheme kind so an
1427/// operation declaring `[bearer, bearer]` only yields one Bearer
1428/// probe.
1429fn build_security_probes(schemes: &[SecuritySchemeInfo]) -> Vec<SecurityProbe> {
1430    if schemes.is_empty() {
1431        return Vec::new();
1432    }
1433    let mut probes: Vec<SecurityProbe> = Vec::new();
1434    let mut seen_bearer = false;
1435    let mut seen_basic = false;
1436    // `(loc_tag, name)` — ApiKeyLocation doesn't implement Ord, so
1437    // we tag it with a short discriminant string for dedup.
1438    let mut seen_apikey: std::collections::BTreeSet<(&'static str, String)> = Default::default();
1439    for s in schemes {
1440        match s {
1441            SecuritySchemeInfo::Bearer if !seen_bearer => {
1442                seen_bearer = true;
1443                probes.push(SecurityProbe {
1444                    label: "security:bad-bearer".into(),
1445                    headers: vec![(
1446                        "Authorization".into(),
1447                        "Bearer self-test-invalid-token".into(),
1448                    )],
1449                    query: Vec::new(),
1450                });
1451            }
1452            SecuritySchemeInfo::Basic if !seen_basic => {
1453                seen_basic = true;
1454                // base64("self-test:invalid") — valid base64, wrong creds.
1455                probes.push(SecurityProbe {
1456                    label: "security:bad-basic".into(),
1457                    headers: vec![(
1458                        "Authorization".into(),
1459                        "Basic c2VsZi10ZXN0OmludmFsaWQ=".into(),
1460                    )],
1461                    query: Vec::new(),
1462                });
1463            }
1464            SecuritySchemeInfo::ApiKey { location, name } => {
1465                let loc_tag = match location {
1466                    ApiKeyLocation::Header => "header",
1467                    ApiKeyLocation::Query => "query",
1468                    ApiKeyLocation::Cookie => "cookie",
1469                };
1470                if seen_apikey.contains(&(loc_tag, name.clone())) {
1471                    continue;
1472                }
1473                seen_apikey.insert((loc_tag, name.clone()));
1474                let label = format!("security:bad-apikey:{}", name);
1475                let bad = "self-test-invalid-key".to_string();
1476                match location {
1477                    ApiKeyLocation::Header => probes.push(SecurityProbe {
1478                        label,
1479                        headers: vec![(name.clone(), bad)],
1480                        query: Vec::new(),
1481                    }),
1482                    ApiKeyLocation::Query => probes.push(SecurityProbe {
1483                        label,
1484                        headers: Vec::new(),
1485                        query: vec![(name.clone(), bad)],
1486                    }),
1487                    ApiKeyLocation::Cookie => probes.push(SecurityProbe {
1488                        label,
1489                        headers: vec![("Cookie".into(), format!("{}={}", name, bad))],
1490                        query: Vec::new(),
1491                    }),
1492                }
1493            }
1494            _ => {}
1495        }
1496    }
1497    // Always add a "no auth at all" probe when *any* security scheme
1498    // is declared — useful even if all schemes failed to resolve to a
1499    // testable kind, because it surfaces validators that aren't
1500    // checking auth presence at all.
1501    probes.push(SecurityProbe {
1502        label: "security:no-auth".into(),
1503        headers: Vec::new(),
1504        query: Vec::new(),
1505    });
1506    probes
1507}
1508
1509/// Remove Authorization and any API-key headers declared by the
1510/// operation's security schemes from `headers`, so a security probe
1511/// can supply its own credential (or none) cleanly.
1512fn strip_auth(
1513    headers: &[(String, String)],
1514    schemes: &[SecuritySchemeInfo],
1515) -> Vec<(String, String)> {
1516    let mut apikey_headers: std::collections::BTreeSet<String> = Default::default();
1517    for s in schemes {
1518        if let SecuritySchemeInfo::ApiKey {
1519            location: ApiKeyLocation::Header,
1520            name,
1521        } = s
1522        {
1523            apikey_headers.insert(name.to_lowercase());
1524        }
1525        if let SecuritySchemeInfo::ApiKey {
1526            location: ApiKeyLocation::Cookie,
1527            ..
1528        } = s
1529        {
1530            apikey_headers.insert("cookie".into());
1531        }
1532    }
1533    headers
1534        .iter()
1535        .filter(|(k, _)| {
1536            let lk = k.to_lowercase();
1537            lk != "authorization" && !apikey_headers.contains(&lk)
1538        })
1539        .cloned()
1540        .collect()
1541}
1542
1543/// Remove API-key query parameters declared by the operation's
1544/// security schemes from `query`, so a probe can supply its own.
1545fn strip_auth_query(
1546    query: &[(String, String)],
1547    schemes: &[SecuritySchemeInfo],
1548) -> Vec<(String, String)> {
1549    let mut apikey_query: std::collections::BTreeSet<String> = Default::default();
1550    for s in schemes {
1551        if let SecuritySchemeInfo::ApiKey {
1552            location: ApiKeyLocation::Query,
1553            name,
1554        } = s
1555        {
1556            apikey_query.insert(name.clone());
1557        }
1558    }
1559    query.iter().filter(|(k, _)| !apikey_query.contains(k)).cloned().collect()
1560}
1561
/// Round 35 (#859) — what a probe expects from the server.
///
/// Srikanth on 0.3.179: embedded-content variant-b probes were
/// flagging well-behaved 4xx responses as mismatches, when the only
/// bug those probes are designed to find is a 5xx (the server CRASHED
/// trying to parse the embedded XML/YAML/multipart/urlencoded
/// payload). This tristate replaces the older `expected_4xx: bool` so
/// variant-b probes can opt into "anything but 5xx is fine".
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ExpectedOutcome {
    /// Positive probe: spec-compliant request, expect 2xx or 3xx.
    Success,
    /// Negative probe: invalid request, expect 4xx.
    ClientError,
    /// Embedded-content variant-b probe: spec-shape envelope with a
    /// non-JSON payload embedded in the first string field. The probe
    /// exists to catch server crashes on the embedded payload, so a
    /// 5xx is the only HTTP class that fails it.
    NotServerError,
}

impl ExpectedOutcome {
    /// Whether `actual_status` counts as a pass for this outcome.
    ///
    /// Statuses below 200 never pass. For `NotServerError`, anything
    /// from 200 upward passes except 500–599 (so statuses of 600 and
    /// above pass as well).
    fn passes(self, actual_status: u16) -> bool {
        match self {
            Self::Success => matches!(actual_status, 200..=399),
            Self::ClientError => matches!(actual_status, 400..=499),
            Self::NotServerError => matches!(actual_status, 200..=499 | 600..=u16::MAX),
        }
    }

    /// Human-readable range hint persisted in the JSONL capture and
    /// used by the HTML viewer's "show mismatches only" filter; also
    /// what users `jq` against.
    fn as_str(self) -> &'static str {
        match self {
            Self::Success => "2xx-3xx",
            Self::ClientError => "4xx",
            Self::NotServerError => "2xx-4xx",
        }
    }
}
1604
1605/// Variant of `send_case` that takes an explicit `extra_headers`
1606/// (rather than reading them from `config`). Used by security probes
1607/// to substitute or strip the configured Authorization header.
1608#[allow(clippy::too_many_arguments)]
1609async fn send_case_with_extra(
1610    client: &Client,
1611    config: &SelfTestConfig,
1612    method: Method,
1613    url: &str,
1614    label: &str,
1615    expected: ExpectedOutcome,
1616    body: Option<&str>,
1617    query: Vec<(String, String)>,
1618    headers: Vec<(String, String)>,
1619    extra_headers: Vec<(String, String)>,
1620    // Round 33 (#823) — spec path template (e.g. `/users/{id}`)
1621    // for the operation this probe belongs to. Stamped on the
1622    // capture so the per-endpoint summary can group by template.
1623    path_template: &str,
1624) -> CaseOutcome {
1625    let mut req = client.request(method.clone(), url);
1626    let mut capture_headers: BTreeMap<String, String> = BTreeMap::new();
1627    for (k, v) in &query {
1628        req = req.query(&[(k.as_str(), v.as_str())]);
1629    }
1630    // Round 36 (#876) — stamp the client side first so the same
1631    // `client_sent_at` string flows into both the request headers
1632    // (so the server-side `ServerConformanceViolation` records it
1633    // verbatim) and the on-disk `CaseCapture` JSONL line. Don't
1634    // re-call `Utc::now()` after `req.send()` — that would record
1635    // a different timestamp than the server sees.
1636    let mockforge_version = env!("CARGO_PKG_VERSION").to_string();
1637    let client_sent_at = chrono::Utc::now().to_rfc3339();
1638    // Round 28 — reqwest's `.header(k, v)` APPENDS rather than replaces
1639    // (.headers().insert() would replace but isn't on the builder).
1640    // The previous round-25 fix relied on "last-write-wins" semantics
1641    // that don't exist; for content-type-swap probes the request went
1642    // out with BOTH `Content-Type: application/json` AND `Content-Type:
1643    // application/xml`, and axum's `Json<>` extractor picked the JSON
1644    // one and accepted, so the server-side validator never saw the
1645    // mismatch. Build a `HeaderMap` ourselves so the override
1646    // replaces the body-block default exactly once.
1647    let mut final_headers: reqwest::header::HeaderMap = reqwest::header::HeaderMap::new();
1648    if let Some(_b) = body {
1649        if let Ok(v) = reqwest::header::HeaderValue::from_str("application/json") {
1650            final_headers.insert(reqwest::header::CONTENT_TYPE, v);
1651        }
1652        capture_headers.insert("Content-Type".to_string(), "application/json".to_string());
1653    }
1654    for (k, v) in &headers {
1655        if let (Ok(hn), Ok(hv)) = (
1656            reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1657            reqwest::header::HeaderValue::from_str(v),
1658        ) {
1659            final_headers.insert(hn, hv);
1660        }
1661        capture_headers.insert(k.clone(), v.clone());
1662    }
1663    for (k, v) in &extra_headers {
1664        if let (Ok(hn), Ok(hv)) = (
1665            reqwest::header::HeaderName::from_bytes(k.as_bytes()),
1666            reqwest::header::HeaderValue::from_str(v),
1667        ) {
1668            final_headers.insert(hn, hv);
1669        }
1670        capture_headers.insert(k.clone(), v.clone());
1671    }
1672    // Round 36 (#876) — outbound client stamps. Inserted last so
1673    // they can't be clobbered by user-supplied extra-headers, and
1674    // recorded in `capture_headers` so the JSONL line shows the
1675    // exact bytes that went on the wire.
1676    {
1677        let v_header = mockforge_foundation::conformance_violations::CLIENT_VERSION_HEADER;
1678        let s_header = mockforge_foundation::conformance_violations::CLIENT_SENT_AT_HEADER;
1679        if let (Ok(hn), Ok(hv)) = (
1680            reqwest::header::HeaderName::from_bytes(v_header.as_bytes()),
1681            reqwest::header::HeaderValue::from_str(&mockforge_version),
1682        ) {
1683            final_headers.insert(hn, hv);
1684        }
1685        if let (Ok(hn), Ok(hv)) = (
1686            reqwest::header::HeaderName::from_bytes(s_header.as_bytes()),
1687            reqwest::header::HeaderValue::from_str(&client_sent_at),
1688        ) {
1689            final_headers.insert(hn, hv);
1690        }
1691        capture_headers.insert(v_header.to_string(), mockforge_version.clone());
1692        capture_headers.insert(s_header.to_string(), client_sent_at.clone());
1693    }
1694    if let Some(b) = body {
1695        req = req.body(b.to_string());
1696    }
1697    req = req.headers(final_headers);
1698    let (actual_status, response_capture) = match req.send().await {
1699        Ok(resp) => {
1700            let status = resp.status().as_u16();
1701            if let Some(sink) = &config.capture {
1702                let resp_headers: BTreeMap<String, String> = resp
1703                    .headers()
1704                    .iter()
1705                    .map(|(k, v)| (k.as_str().to_string(), v.to_str().unwrap_or("").to_string()))
1706                    .collect();
1707                let text = resp.text().await.unwrap_or_default();
1708                let (rb, truncated) = truncate_body_for_capture(&text);
1709                (status, Some((Some((rb, truncated)), resp_headers, None, sink.clone())))
1710            } else {
1711                (status, None)
1712            }
1713        }
1714        Err(e) => {
1715            let err_str = e.to_string();
1716            // Round 47 (#79) — classify + push to the wire-level
1717            // network-events sink (when present) so the user has a
1718            // grep-able log of connect/timeout/tls failures during
1719            // self-test, matching the r46 native-executor behaviour.
1720            if let Some(sink) = &config.network_events {
1721                let kind = if e.is_connect() {
1722                    "connect"
1723                } else if e.is_timeout() {
1724                    "timeout"
1725                } else if e.is_request() {
1726                    "request"
1727                } else if e.is_body() {
1728                    "body"
1729                } else if e.is_decode() {
1730                    "decode"
1731                } else if err_str.to_ascii_lowercase().contains("tls") {
1732                    "tls"
1733                } else {
1734                    "other"
1735                };
1736                if let Ok(mut guard) = sink.lock() {
1737                    guard.push(NetworkEvent {
1738                        timestamp: chrono::Utc::now(),
1739                        check: label.to_string(),
1740                        method: method.to_string(),
1741                        url: build_query_url(url, &query),
1742                        kind: kind.to_string(),
1743                        message: err_str.clone(),
1744                    });
1745                }
1746            }
1747            if let Some(sink) = &config.capture {
1748                (0, Some((None, BTreeMap::new(), Some(err_str), sink.clone())))
1749            } else {
1750                (0, None)
1751            }
1752        }
1753    };
1754    let passed = expected.passes(actual_status);
1755    if let Some((resp_body, resp_headers, error, sink)) = response_capture {
1756        let (request_body, request_body_truncated) = match body {
1757            Some(b) => {
1758                let (rb, t) = truncate_body_for_capture(b);
1759                (Some(rb), t)
1760            }
1761            None => (None, false),
1762        };
1763        let (response_body, response_body_truncated) = match resp_body {
1764            Some((rb, t)) => (Some(rb), t),
1765            None => (None, false),
1766        };
1767        let entry = CaseCapture {
1768            label: label.to_string(),
1769            method: method.to_string(),
1770            url: build_query_url(url, &query),
1771            request_headers: capture_headers,
1772            request_body,
1773            request_body_truncated,
1774            response_status: actual_status,
1775            response_headers: resp_headers,
1776            response_body,
1777            response_body_truncated,
1778            error,
1779            // Filled in by the per-operation validation pass after
1780            // every probe finishes; the capture itself is unaware of
1781            // the schema map.
1782            response_schema_error: None,
1783            // Round 28 — derive the expected range from the probe's
1784            // outcome shape so the JSONL line and HTML viewer can
1785            // filter mismatches without re-deriving on the read side.
1786            // Round 35 (#859) — add a third value `"2xx-4xx"` for
1787            // embedded-content variant-b probes whose only failure
1788            // mode is a 5xx server crash.
1789            expected_status_range: expected.as_str().to_string(),
1790            // Round 33 (#823) — path_template carries the spec's
1791            // pre-substitution path so the per-endpoint summary can
1792            // collapse `/users/X` and `/users/Y` into one row.
1793            // spec_label is constant per run, read from the config.
1794            path_template: path_template.to_string(),
1795            spec_label: config.spec_label.clone(),
1796            // Round 36 (#876) — same values that went on the wire as
1797            // request headers, so a server-side
1798            // `ServerConformanceViolation` recorded with
1799            // `client_mockforge_version` + `client_sent_at` matches
1800            // the JSONL line byte-for-byte.
1801            mockforge_version: mockforge_version.clone(),
1802            client_sent_at: client_sent_at.clone(),
1803            iteration: config.current_iteration.max(1),
1804        };
1805        if let Ok(mut guard) = sink.lock() {
1806            guard.push(entry);
1807        }
1808    }
1809    // Round 35 (#859) — keep the `expected_4xx` field on `CaseOutcome`
1810    // semantically tied to "negative probe expecting 400-class", so
1811    // downstream code in `report_html.rs` doesn't have to learn about
1812    // the new tristate. `NotServerError` reports as `expected_4xx:
1813    // false` (it's a positive probe in spirit) and instead carries
1814    // its outcome through the per-capture `expected_status_range`.
1815    let expected_4xx = matches!(expected, ExpectedOutcome::ClientError);
1816    CaseOutcome {
1817        label: label.to_string(),
1818        expected_4xx,
1819        actual_status,
1820        passed,
1821    }
1822}
1823
1824// HTTP request shape needs all of these: client, config (for capture
1825// sink + extra headers), method, url, label (probe id), expected_4xx
1826// (pass/fail decision), body, query, headers. A struct wrapper would
1827// just move the arity from positional to field access without making
1828// the call sites clearer.
1829#[allow(clippy::too_many_arguments)]
1830async fn send_case(
1831    client: &Client,
1832    config: &SelfTestConfig,
1833    method: Method,
1834    url: &str,
1835    label: &str,
1836    expected: ExpectedOutcome,
1837    body: Option<&str>,
1838    query: Vec<(String, String)>,
1839    headers: Vec<(String, String)>,
1840    path_template: &str,
1841) -> CaseOutcome {
1842    // Forwarding to `send_case_with_extra` keeps the capture logic in
1843    // one place so request/response tracing can't drift between the
1844    // two entrypoints.
1845    send_case_with_extra(
1846        client,
1847        config,
1848        method,
1849        url,
1850        label,
1851        expected,
1852        body,
1853        query,
1854        headers,
1855        config.extra_headers.clone(),
1856        path_template,
1857    )
1858    .await
1859}
1860
1861/// Round 23 (c-iii) — rebuild the query-stringified URL for capture so
1862/// the JSONL trace shows the URL that actually went over the wire
1863/// (reqwest applies `.query(..)` after the request URL string is
1864/// rendered, so capturing the raw `url` argument alone loses the
1865/// query params).
1866fn build_query_url(base: &str, query: &[(String, String)]) -> String {
1867    if query.is_empty() {
1868        return base.to_string();
1869    }
1870    let qs: String = query
1871        .iter()
1872        .map(|(k, v)| format!("{}={}", urlencoding::encode(k), urlencoding::encode(v)))
1873        .collect::<Vec<_>>()
1874        .join("&");
1875    if base.contains('?') {
1876        format!("{base}&{qs}")
1877    } else {
1878        format!("{base}?{qs}")
1879    }
1880}
1881
/// Substitute `{param}` placeholders in the spec path with their
/// sample values from `path_params`, then prepend `target`. Empty
/// values are kept as `{param}` so an upstream router still matches
/// the template — useful when `path_params` is empty and we want to
/// hit the same route the spec defines.
///
/// All current call sites went through `build_url_with_base` after
/// round 18.1, so this no-base-path helper is unused; keep it as the
/// documented shim for future external callers (one-arg simplification).
#[allow(dead_code)]
fn build_url(target: &str, path_template: &str, path_params: &[(String, String)]) -> String {
    build_url_with_base(target, None, path_template, path_params)
}

/// Round 18.1 — variant of `build_url` that takes a `base_path`
/// (e.g. `Some("/api")`). When set, prepends it to the spec path so a
/// spec declaring `/users` against a target served behind `/api`
/// resolves to `<target>/api/users`.
///
/// Normalisation:
/// - trailing `/` on `target` is stripped;
/// - `base_path` gets exactly one leading `/` and no trailing `/`;
///   `None`, `""` and slash-only values such as `"/"` all mean "no
///   prefix" (previously `"/"` produced a doubled slash,
///   `<target>//users`);
/// - `path_template` gets a leading `/` if it lacks one.
///
/// Placeholder values are substituted verbatim (no percent-encoding);
/// a parameter with an empty value leaves its `{name}` placeholder in
/// place.
fn build_url_with_base(
    target: &str,
    base_path: Option<&str>,
    path_template: &str,
    path_params: &[(String, String)],
) -> String {
    let mut path = path_template.to_string();
    for (name, value) in path_params {
        if value.is_empty() {
            continue;
        }
        path = path.replace(&format!("{{{name}}}"), value);
    }
    if !path.starts_with('/') {
        path.insert(0, '/');
    }
    // Trim slashes on both ends, then re-add a single leading one;
    // a base path that is empty after trimming contributes nothing.
    let prefix = match base_path.map(|bp| bp.trim_matches('/')) {
        Some(segment) if !segment.is_empty() => format!("/{segment}"),
        _ => String::new(),
    };
    format!("{}{prefix}{path}", target.trim_end_matches('/'))
}
1933
1934#[cfg(test)]
1935mod tests {
1936    use super::*;
1937
1938    fn op(
1939        method: &str,
1940        path: &str,
1941        body: Option<&str>,
1942        query: Vec<(&str, &str)>,
1943        headers: Vec<(&str, &str)>,
1944        path_params: Vec<(&str, &str)>,
1945    ) -> AnnotatedOperation {
1946        AnnotatedOperation {
1947            method: method.into(),
1948            path: path.into(),
1949            features: Vec::new(),
1950            request_body_content_type: body.map(|_| "application/json".into()),
1951            sample_body: body.map(|s| s.to_string()),
1952            query_params: query.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1953            header_params: headers.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1954            path_params: path_params.into_iter().map(|(a, b)| (a.into(), b.into())).collect(),
1955            response_schema: None,
1956            response_schemas: std::collections::BTreeMap::new(),
1957            request_body_schema: None,
1958            security_schemes: Vec::new(),
1959        }
1960    }
1961
1962    /// Round 36 (#876) — older JSONL lines (written before the stamp
1963    /// fields existed) must still deserialise without error and
1964    /// default to empty strings. Prevents a back-compat regression
1965    /// the next time we extend `CaseCapture`.
1966    #[test]
1967    fn case_capture_back_compat_when_stamp_fields_missing() {
1968        let pre_r36 = serde_json::json!({
1969            "label": "positive",
1970            "method": "GET",
1971            "url": "http://api/users",
1972            "request_headers": {},
1973            "request_body_truncated": false,
1974            "response_status": 200,
1975            "response_headers": {},
1976            "response_body_truncated": false,
1977        });
1978        let capture: CaseCapture =
1979            serde_json::from_value(pre_r36).expect("pre-r36 payload must deserialise");
1980        assert!(capture.mockforge_version.is_empty(), "default to empty");
1981        assert!(capture.client_sent_at.is_empty(), "default to empty");
1982    }
1983
1984    /// Round 36 (#876) — when the bench stamps fields itself (the
1985    /// happy path), they round-trip through serde unchanged. Pins
1986    /// the on-wire shape so tooling that grep's `mockforge_version`
1987    /// out of the JSONL stays valid.
1988    #[test]
1989    fn case_capture_stamps_round_trip_through_serde() {
1990        let stamped = CaseCapture {
1991            label: "positive".into(),
1992            method: "GET".into(),
1993            url: "http://api/users".into(),
1994            request_headers: BTreeMap::new(),
1995            request_body: None,
1996            request_body_truncated: false,
1997            response_status: 200,
1998            response_headers: BTreeMap::new(),
1999            response_body: None,
2000            response_body_truncated: false,
2001            error: None,
2002            response_schema_error: None,
2003            expected_status_range: "2xx-3xx".into(),
2004            path_template: "/users".into(),
2005            spec_label: None,
2006            mockforge_version: "0.3.183".into(),
2007            client_sent_at: "2026-06-17T12:34:56+00:00".into(),
2008            iteration: 1,
2009        };
2010        let json = serde_json::to_string(&stamped).unwrap();
2011        assert!(json.contains("\"mockforge_version\":\"0.3.183\""));
2012        assert!(json.contains("\"client_sent_at\":\"2026-06-17T12:34:56+00:00\""));
2013        let back: CaseCapture = serde_json::from_str(&json).unwrap();
2014        assert_eq!(back.mockforge_version, "0.3.183");
2015        assert_eq!(back.client_sent_at, "2026-06-17T12:34:56+00:00");
2016    }
2017
2018    #[test]
2019    fn build_url_substitutes_path_params() {
2020        let url = build_url(
2021            "https://api.test/",
2022            "/users/{id}/posts/{pid}",
2023            &[("id".into(), "42".into()), ("pid".into(), "7".into())],
2024        );
2025        assert_eq!(url, "https://api.test/users/42/posts/7");
2026    }
2027
2028    /// Round 18.1 — a run where every positive 404s should be flagged
2029    /// as a likely target misconfiguration, not silently treated as a
2030    /// successful conformance run.
2031    #[test]
2032    fn detect_target_misconfiguration_when_all_positives_share_status() {
2033        let mut report = SelfTestReport {
2034            positive_pass: 0,
2035            positive_fail: 50,
2036            ..Default::default()
2037        };
2038        for i in 0..50 {
2039            report.operations.push(OperationResult {
2040                method: "GET".into(),
2041                path: format!("/r/{i}"),
2042                positive: Some(CaseOutcome {
2043                    label: "positive".into(),
2044                    expected_4xx: false,
2045                    actual_status: 404,
2046                    passed: false,
2047                }),
2048                negatives: Vec::new(),
2049            });
2050        }
2051        assert_eq!(report.detect_target_misconfiguration(), Some(404));
2052    }
2053
2054    #[test]
2055    fn detect_target_misconfiguration_returns_none_when_some_pass() {
2056        let mut report = SelfTestReport {
2057            positive_pass: 5,
2058            positive_fail: 50,
2059            ..Default::default()
2060        };
2061        for i in 0..55 {
2062            report.operations.push(OperationResult {
2063                method: "GET".into(),
2064                path: format!("/r/{i}"),
2065                positive: Some(CaseOutcome {
2066                    label: "positive".into(),
2067                    expected_4xx: false,
2068                    actual_status: if i < 5 { 200 } else { 404 },
2069                    passed: i < 5,
2070                }),
2071                negatives: Vec::new(),
2072            });
2073        }
2074        assert_eq!(report.detect_target_misconfiguration(), None);
2075    }
2076
2077    /// Round 18.1 — `--base-path /api` should prepend `/api` to
2078    /// every spec path. Pre-fix, the self-test ignored base_path and
2079    /// 404'd every positive when the deployed API was behind a path
2080    /// prefix.
2081    #[test]
2082    fn build_url_applies_base_path_when_present() {
2083        let url = build_url_with_base(
2084            "https://api.example.com",
2085            Some("/api"),
2086            "/users/{id}",
2087            &[("id".into(), "42".into())],
2088        );
2089        assert_eq!(url, "https://api.example.com/api/users/42");
2090    }
2091
2092    /// Round 18.1 — base_path is normalised: missing leading slash
2093    /// gets one added, trailing slash is stripped, empty string is
2094    /// the same as None.
2095    #[test]
2096    fn build_url_normalises_base_path() {
2097        let no_slash = build_url_with_base("https://t", Some("api"), "/x", &[]);
2098        assert_eq!(no_slash, "https://t/api/x");
2099        let trailing = build_url_with_base("https://t", Some("/api/"), "/x", &[]);
2100        assert_eq!(trailing, "https://t/api/x");
2101        let empty = build_url_with_base("https://t", Some(""), "/x", &[]);
2102        assert_eq!(empty, "https://t/x");
2103        let none = build_url_with_base("https://t", None, "/x", &[]);
2104        assert_eq!(none, "https://t/x");
2105    }
2106
2107    #[test]
2108    fn build_url_keeps_placeholders_when_no_sample() {
2109        let url = build_url("https://api.test", "/users/{id}", &[]);
2110        assert_eq!(url, "https://api.test/users/{id}");
2111    }
2112
2113    #[test]
2114    fn report_summary_calls_out_misses() {
2115        let r = SelfTestReport {
2116            positive_pass: 3,
2117            positive_fail: 0,
2118            negative_caught: BTreeMap::from([("request-body".into(), 2)]),
2119            negative_missed: BTreeMap::from([("request-body".into(), 1)]),
2120            operations: Vec::new(),
2121        };
2122        let summary = r.render_summary();
2123        assert!(summary.contains("Positives: 3 pass / 0 fail"));
2124        assert!(summary.contains("Negatives [request-body]: 2 caught / 1 missed"));
2125        assert!(summary.contains("⚠"));
2126        assert!(!r.all_passed());
2127    }
2128
2129    #[test]
2130    fn report_all_passed_when_no_miss() {
2131        let r = SelfTestReport {
2132            positive_pass: 5,
2133            positive_fail: 0,
2134            negative_caught: BTreeMap::from([("parameters".into(), 3)]),
2135            negative_missed: BTreeMap::new(),
2136            operations: Vec::new(),
2137        };
2138        assert!(r.all_passed());
2139        assert!(r.render_summary().contains("✓"));
2140    }
2141
2142    #[tokio::test]
2143    async fn run_self_test_against_unreachable_target_marks_all_failed() {
2144        // Use an obviously-dead port so we exercise the timeout/error
2145        // path without needing a live server in tests.
2146        let cfg = SelfTestConfig {
2147            target_url: "http://127.0.0.1:1".into(),
2148            timeout: Duration::from_millis(200),
2149            ..Default::default()
2150        };
2151        let ops = vec![op(
2152            "POST",
2153            "/users",
2154            Some("{\"name\":\"a\"}"),
2155            vec![],
2156            vec![],
2157            vec![],
2158        )];
2159        let report = run_self_test(&ops, &cfg).await.expect("client builds");
2160        // All cases hit the connect-error path → actual_status=0.
2161        // Positive expects 2xx-3xx → 0 is fail. Negatives expect 4xx
2162        // → 0 is also fail (we missed catching).
2163        assert_eq!(report.positive_fail, 1);
2164        assert!(report.negative_missed.values().sum::<usize>() >= 1);
2165        assert!(!report.all_passed());
2166    }
2167
2168    /// Round 17.2 — operations with both a positive sample AND a
2169    /// resolved request-body schema produce schema-driven negatives
2170    /// in addition to the spec-agnostic empty/wrong-type ones. The
2171    /// labels carry the field path so a per-category report can tell
2172    /// you exactly which field caught.
2173    #[tokio::test]
2174    async fn schema_driven_negatives_fire_when_schema_present() {
2175        use openapiv3::{ObjectType, ReferenceOr, Schema, SchemaData, SchemaKind, Type};
2176        let cfg = SelfTestConfig {
2177            target_url: "http://127.0.0.1:1".into(),
2178            timeout: Duration::from_millis(200),
2179            ..Default::default()
2180        };
2181        // Build an operation whose schema has a required `name` string
2182        // and an `age` integer. The mutator should produce, at
2183        // minimum: required-removed:name, required-removed:age,
2184        // type-mismatch:name, type-mismatch:age, integer-as-float:age,
2185        // plus the root-level type-mismatch.
2186        let mut obj = ObjectType::default();
2187        obj.properties.insert(
2188            "name".to_string(),
2189            ReferenceOr::Item(Box::new(Schema {
2190                schema_data: SchemaData::default(),
2191                schema_kind: SchemaKind::Type(Type::String(Default::default())),
2192            })),
2193        );
2194        obj.properties.insert(
2195            "age".to_string(),
2196            ReferenceOr::Item(Box::new(Schema {
2197                schema_data: SchemaData::default(),
2198                schema_kind: SchemaKind::Type(Type::Integer(Default::default())),
2199            })),
2200        );
2201        obj.required = vec!["name".into(), "age".into()];
2202        let schema = Schema {
2203            schema_data: SchemaData::default(),
2204            schema_kind: SchemaKind::Type(Type::Object(obj)),
2205        };
2206
2207        let mut o =
2208            op("POST", "/users", Some(r#"{"name":"Ada","age":30}"#), vec![], vec![], vec![]);
2209        o.request_body_schema = Some(schema);
2210        let report = run_self_test(&[o], &cfg).await.expect("client builds");
2211        // Bucket labels from the operation result.
2212        let labels: std::collections::BTreeSet<String> = report
2213            .operations
2214            .iter()
2215            .flat_map(|op| op.negatives.iter().map(|n| n.label.clone()))
2216            .collect();
2217        assert!(
2218            labels.iter().any(|l| l.starts_with("request-body:type-mismatch:")),
2219            "missing type-mismatch negative; got {labels:?}"
2220        );
2221        assert!(
2222            labels.iter().any(|l| l.starts_with("request-body:required-removed:")),
2223            "missing required-removed negative; got {labels:?}"
2224        );
2225        assert!(
2226            labels.iter().any(|l| l == "parameters:uri-too-long"),
2227            "missing URI-length negative; got {labels:?}"
2228        );
2229    }
2230
2231    /// Round 16 — operations with a body OR a path-param now produce
2232    /// negatives even without a sample body. Previously a POST whose
2233    /// body annotator failed produced *zero* negatives, so the self-test
2234    /// always reported "all passing" for that endpoint.
2235    #[tokio::test]
2236    async fn no_sample_body_still_produces_request_body_negatives() {
2237        let cfg = SelfTestConfig {
2238            target_url: "http://127.0.0.1:1".into(),
2239            timeout: Duration::from_millis(200),
2240            ..Default::default()
2241        };
2242        // POST with a body content type but no sample (annotator gap).
2243        let ops = vec![op("POST", "/x", None, vec![], vec![], vec![])];
2244        // No sample_body but request_body_content_type set:
2245        let mut ops_fixed = ops;
2246        ops_fixed[0].request_body_content_type = Some("application/json".into());
2247        let report = run_self_test(&ops_fixed, &cfg).await.expect("client builds");
2248        // Both request-body negatives (empty + wrong-type) should fire,
2249        // landing in `negative_missed` because the unreachable target
2250        // returns no 4xx. The point: count > 0.
2251        assert!(
2252            report.negative_missed.values().sum::<usize>() >= 2,
2253            "expected ≥2 request-body negatives, got {:?}",
2254            report.negative_missed
2255        );
2256    }
2257
2258    /// Round 16 — operations with a path-param now get a probe even
2259    /// when there's no body / required query / required header.
2260    /// Previously `/teams/{team-id}` with no other required fields
2261    /// produced zero negatives → always "all passing".
2262    #[tokio::test]
2263    async fn path_param_only_endpoint_produces_a_probe() {
2264        let cfg = SelfTestConfig {
2265            target_url: "http://127.0.0.1:1".into(),
2266            timeout: Duration::from_millis(200),
2267            ..Default::default()
2268        };
2269        let ops = vec![op(
2270            "GET",
2271            "/teams/{team-id}",
2272            None,
2273            vec![],
2274            vec![],
2275            vec![("team-id", "1")],
2276        )];
2277        let report = run_self_test(&ops, &cfg).await.expect("client builds");
2278        let total: usize = report.negative_caught.values().sum::<usize>()
2279            + report.negative_missed.values().sum::<usize>();
2280        assert!(total >= 1, "expected ≥1 path-param probe, got {:?}", report);
2281    }
2282
2283    /// Round 18.5 — when `geo_ip` is set, every default forwarded-
2284    /// IP header gets the IP appended (X-Forwarded-For,
2285    /// True-Client-IP, CF-Connecting-IP).
2286    #[test]
2287    fn effective_op_headers_appends_geo_ip_to_default_headers() {
2288        let ip: IpAddr = "203.0.113.42".parse().unwrap();
2289        let headers = effective_op_headers(
2290            &[("Accept".into(), "application/json".into())],
2291            Some(ip),
2292            &default_geo_source_headers(),
2293        );
2294        let names: Vec<&str> = headers.iter().map(|(k, _)| k.as_str()).collect();
2295        assert!(names.contains(&"Accept"));
2296        assert!(names.contains(&"X-Forwarded-For"));
2297        assert!(names.contains(&"True-Client-IP"));
2298        assert!(names.contains(&"CF-Connecting-IP"));
2299        // Every geo header carries the same IP value.
2300        let geo_values: Vec<&str> =
2301            headers.iter().filter(|(k, _)| k != "Accept").map(|(_, v)| v.as_str()).collect();
2302        for v in geo_values {
2303            assert_eq!(v, "203.0.113.42");
2304        }
2305    }
2306
2307    /// Round 18.5 — operations that already declare a forwarded-IP
2308    /// header (rare but legal — some specs hard-code one) keep their
2309    /// declared value; we don't clobber the spec.
2310    #[test]
2311    fn effective_op_headers_respects_spec_declared_header() {
2312        let ip: IpAddr = "203.0.113.99".parse().unwrap();
2313        let headers = effective_op_headers(
2314            &[("x-forwarded-for".into(), "10.0.0.1".into())],
2315            Some(ip),
2316            &["X-Forwarded-For".to_string()],
2317        );
2318        // The spec's lower-case value wins; we shouldn't add a
2319        // second X-Forwarded-For row that overrides it.
2320        let xff: Vec<&str> = headers
2321            .iter()
2322            .filter(|(k, _)| k.eq_ignore_ascii_case("x-forwarded-for"))
2323            .map(|(_, v)| v.as_str())
2324            .collect();
2325        assert_eq!(xff, vec!["10.0.0.1"]);
2326    }
2327
2328    /// Round 18.5 — None geo_ip and/or empty header list is a no-op.
2329    #[test]
2330    fn effective_op_headers_is_a_noop_without_geo_ip() {
2331        let base = vec![("Accept".into(), "json".into())];
2332        let h1 = effective_op_headers(&base, None, &default_geo_source_headers());
2333        assert_eq!(h1, base);
2334        let ip: IpAddr = "10.0.0.1".parse().unwrap();
2335        let h2 = effective_op_headers(&base, Some(ip), &[]);
2336        assert_eq!(h2, base);
2337    }
2338
2339    /// Round 18.5 — empty `source_ips` builds a single default
2340    /// client; a non-empty list builds N clients each attempting to
2341    /// bind. We can't reliably test the actual bind on CI (no
2342    /// loopback aliases), but a loopback IP is always bind-able.
2343    #[test]
2344    fn build_client_pool_one_per_source_ip() {
2345        let mut cfg = SelfTestConfig {
2346            target_url: "http://127.0.0.1:1".into(),
2347            timeout: Duration::from_millis(200),
2348            ..Default::default()
2349        };
2350        // Empty → one default client.
2351        assert_eq!(build_client_pool(&cfg).expect("default builds").len(), 1);
2352        // Non-empty → one per IP. Loopback bind is portable.
2353        cfg.source_ips = vec!["127.0.0.1".parse().unwrap()];
2354        assert_eq!(build_client_pool(&cfg).expect("bind loopback").len(), 1);
2355    }
2356
2357    /// Round 18.5 — geo IPs round-robin across operations. Hits an
2358    /// unreachable target so we can inspect the case outcomes; the
2359    /// point is to confirm `op_headers` carried the geo IP through
2360    /// (CaseOutcome doesn't surface headers directly, so we just
2361    /// verify the run completes without panicking and the result
2362    /// shape is correct when source_ips is non-empty too).
2363    #[tokio::test]
2364    async fn run_self_test_with_geo_source_completes() {
2365        let cfg = SelfTestConfig {
2366            target_url: "http://127.0.0.1:1".into(),
2367            timeout: Duration::from_millis(200),
2368            geo_source_ips: vec![
2369                "203.0.113.1".parse().unwrap(),
2370                "203.0.113.2".parse().unwrap(),
2371            ],
2372            ..Default::default()
2373        };
2374        let ops = vec![
2375            op("GET", "/a", None, vec![], vec![], vec![]),
2376            op("GET", "/b", None, vec![], vec![], vec![]),
2377            op("GET", "/c", None, vec![], vec![], vec![]),
2378        ];
2379        let report = run_self_test(&ops, &cfg).await.expect("client builds");
2380        assert_eq!(report.operations.len(), 3);
2381    }
2382
2383    /// Round 24 (f) — Srikanth saw the geo header on positive probes
2384    /// only; the four negative-probe call sites were passing
2385    /// `op.header_params` directly instead of `op_headers`, so the
2386    /// geo IP got dropped. This test runs a self-test that includes
2387    /// negative probes (uri-too-long, missing-query, etc.) under
2388    /// `--conformance-self-test-capture`, then asserts that EVERY
2389    /// captured probe (positive AND negative) carries one of the
2390    /// configured forwarded-IP headers.
2391    #[tokio::test]
2392    async fn geo_headers_present_on_every_probe_with_capture() {
2393        let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2394        let cfg = SelfTestConfig {
2395            target_url: "http://127.0.0.1:1".into(),
2396            timeout: Duration::from_millis(50),
2397            geo_source_ips: vec!["203.0.113.5".parse().unwrap()],
2398            capture: Some(sink.clone()),
2399            ..Default::default()
2400        };
2401        // An operation rich enough to trip several negative-probe
2402        // branches: header param (→ missing-header), query param
2403        // (→ missing-query), and a sample body (→ schema mutations
2404        // wouldn't fire without a schema, but uri-too-long always
2405        // does).
2406        let ops = vec![op(
2407            "GET",
2408            "/items",
2409            Some("{}"),
2410            vec![("id", "1")],
2411            vec![("X-Trace", "x")],
2412            vec![],
2413        )];
2414        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2415        let captures = sink.lock().unwrap();
2416        assert!(!captures.is_empty(), "self-test should record probes");
2417        // For every captured probe, at least one of the default geo
2418        // headers must be present and equal to the configured IP.
2419        let geo_headers: std::collections::HashSet<&str> =
2420            ["X-Forwarded-For", "True-Client-IP", "CF-Connecting-IP"].into_iter().collect();
2421        for c in captures.iter() {
2422            let has_geo = c
2423                .request_headers
2424                .iter()
2425                .any(|(k, v)| geo_headers.contains(k.as_str()) && v == "203.0.113.5");
2426            assert!(
2427                has_geo,
2428                "probe `{}` is missing the geo IP header; got headers: {:?}",
2429                c.label, c.request_headers
2430            );
2431        }
2432    }
2433
2434    /// Round 25 (k) — operations with a JSON request body now get four
2435    /// content-type-swap probes (xml / yaml / multipart / urlencoded).
2436    /// Verify they:
2437    ///   1. fire only when the operation declares a JSON body
2438    ///   2. carry the wrong Content-Type the probe is testing for
2439    ///   3. don't fire on body-less operations
2440    #[tokio::test]
2441    async fn content_type_swap_probes_fire_for_json_bodies() {
2442        let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2443        let cfg = SelfTestConfig {
2444            target_url: "http://127.0.0.1:1".into(),
2445            timeout: Duration::from_millis(50),
2446            capture: Some(sink.clone()),
2447            ..Default::default()
2448        };
2449        let ops = vec![
2450            op("POST", "/users", Some("{\"name\":\"a\"}"), vec![], vec![], vec![]),
2451            op("GET", "/ping", None, vec![], vec![], vec![]),
2452        ];
2453        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2454        let captures = sink.lock().unwrap();
2455
2456        let swap_labels: Vec<&str> = captures
2457            .iter()
2458            .filter(|c| c.label.starts_with("request-body:content-type-mismatch:"))
2459            .map(|c| c.label.as_str())
2460            .collect();
2461        assert_eq!(
2462            swap_labels.len(),
2463            4,
2464            "expected 4 content-type-swap probes (one per variant), got: {swap_labels:?}"
2465        );
2466        let expected_labels = [
2467            "request-body:content-type-mismatch:xml",
2468            "request-body:content-type-mismatch:yaml",
2469            "request-body:content-type-mismatch:multipart",
2470            "request-body:content-type-mismatch:urlencoded",
2471        ];
2472        for want in expected_labels {
2473            assert!(swap_labels.contains(&want), "missing swap probe `{want}`");
2474        }
2475
2476        // Each swap probe must carry the wrong Content-Type it's
2477        // testing for — that's the whole point.
2478        for c in captures.iter() {
2479            let Some(suffix) = c.label.strip_prefix("request-body:content-type-mismatch:") else {
2480                continue;
2481            };
2482            let want_ct = match suffix {
2483                "xml" => "application/xml",
2484                "yaml" => "application/yaml",
2485                "multipart" => "multipart/form-data",
2486                "urlencoded" => "application/x-www-form-urlencoded",
2487                _ => continue,
2488            };
2489            let got_ct = c
2490                .request_headers
2491                .iter()
2492                .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2493                .map(|(_, v)| v.as_str())
2494                .unwrap_or("");
2495            assert_eq!(got_ct, want_ct, "swap probe `{}` sent wrong CT", c.label);
2496        }
2497
2498        // The body-less operation must NOT produce content-type-swap
2499        // probes (no body → no content type to lie about).
2500        let body_less_swaps = captures
2501            .iter()
2502            .filter(|c| {
2503                c.label.starts_with("request-body:content-type-mismatch:")
2504                    && c.url.ends_with("/ping")
2505            })
2506            .count();
2507        assert_eq!(
2508            body_less_swaps, 0,
2509            "GET /ping has no request body; should not produce content-type-swap probes"
2510        );
2511    }
2512
2513    /// Round 27 (k variant b) — Srikanth's round-23 follow-up on (k):
2514    /// JSON envelope with embedded non-JSON field values. For each
2515    /// JSON-body operation, four extra probes fire that send valid
2516    /// JSON with an XML/YAML/multipart/urlencoded snippet stuffed
2517    /// into a string field. Content-Type stays `application/json`;
2518    /// expected is 2xx-3xx (the body parses); a 5xx flags a server
2519    /// that crashed on the embedded content.
2520    #[tokio::test]
2521    async fn embedded_content_probes_fire_with_honest_content_type() {
2522        let sink: Arc<Mutex<Vec<CaseCapture>>> = Arc::new(Mutex::new(Vec::new()));
2523        let cfg = SelfTestConfig {
2524            target_url: "http://127.0.0.1:1".into(),
2525            timeout: Duration::from_millis(50),
2526            capture: Some(sink.clone()),
2527            ..Default::default()
2528        };
2529        let ops = vec![op(
2530            "POST",
2531            "/users",
2532            Some("{\"name\":\"alice\",\"age\":30}"),
2533            vec![],
2534            vec![],
2535            vec![],
2536        )];
2537        let _ = run_self_test(&ops, &cfg).await.expect("client builds");
2538        let captures = sink.lock().unwrap();
2539        let embedded: Vec<&CaseCapture> = captures
2540            .iter()
2541            .filter(|c| c.label.starts_with("request-body:embedded-content:"))
2542            .collect();
2543        assert_eq!(
2544            embedded.len(),
2545            4,
2546            "expected 4 embedded-content probes, got: {:?}",
2547            embedded.iter().map(|c| &c.label).collect::<Vec<_>>()
2548        );
2549        // Every embedded probe must carry the honest application/json
2550        // Content-Type (NOT lie like the variant-a content-type-swap
2551        // probes do) and a request body that still parses as JSON.
2552        for c in &embedded {
2553            let ct = c
2554                .request_headers
2555                .iter()
2556                .find(|(k, _)| k.eq_ignore_ascii_case("content-type"))
2557                .map(|(_, v)| v.as_str())
2558                .unwrap_or("");
2559            assert!(
2560                ct.contains("application/json"),
2561                "embedded probe `{}` should keep Content-Type honest, got {ct}",
2562                c.label
2563            );
2564            let body = c.request_body.as_deref().unwrap_or("");
2565            assert!(
2566                serde_json::from_str::<serde_json::Value>(body).is_ok(),
2567                "embedded probe `{}` body should still be valid JSON, got: {body}",
2568                c.label
2569            );
2570        }
2571    }
2572
2573    /// `embed_payload_in_first_string_field` walks objects depth-first
2574    /// and replaces only the FIRST string-valued leaf, leaving the
2575    /// surrounding structure intact.
2576    #[test]
2577    fn embed_payload_replaces_first_string_only() {
2578        let sample = r#"{"name":"alice","age":30,"tags":["admin","user"]}"#;
2579        let mutated = embed_payload_in_first_string_field(sample, "<x/>")
2580            .expect("string field present so probe constructed");
2581        let v: serde_json::Value = serde_json::from_str(&mutated).unwrap();
2582        assert_eq!(v["name"], serde_json::json!("<x/>"));
2583        // age stays an integer (not stringified by the mutation).
2584        assert_eq!(v["age"], serde_json::json!(30));
2585        // tags array's strings stay untouched (we only replace the
2586        // first encountered string leaf, depth-first).
2587        assert_eq!(v["tags"][0], serde_json::json!("admin"));
2588        assert_eq!(v["tags"][1], serde_json::json!("user"));
2589    }
2590
2591    /// Round 34 (#829) — Srikanth on 0.3.178: when the positive
2592    /// sample has NO string field, the previous `{"data": <snippet>}`
2593    /// fallback produced an envelope that doesn't match real-API
2594    /// schemas (e.g. vCenter's `consolecli` PUT wants
2595    /// `{enabled: bool}`), so the server correctly 400'd and the
2596    /// bench misreported the 2xx-3xx expectation. Now we return None
2597    /// and the caller skips the probe.
2598    #[test]
2599    fn embed_payload_returns_none_when_no_string_field() {
2600        let no_strings = r#"{"a":1,"b":[2,3]}"#;
2601        assert!(embed_payload_in_first_string_field(no_strings, "<x><y></y></x>").is_none());
2602        // The exact vCenter-style case Srikanth hit.
2603        let bool_only = r#"{"enabled":true}"#;
2604        assert!(embed_payload_in_first_string_field(bool_only, "<x/>").is_none());
2605    }
2606
2607    #[test]
2608    fn embed_payload_returns_none_for_invalid_json_sample() {
2609        assert!(embed_payload_in_first_string_field("garbage", "a=1&b=2").is_none());
2610    }
2611
2612    /// Round 35 (#859) — Srikanth on 0.3.179 saw variant-b probes flag
2613    /// every 4xx as a mismatch when the spec field had a `pattern` /
2614    /// `format` validator that correctly rejected the embedded
2615    /// payload. The probe was only ever meant to catch 5xx (server
2616    /// crashed parsing the embedded content); 4xx is the well-behaved
2617    /// outcome. Tristate `ExpectedOutcome::NotServerError` lets a
2618    /// variant-b probe pass on 2xx-4xx and fail only on 5xx.
2619    #[test]
2620    fn expected_outcome_pass_rules() {
2621        // Success (positive): 2xx-3xx pass, 4xx + 5xx fail.
2622        assert!(ExpectedOutcome::Success.passes(200));
2623        assert!(ExpectedOutcome::Success.passes(201));
2624        assert!(ExpectedOutcome::Success.passes(204));
2625        assert!(ExpectedOutcome::Success.passes(301));
2626        assert!(!ExpectedOutcome::Success.passes(400));
2627        assert!(!ExpectedOutcome::Success.passes(415));
2628        assert!(!ExpectedOutcome::Success.passes(500));
2629        assert!(!ExpectedOutcome::Success.passes(0));
2630
2631        // ClientError (negative): only 4xx pass.
2632        assert!(!ExpectedOutcome::ClientError.passes(200));
2633        assert!(ExpectedOutcome::ClientError.passes(400));
2634        assert!(ExpectedOutcome::ClientError.passes(404));
2635        assert!(ExpectedOutcome::ClientError.passes(422));
2636        assert!(!ExpectedOutcome::ClientError.passes(500));
2637
2638        // NotServerError (variant-b): 2xx-4xx pass, 5xx fails.
2639        assert!(ExpectedOutcome::NotServerError.passes(200));
2640        assert!(ExpectedOutcome::NotServerError.passes(204));
2641        assert!(ExpectedOutcome::NotServerError.passes(400), "Srikanth's vCenter consolecli case: 400 from a pattern validator should NOT be a probe failure");
2642        assert!(ExpectedOutcome::NotServerError.passes(415));
2643        assert!(ExpectedOutcome::NotServerError.passes(422));
2644        assert!(
2645            !ExpectedOutcome::NotServerError.passes(500),
2646            "Server CRASH on embedded content is the only real failure"
2647        );
2648        assert!(!ExpectedOutcome::NotServerError.passes(502));
2649        assert!(!ExpectedOutcome::NotServerError.passes(503));
2650        // status 0 (network error / probe never reached the server) does not pass either
2651        assert!(!ExpectedOutcome::NotServerError.passes(0));
2652    }
2653
2654    /// Round 35 (#859) — the per-capture `expected_status_range`
2655    /// string is what the HTML viewer's "show mismatches only"
2656    /// filter and Srikanth's `jq` pipelines key off, so the new
2657    /// tristate must surface a third distinct value.
2658    #[test]
2659    fn expected_outcome_string_labels() {
2660        assert_eq!(ExpectedOutcome::Success.as_str(), "2xx-3xx");
2661        assert_eq!(ExpectedOutcome::ClientError.as_str(), "4xx");
2662        assert_eq!(ExpectedOutcome::NotServerError.as_str(), "2xx-4xx");
2663    }
2664
2665    /// Round 26 — Srikanth saw `at /: Type { kind: Single` in his
2666    /// 0.3.169 capture for the vCenter `infraprofile/configs` 202
2667    /// response (spec promised `type: string`, server returned a
2668    /// JSON object). The output was a broken-syntax debug string.
2669    /// This test reproduces his exact spec+body and asserts the
2670    /// message is readable.
2671    #[test]
2672    fn response_schema_error_message_is_readable() {
2673        let schema = serde_json::json!({"type": "string"});
2674        let body = r#"{"data":{},"id":"generated_id","status":"created"}"#;
2675        let err = validate_body_against_schema(body, &schema).expect("type-mismatch fires");
2676        // The message must NOT contain Rust debug syntax leftovers
2677        // ("Type { kind:", trailing "{" or "(" tokens). It SHOULD say
2678        // what type was expected.
2679        assert!(!err.contains("Type { kind"), "stale debug output: {err}");
2680        assert!(!err.contains("{ kind:"), "stale debug output: {err}");
2681        assert!(err.contains("string"), "should name expected type: {err}");
2682        // Round 29 — Srikanth on 0.3.172 was confused by `at /:`,
2683        // thinking it pointed to the URL path. The new format
2684        // explicitly says "response body root" for the root case
2685        // (and "response body at /<pointer>" for nested fields).
2686        assert!(
2687            err.contains("response body root"),
2688            "should label root explicitly so reader knows it's not the URL: {err}"
2689        );
2690        // Round 28 — Srikanth wanted the expected schema embedded
2691        // in the message so it reads as 'expected schema {"type":"string"}'.
2692        assert!(
2693            err.contains("expected schema") && err.contains("\"type\":\"string\""),
2694            "should include expected schema JSON: {err}"
2695        );
2696    }
2697
2698    /// Round 29 — for non-root paths the format reads
2699    /// "response body at /name: ...". Catches the case where the
2700    /// root rewording accidentally dropped the JSON-pointer for
2701    /// nested fields.
2702    #[test]
2703    fn response_schema_error_uses_response_body_prefix_for_nested_paths() {
2704        let schema = serde_json::json!({
2705            "type": "object",
2706            "required": ["name"],
2707            "properties": {"name": {"type": "string"}}
2708        });
2709        let body = r#"{"name": 123}"#;
2710        let err = validate_body_against_schema(body, &schema).expect("type-mismatch fires");
2711        assert!(
2712            err.contains("response body at /name"),
2713            "nested path should read 'response body at /name': {err}"
2714        );
2715        assert!(!err.contains("response body root"), "wrong label for nested: {err}");
2716        // Round 30 — the "expected schema" suffix should be the
2717        // sub-schema at /name, not the entire object schema. Reader
2718        // shouldn't have to scan a 300-char object to find the
2719        // constraint that failed.
2720        assert!(
2721            err.contains(r#"expected schema {"type":"string"}"#),
2722            "should show only the /name sub-schema, not the full object: {err}"
2723        );
2724    }
2725
2726    /// Round 30 — Srikanth asked how a deeper nested mismatch reads.
2727    /// Schema: `name.type` should be a string; body has it as a number.
2728    /// JSON pointer is `/name/type`.
2729    #[test]
2730    fn response_schema_error_uses_response_body_prefix_for_deep_nested_paths() {
2731        let schema = serde_json::json!({
2732            "type": "object",
2733            "properties": {
2734                "name": {
2735                    "type": "object",
2736                    "properties": {"type": {"type": "string"}}
2737                }
2738            }
2739        });
2740        let body = r#"{"name": {"type": 123}}"#;
2741        let err = validate_body_against_schema(body, &schema).expect("type-mismatch fires");
2742        assert!(
2743            err.contains("response body at /name/type"),
2744            "deep nested path should read 'response body at /name/type': {err}"
2745        );
2746        // Round 30 — for deep paths the sub-schema is the leaf
2747        // {"type":"string"}, not the wrapping object schemas.
2748        assert!(
2749            err.contains(r#"expected schema {"type":"string"}"#),
2750            "should show only the /name/type leaf sub-schema: {err}"
2751        );
2752    }
2753
/// Round 30 — an instance pointer that cannot be followed through
/// the schema's `properties` chain (an additionalProperties hit, for
/// example) makes `sub_schema_at_pointer` return `None`, and the
/// error message then falls back to the full schema. This pins the
/// `None` that triggers that fallback, plus the root-pointer cases.
#[test]
fn sub_schema_at_pointer_falls_back_for_unresolvable_paths() {
    let permissive = serde_json::json!({"type":"object","additionalProperties":true});

    // `/unknown` is not a declared property, so the walker yields None.
    let unresolved = sub_schema_at_pointer(&permissive, "/unknown");
    assert_eq!(
        unresolved, None,
        "unresolvable path should return None to trigger fallback"
    );

    // Both spellings of the root pointer hand back the whole schema.
    for root_pointer in ["/", ""] {
        assert_eq!(
            sub_schema_at_pointer(&permissive, root_pointer),
            Some(permissive.clone())
        );
    }
}
2771
/// A body that omits a required property must produce a message that
/// says a required field is missing and names that field.
#[test]
fn response_schema_error_required_field_is_readable() {
    let id_required = serde_json::json!({
        "type": "object",
        "required": ["id"],
        "properties": {"id": {"type": "integer"}}
    });
    let message = validate_body_against_schema(r#"{"other": 1}"#, &id_required)
        .expect("required-missing fires");

    // First the generic wording, then the name of the absent field.
    for fragment in ["required field missing", "id"] {
        assert!(message.contains(fragment), "{message}");
    }
}
2784
/// Round 31 — Srikanth's vCenter case on 0.3.174. The
/// `Appliance.Recovery.Backup.SystemName.Archive.Info` schema has a
/// multi-paragraph description and about six required fields, and
/// the response was missing `comment`. Before the fix the message
/// printed the WHOLE parent object schema (parent description and
/// every sibling property schema) cut off at 300 chars; after it,
/// only the missing field's own schema is printed. Checks that
/// (a) the parent description is absent and (b) no sibling property
/// name shows up in the message.
#[test]
fn response_schema_error_required_focuses_on_missing_field_only() {
    let archive_info = serde_json::json!({
        "description": "The Appliance.Recovery.Backup.SystemName.Archive.Info schema represents backup archive information.\n\nThis schema was added in vSphere API 6.7.",
        "type": "object",
        "required": ["comment", "location", "parts", "system_name", "timestamp", "version"],
        "properties": {
            "comment": {
                "type": "string",
                "description": "Custom comment added by the user for this backup."
            },
            "location": {"type": "string", "description": "Backup location URL."},
            "parts": {"type": "array", "items": {"type": "string"}},
            "system_name": {"type": "string"},
            "timestamp": {"type": "string", "format": "date-time"},
            "version": {"type": "string"}
        }
    });
    // Every required field except `comment` is supplied.
    let payload = r#"{"location":"x","parts":[],"system_name":"y","timestamp":"z","version":"v"}"#;
    let message =
        validate_body_against_schema(payload, &archive_info).expect("required-missing fires");

    assert!(message.contains("required field missing: \"comment\""), "{message}");

    // The parent's description must stay out; only the `comment`
    // field's own description (if any) is allowed through.
    let parent_marker = "Appliance.Recovery.Backup";
    assert!(
        !message.contains(parent_marker),
        "parent description should not bleed into focused schema: {message}"
    );

    // None of the sibling keys may leak into the focused schema suffix.
    let siblings = ["location", "parts", "system_name", "timestamp", "version"];
    for sibling in siblings {
        let quoted_key = format!("\"{sibling}\"");
        assert!(
            !message.contains(&quoted_key),
            "sibling field {sibling} should not appear in focused schema: {message}"
        );
    }
}
2830
/// A body that satisfies the schema yields no error message at all.
#[test]
fn response_schema_error_none_on_match() {
    let string_schema = serde_json::json!({"type": "string"});
    let outcome = validate_body_against_schema("\"hello\"", &string_schema);
    assert_eq!(outcome, None);
}
2836
/// Round 34 (#827) — on 0.3.178 Srikanth hit the vCenter
/// `consolecli` PUT, whose `enabled: boolean` property carries a
/// multi-paragraph description. The printed schema was cut off in
/// the middle of that description, so `type: boolean` fell past the
/// 300-char cap. Dropping `description` (and friends) before
/// serializing has to keep the type information visible.
#[test]
fn response_schema_error_strips_description_so_type_survives_truncation() {
    // Built so that, without stripping, `description` would shove
    // `type` beyond the 300-char truncation cap. The wording is kept
    // deliberately close to the vCenter-spec text Srikanth quoted.
    let long_description = "In the result of the #get and #list operations this property indicates whether proxying is enabled for a particular protocol. In the input to the test and set operations this property specifies whether proxying should be enabled for a particular protocol. This property was added in vSphere API 6.7. Defaults to enabled if both this field and the value field are unset.";
    let proxy_schema = serde_json::json!({
        "type": "object",
        "required": ["enabled"],
        "properties": {
            "enabled": {
                "type": "boolean",
                "description": long_description,
                "example": true,
            }
        }
    });
    let message =
        validate_body_against_schema("{}", &proxy_schema).expect("required-missing fires");

    assert!(message.contains("required field missing: \"enabled\""), "{message}");
    assert!(
        message.contains(r#""type":"boolean""#),
        "the `type: boolean` keyword must survive truncation: {message}"
    );

    // Neither the prose description nor the `example` key may be
    // printed; the suffix is meant to be type-focused.
    let must_be_absent = [
        (
            "proxying is enabled",
            "description should be stripped from the printed schema",
        ),
        (
            "\"example\"",
            "`example` field should be stripped from the printed schema",
        ),
    ];
    for (needle, why) in must_be_absent {
        assert!(!message.contains(needle), "{why}: {message}");
    }
}
2879
/// Round 34 (#827) — `strip_schema_noise` removes only the prose
/// noise; every constraint keyword has to come through untouched.
#[test]
fn strip_schema_noise_preserves_constraint_keywords() {
    let noisy = serde_json::json!({
        "type": "object",
        "required": ["a", "b"],
        "description": "should be stripped",
        "title": "should be stripped",
        "example": {"a": 1, "b": 2},
        "properties": {
            "a": {"type": "string", "format": "uri", "minLength": 1, "description": "drop"},
            "b": {"type": "integer", "minimum": 0, "maximum": 100, "summary": "drop"},
        },
    });
    let rendered = serde_json::to_string(&strip_schema_noise(&noisy)).unwrap();

    // Constraint keywords, already quoted the way they serialize.
    let constraint_keys = [
        "\"type\"",
        "\"required\"",
        "\"properties\"",
        "\"format\"",
        "\"minLength\"",
        "\"minimum\"",
        "\"maximum\"",
    ];
    for keyword in constraint_keys {
        assert!(rendered.contains(keyword), "should keep {keyword}: {rendered}");
    }

    // Prose-only keys must not appear anywhere, at any nesting depth.
    let noise_keys = ["description", "title", "example", "summary"];
    for noise_key in noise_keys {
        let quoted_key = format!("\"{noise_key}\"");
        assert!(!rendered.contains(&quoted_key), "should strip {noise_key}: {rendered}");
    }
}
2914
/// A minimal `SelfTestReport` (one operation, one passing positive
/// case, no negatives) serialises to JSON with its counters and
/// nested outcome reachable by key.
#[test]
fn json_serialises_report() {
    // Assemble the report bottom-up: outcome, then operation, then report.
    let positive_case = CaseOutcome {
        label: "positive".into(),
        expected_4xx: false,
        actual_status: 200,
        passed: true,
    };
    let operation = OperationResult {
        method: "GET".into(),
        path: "/x".into(),
        positive: Some(positive_case),
        negatives: Vec::new(),
    };
    let report = SelfTestReport {
        positive_pass: 1,
        positive_fail: 0,
        negative_caught: BTreeMap::new(),
        negative_missed: BTreeMap::new(),
        operations: vec![operation],
    };

    let rendered = serde_json::to_value(&report).expect("serialises");
    assert_eq!(rendered["positive_pass"], serde_json::json!(1));

    let reported_status = &rendered["operations"][0]["positive"]["actual_status"];
    assert_eq!(*reported_status, serde_json::json!(200));
}
2938}